      1 
      2 /*---------------------------------------------------------------*/
      3 /*--- begin                                 host_arm64_defs.c ---*/
      4 /*---------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2013-2017 OpenWorks
     11       info@open-works.net
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     26    02110-1301, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 #include "libvex_basictypes.h"
     32 #include "libvex.h"
     33 #include "libvex_trc_values.h"
     34 
     35 #include "main_util.h"
     36 #include "host_generic_regs.h"
     37 #include "host_arm64_defs.h"
     38 
     39 
     40 /* --------- Registers. --------- */
     41 
     42 /* The usual HReg abstraction.  We use the following classes only:
     43      X regs (64 bit int)
     44      D regs (64 bit float, also used for 32 bit float)
     45      Q regs (128 bit vector)
     46 */
     47 
     48 const RRegUniverse* getRRegUniverse_ARM64 ( void )
     49 {
     50    /* The real-register universe is a big constant, so we just want to
     51       initialise it once. */
     52    static RRegUniverse rRegUniverse_ARM64;
     53    static Bool         rRegUniverse_ARM64_initted = False;
     54 
     55    /* Handy shorthand, nothing more */
     56    RRegUniverse* ru = &rRegUniverse_ARM64;
     57 
     58    /* This isn't thread-safe.  Sigh. */
     59    if (LIKELY(rRegUniverse_ARM64_initted))
     60       return ru;
     61 
     62    RRegUniverse__init(ru);
     63 
     64    /* Add the registers.  The initial segment of this array must
     65       contain the registers available for allocation by reg-alloc;
     66       those that follow are not available for allocation. */
     67 
     68    ru->regs[ru->size++] = hregARM64_X22();
     69    ru->regs[ru->size++] = hregARM64_X23();
     70    ru->regs[ru->size++] = hregARM64_X24();
     71    ru->regs[ru->size++] = hregARM64_X25();
     72    ru->regs[ru->size++] = hregARM64_X26();
     73    ru->regs[ru->size++] = hregARM64_X27();
     74    ru->regs[ru->size++] = hregARM64_X28();
     75 
     76    ru->regs[ru->size++] = hregARM64_X0();
     77    ru->regs[ru->size++] = hregARM64_X1();
     78    ru->regs[ru->size++] = hregARM64_X2();
     79    ru->regs[ru->size++] = hregARM64_X3();
     80    ru->regs[ru->size++] = hregARM64_X4();
     81    ru->regs[ru->size++] = hregARM64_X5();
     82    ru->regs[ru->size++] = hregARM64_X6();
     83    ru->regs[ru->size++] = hregARM64_X7();
     84    // X8 is used as a ProfInc temporary, not available to regalloc.
     85    // X9 is a chaining/spill temporary, not available to regalloc.
     86 
     87    // Do we really need all these?
     88    //ru->regs[ru->size++] = hregARM64_X10();
     89    //ru->regs[ru->size++] = hregARM64_X11();
     90    //ru->regs[ru->size++] = hregARM64_X12();
     91    //ru->regs[ru->size++] = hregARM64_X13();
     92    //ru->regs[ru->size++] = hregARM64_X14();
     93    //ru->regs[ru->size++] = hregARM64_X15();
     94    // X21 is the guest state pointer, not available to regalloc.
     95 
     96    // vector regs.  Unfortunately not callee-saved.
     97    ru->regs[ru->size++] = hregARM64_Q16();
     98    ru->regs[ru->size++] = hregARM64_Q17();
     99    ru->regs[ru->size++] = hregARM64_Q18();
    100    ru->regs[ru->size++] = hregARM64_Q19();
    101    ru->regs[ru->size++] = hregARM64_Q20();
    102 
    103    // F64 regs, all of which are callee-saved
    104    ru->regs[ru->size++] = hregARM64_D8();
    105    ru->regs[ru->size++] = hregARM64_D9();
    106    ru->regs[ru->size++] = hregARM64_D10();
    107    ru->regs[ru->size++] = hregARM64_D11();
    108    ru->regs[ru->size++] = hregARM64_D12();
    109    ru->regs[ru->size++] = hregARM64_D13();
    110 
    111    ru->allocable = ru->size;
    112    /* And other regs, not available to the allocator. */
    113 
    114    // unavail: x21 as GSP
    115    // x8 is used as a ProfInc temporary
    116    // x9 is used as a spill/reload/chaining/call temporary
    117    // x30 as LR
    118    // x31 because dealing with the SP-vs-ZR overloading is too
    119    // confusing, and we don't need to do so, so let's just avoid
    120    // the problem
    121    //
    122    // Currently, we have 15 allocatable integer registers:
    123    // 0 1 2 3 4 5 6 7 22 23 24 25 26 27 28
    124    //
    125    // Hence for the allocatable integer registers we have:
    126    //
    127    // callee-saved: 22 23 24 25 26 27 28
    128    // caller-saved: 0 1 2 3 4 5 6 7
    129    //
    130    // If the set of available registers changes or if the e/r status
    131    // changes, be sure to re-check/sync the definition of
    132    // getRegUsage for ARM64Instr_Call too.
    133 
    134    ru->regs[ru->size++] = hregARM64_X8();
    135    ru->regs[ru->size++] = hregARM64_X9();
    136    ru->regs[ru->size++] = hregARM64_X21();
    137 
    138    rRegUniverse_ARM64_initted = True;
    139 
    140    RRegUniverse__check_is_sane(ru);
    141    return ru;
    142 }
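
        /* An illustrative sketch, not part of the original file: how a client
           of this universe might walk the allocatable prefix.  Only regs[0 ..
           allocable-1] may be handed out by the register allocator; the
           trailing entries (x8, x9, x21) are registered but reserved. */
        static inline void example_show_allocatable_regs ( void )
        {
           const RRegUniverse* ru = getRRegUniverse_ARM64();
           UInt j;
           for (j = 0; j < ru->allocable; j++) {
              ppHRegARM64(ru->regs[j]);
              vex_printf("\n");
           }
        }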
    143 
    144 
    145 void ppHRegARM64 ( HReg reg )  {
    146    Int r;
    147    /* Be generic for all virtual regs. */
    148    if (hregIsVirtual(reg)) {
    149       ppHReg(reg);
    150       return;
    151    }
    152    /* But specific for real regs. */
    153    switch (hregClass(reg)) {
    154       case HRcInt64:
    155          r = hregEncoding(reg);
    156          vassert(r >= 0 && r < 31);
    157          vex_printf("x%d", r);
    158          return;
    159       case HRcFlt64:
    160          r = hregEncoding(reg);
    161          vassert(r >= 0 && r < 32);
    162          vex_printf("d%d", r);
    163          return;
    164       case HRcVec128:
    165          r = hregEncoding(reg);
    166          vassert(r >= 0 && r < 32);
    167          vex_printf("q%d", r);
    168          return;
    169       default:
    170          vpanic("ppHRegARM64");
    171    }
    172 }
    173 
    174 static void ppHRegARM64asSreg ( HReg reg ) {
    175    ppHRegARM64(reg);
    176    vex_printf("(S-reg)");
    177 }
    178 
    179 static void ppHRegARM64asHreg ( HReg reg ) {
    180    ppHRegARM64(reg);
    181    vex_printf("(H-reg)");
    182 }
    183 
    184 
    185 /* --------- Condition codes, ARM64 encoding. --------- */
    186 
    187 static const HChar* showARM64CondCode ( ARM64CondCode cond ) {
    188    switch (cond) {
    189        case ARM64cc_EQ:  return "eq";
    190        case ARM64cc_NE:  return "ne";
    191        case ARM64cc_CS:  return "cs";
    192        case ARM64cc_CC:  return "cc";
    193        case ARM64cc_MI:  return "mi";
    194        case ARM64cc_PL:  return "pl";
    195        case ARM64cc_VS:  return "vs";
    196        case ARM64cc_VC:  return "vc";
    197        case ARM64cc_HI:  return "hi";
    198        case ARM64cc_LS:  return "ls";
    199        case ARM64cc_GE:  return "ge";
    200        case ARM64cc_LT:  return "lt";
    201        case ARM64cc_GT:  return "gt";
    202        case ARM64cc_LE:  return "le";
    203        case ARM64cc_AL:  return "al"; // default
    204        case ARM64cc_NV:  return "nv";
    205        default: vpanic("showARM64CondCode");
    206    }
    207 }
    208 
    209 
    210 /* --------- Memory address expressions (amodes). --------- */
    211 
    212 ARM64AMode* ARM64AMode_RI9  ( HReg reg, Int simm9 ) {
    213    ARM64AMode* am        = LibVEX_Alloc_inline(sizeof(ARM64AMode));
    214    am->tag               = ARM64am_RI9;
    215    am->ARM64am.RI9.reg   = reg;
    216    am->ARM64am.RI9.simm9 = simm9;
    217    vassert(-256 <= simm9 && simm9 <= 255);
    218    return am;
    219 }
    220 
    221 ARM64AMode* ARM64AMode_RI12 ( HReg reg, Int uimm12, UChar szB ) {
    222    ARM64AMode* am          = LibVEX_Alloc_inline(sizeof(ARM64AMode));
    223    am->tag                 = ARM64am_RI12;
    224    am->ARM64am.RI12.reg    = reg;
    225    am->ARM64am.RI12.uimm12 = uimm12;
    226    am->ARM64am.RI12.szB    = szB;
    227    vassert(uimm12 >= 0 && uimm12 <= 4095);
    228    switch (szB) {
    229       case 1: case 2: case 4: case 8: break;
    230       default: vassert(0);
    231    }
    232    return am;
    233 }
    234 
    235 ARM64AMode* ARM64AMode_RR ( HReg base, HReg index ) {
    236    ARM64AMode* am       = LibVEX_Alloc_inline(sizeof(ARM64AMode));
    237    am->tag              = ARM64am_RR;
    238    am->ARM64am.RR.base  = base;
    239    am->ARM64am.RR.index = index;
    240    return am;
    241 }
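
        /* An illustrative sketch, not part of the original file: amodes for a
           64-bit access at guest-state offset 24 and at offset -8.  24 is
           8-aligned and 24/8 == 3 fits in uimm12, so the scaled RI12 form is
           usable; -8 is negative, so only the unscaled RI9 form will do.
           x21 is the guest state pointer (see above). */
        static inline ARM64AMode* example_amode_gsp_plus_24 ( void ) {
           return ARM64AMode_RI12(hregARM64_X21(), 3/*uimm12*/, 8/*szB*/);
        }
        static inline ARM64AMode* example_amode_gsp_minus_8 ( void ) {
           return ARM64AMode_RI9(hregARM64_X21(), -8/*simm9*/);
        }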
    242 
    243 static void ppARM64AMode ( ARM64AMode* am ) {
    244    switch (am->tag) {
    245       case ARM64am_RI9:
    246          vex_printf("%d(", am->ARM64am.RI9.simm9);
    247          ppHRegARM64(am->ARM64am.RI9.reg);
    248          vex_printf(")");
    249          break;
    250       case ARM64am_RI12:
    251          vex_printf("%u(", (UInt)am->ARM64am.RI12.szB
    252                            * (UInt)am->ARM64am.RI12.uimm12);
    253          ppHRegARM64(am->ARM64am.RI12.reg);
    254          vex_printf(")");
    255          break;
    256       case ARM64am_RR:
    257          vex_printf("(");
    258          ppHRegARM64(am->ARM64am.RR.base);
    259          vex_printf(",");
    260          ppHRegARM64(am->ARM64am.RR.index);
    261          vex_printf(")");
    262          break;
    263       default:
    264          vassert(0);
    265    }
    266 }
    267 
    268 static void addRegUsage_ARM64AMode ( HRegUsage* u, ARM64AMode* am ) {
    269    switch (am->tag) {
    270       case ARM64am_RI9:
    271          addHRegUse(u, HRmRead, am->ARM64am.RI9.reg);
    272          return;
    273       case ARM64am_RI12:
    274          addHRegUse(u, HRmRead, am->ARM64am.RI12.reg);
    275          return;
    276       case ARM64am_RR:
    277          addHRegUse(u, HRmRead, am->ARM64am.RR.base);
    278          addHRegUse(u, HRmRead, am->ARM64am.RR.index);
    279          return;
    280       default:
    281          vpanic("addRegUsage_ARM64Amode");
    282    }
    283 }
    284 
    285 static void mapRegs_ARM64AMode ( HRegRemap* m, ARM64AMode* am ) {
    286    switch (am->tag) {
    287       case ARM64am_RI9:
    288          am->ARM64am.RI9.reg = lookupHRegRemap(m, am->ARM64am.RI9.reg);
    289          return;
    290       case ARM64am_RI12:
    291          am->ARM64am.RI12.reg = lookupHRegRemap(m, am->ARM64am.RI12.reg);
    292          return;
    293       case ARM64am_RR:
    294          am->ARM64am.RR.base  = lookupHRegRemap(m, am->ARM64am.RR.base);
    295          am->ARM64am.RR.index = lookupHRegRemap(m, am->ARM64am.RR.index);
    296          return;
    297       default:
    298          vpanic("mapRegs_ARM64Amode");
    299    }
    300 }
    301 
    302 
    303 /* --------- Reg or uimm12<<{0,12} operands --------- */
    304 
    305 ARM64RIA* ARM64RIA_I12 ( UShort imm12, UChar shift ) {
    306    ARM64RIA* riA           = LibVEX_Alloc_inline(sizeof(ARM64RIA));
    307    riA->tag                = ARM64riA_I12;
    308    riA->ARM64riA.I12.imm12 = imm12;
    309    riA->ARM64riA.I12.shift = shift;
    310    vassert(imm12 < 4096);
    311    vassert(shift == 0 || shift == 12);
    312    return riA;
    313 }
    314 ARM64RIA* ARM64RIA_R ( HReg reg ) {
    315    ARM64RIA* riA       = LibVEX_Alloc_inline(sizeof(ARM64RIA));
    316    riA->tag            = ARM64riA_R;
    317    riA->ARM64riA.R.reg = reg;
    318    return riA;
    319 }
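
        /* An illustrative sketch, not part of the original file: the immediate
           form covers a uimm12 optionally shifted left by 12, so 0x1000 is
           encodable as (imm12=1, shift=12), whereas a value such as 0x1001
           is not and would have to be supplied in a register. */
        static inline ARM64RIA* example_riA_0x1000 ( void ) {
           return ARM64RIA_I12(1/*imm12*/, 12/*shift*/);
        }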
    320 
    321 static void ppARM64RIA ( ARM64RIA* riA ) {
    322    switch (riA->tag) {
    323       case ARM64riA_I12:
    324          vex_printf("#%u",(UInt)(riA->ARM64riA.I12.imm12
    325                                  << riA->ARM64riA.I12.shift));
    326          break;
    327       case ARM64riA_R:
    328          ppHRegARM64(riA->ARM64riA.R.reg);
    329          break;
    330       default:
    331          vassert(0);
    332    }
    333 }
    334 
    335 static void addRegUsage_ARM64RIA ( HRegUsage* u, ARM64RIA* riA ) {
    336    switch (riA->tag) {
    337       case ARM64riA_I12:
    338          return;
    339       case ARM64riA_R:
    340          addHRegUse(u, HRmRead, riA->ARM64riA.R.reg);
    341          return;
    342       default:
    343          vpanic("addRegUsage_ARM64RIA");
    344    }
    345 }
    346 
    347 static void mapRegs_ARM64RIA ( HRegRemap* m, ARM64RIA* riA ) {
    348    switch (riA->tag) {
    349       case ARM64riA_I12:
    350          return;
    351       case ARM64riA_R:
    352          riA->ARM64riA.R.reg = lookupHRegRemap(m, riA->ARM64riA.R.reg);
    353          return;
    354       default:
    355          vpanic("mapRegs_ARM64RIA");
    356    }
    357 }
    358 
    359 
    360 /* --------- Reg or "bitfield" (logic immediate) operands --------- */
    361 
    362 ARM64RIL* ARM64RIL_I13 ( UChar bitN, UChar immR, UChar immS ) {
    363    ARM64RIL* riL          = LibVEX_Alloc_inline(sizeof(ARM64RIL));
    364    riL->tag               = ARM64riL_I13;
    365    riL->ARM64riL.I13.bitN = bitN;
    366    riL->ARM64riL.I13.immR = immR;
    367    riL->ARM64riL.I13.immS = immS;
    368    vassert(bitN < 2);
    369    vassert(immR < 64);
    370    vassert(immS < 64);
    371    return riL;
    372 }
    373 ARM64RIL* ARM64RIL_R ( HReg reg ) {
    374    ARM64RIL* riL       = LibVEX_Alloc_inline(sizeof(ARM64RIL));
    375    riL->tag            = ARM64riL_R;
    376    riL->ARM64riL.R.reg = reg;
    377    return riL;
    378 }
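
        /* An illustrative sketch, not part of the original file: the 64-bit
           constant 0xFF is a valid bitmask ("logic") immediate -- 8
           consecutive ones, not rotated -- encoded as N=1, immR=0, immS=7
           (immS+1 set bits). */
        static inline ARM64RIL* example_riL_0xFF ( void ) {
           return ARM64RIL_I13(1/*bitN*/, 0/*immR*/, 7/*immS*/);
        }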
    379 
    380 static void ppARM64RIL ( ARM64RIL* riL ) {
    381    switch (riL->tag) {
    382       case ARM64riL_I13:
    383          vex_printf("#nrs(%u,%u,%u)",
    384                      (UInt)riL->ARM64riL.I13.bitN,
    385                      (UInt)riL->ARM64riL.I13.immR,
    386                      (UInt)riL->ARM64riL.I13.immS);
    387          break;
    388       case ARM64riL_R:
    389          ppHRegARM64(riL->ARM64riL.R.reg);
    390          break;
    391       default:
    392          vassert(0);
    393    }
    394 }
    395 
    396 static void addRegUsage_ARM64RIL ( HRegUsage* u, ARM64RIL* riL ) {
    397    switch (riL->tag) {
    398       case ARM64riL_I13:
    399          return;
    400       case ARM64riL_R:
    401          addHRegUse(u, HRmRead, riL->ARM64riL.R.reg);
    402          return;
    403       default:
    404          vpanic("addRegUsage_ARM64RIL");
    405    }
    406 }
    407 
    408 static void mapRegs_ARM64RIL ( HRegRemap* m, ARM64RIL* riL ) {
    409    switch (riL->tag) {
    410       case ARM64riL_I13:
    411          return;
    412       case ARM64riL_R:
    413          riL->ARM64riL.R.reg = lookupHRegRemap(m, riL->ARM64riL.R.reg);
    414          return;
    415       default:
    416          vpanic("mapRegs_ARM64RIL");
    417    }
    418 }
    419 
    420 
    421 /* --------------- Reg or uimm6 operands --------------- */
    422 
    423 ARM64RI6* ARM64RI6_I6 ( UInt imm6 ) {
    424    ARM64RI6* ri6         = LibVEX_Alloc_inline(sizeof(ARM64RI6));
    425    ri6->tag              = ARM64ri6_I6;
    426    ri6->ARM64ri6.I6.imm6 = imm6;
    427    vassert(imm6 > 0 && imm6 < 64);
    428    return ri6;
    429 }
    430 ARM64RI6* ARM64RI6_R ( HReg reg ) {
    431    ARM64RI6* ri6       = LibVEX_Alloc_inline(sizeof(ARM64RI6));
    432    ri6->tag            = ARM64ri6_R;
    433    ri6->ARM64ri6.R.reg = reg;
    434    return ri6;
    435 }
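
        /* An illustrative sketch, not part of the original file: a constant
           shift amount of 3, as consumed by ARM64Instr_Shift.  Note that
           imm6 == 0 is disallowed, so a shift by zero cannot be expressed
           this way. */
        static inline ARM64RI6* example_ri6_shift_by_3 ( void ) {
           return ARM64RI6_I6(3);
        }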
    436 
    437 static void ppARM64RI6 ( ARM64RI6* ri6 ) {
    438    switch (ri6->tag) {
    439       case ARM64ri6_I6:
    440          vex_printf("#%u", ri6->ARM64ri6.I6.imm6);
    441          break;
    442       case ARM64ri6_R:
    443          ppHRegARM64(ri6->ARM64ri6.R.reg);
    444          break;
    445       default:
    446          vassert(0);
    447    }
    448 }
    449 
    450 static void addRegUsage_ARM64RI6 ( HRegUsage* u, ARM64RI6* ri6 ) {
    451    switch (ri6->tag) {
    452       case ARM64ri6_I6:
    453          return;
    454       case ARM64ri6_R:
    455          addHRegUse(u, HRmRead, ri6->ARM64ri6.R.reg);
    456          return;
    457       default:
    458          vpanic("addRegUsage_ARM64RI6");
    459    }
    460 }
    461 
    462 static void mapRegs_ARM64RI6 ( HRegRemap* m, ARM64RI6* ri6 ) {
    463    switch (ri6->tag) {
    464       case ARM64ri6_I6:
    465          return;
    466       case ARM64ri6_R:
    467          ri6->ARM64ri6.R.reg = lookupHRegRemap(m, ri6->ARM64ri6.R.reg);
    468          return;
    469       default:
    470          vpanic("mapRegs_ARM64RI6");
    471    }
    472 }
    473 
    474 
    475 /* --------- Instructions. --------- */
    476 
    477 static const HChar* showARM64LogicOp ( ARM64LogicOp op ) {
    478    switch (op) {
    479       case ARM64lo_AND: return "and";
    480       case ARM64lo_OR:  return "orr";
    481       case ARM64lo_XOR: return "eor";
    482       default: vpanic("showARM64LogicOp");
    483    }
    484 }
    485 
    486 static const HChar* showARM64ShiftOp ( ARM64ShiftOp op ) {
    487    switch (op) {
    488       case ARM64sh_SHL: return "lsl";
    489       case ARM64sh_SHR: return "lsr";
    490       case ARM64sh_SAR: return "asr";
    491       default: vpanic("showARM64ShiftOp");
    492    }
    493 }
    494 
    495 static const HChar* showARM64UnaryOp ( ARM64UnaryOp op ) {
    496    switch (op) {
    497       case ARM64un_NEG: return "neg";
    498       case ARM64un_NOT: return "not";
    499       case ARM64un_CLZ: return "clz";
    500       default: vpanic("showARM64UnaryOp");
    501    }
    502 }
    503 
    504 static const HChar* showARM64MulOp ( ARM64MulOp op ) {
    505    switch (op) {
    506       case ARM64mul_PLAIN: return "mul  ";
    507       case ARM64mul_ZX:    return "umulh";
    508       case ARM64mul_SX:    return "smulh";
    509       default: vpanic("showARM64MulOp");
    510    }
    511 }
    512 
    513 static void characteriseARM64CvtOp ( /*OUT*/HChar* syn,
    514                                      /*OUT*/UInt* fszB, /*OUT*/UInt* iszB,
    515                                      ARM64CvtOp op ) {
    516    switch (op) {
    517       case ARM64cvt_F32_I32S:
    518          *syn = 's'; *fszB = 4; *iszB = 4; break;
    519       case ARM64cvt_F64_I32S:
    520          *syn = 's'; *fszB = 8; *iszB = 4; break;
    521       case ARM64cvt_F32_I64S:
    522          *syn = 's'; *fszB = 4; *iszB = 8; break;
    523       case ARM64cvt_F64_I64S:
    524          *syn = 's'; *fszB = 8; *iszB = 8; break;
    525       case ARM64cvt_F32_I32U:
    526          *syn = 'u'; *fszB = 4; *iszB = 4; break;
    527       case ARM64cvt_F64_I32U:
    528          *syn = 'u'; *fszB = 8; *iszB = 4; break;
    529       case ARM64cvt_F32_I64U:
    530          *syn = 'u'; *fszB = 4; *iszB = 8; break;
    531       case ARM64cvt_F64_I64U:
    532          *syn = 'u'; *fszB = 8; *iszB = 8; break;
    533       default:
    534          vpanic("characteriseARM64CvtOp");
    535   }
    536 }
    537 
    538 static const HChar* showARM64FpBinOp ( ARM64FpBinOp op ) {
    539    switch (op) {
    540       case ARM64fpb_ADD: return "add";
    541       case ARM64fpb_SUB: return "sub";
    542       case ARM64fpb_MUL: return "mul";
    543       case ARM64fpb_DIV: return "div";
    544       default: vpanic("showARM64FpBinOp");
    545    }
    546 }
    547 
    548 static const HChar* showARM64FpUnaryOp ( ARM64FpUnaryOp op ) {
    549    switch (op) {
    550       case ARM64fpu_NEG:   return "neg  ";
    551       case ARM64fpu_ABS:   return "abs  ";
    552       case ARM64fpu_SQRT:  return "sqrt ";
    553       case ARM64fpu_RINT:  return "rinti";
    554       case ARM64fpu_RECPX: return "recpx";
    555       default: vpanic("showARM64FpUnaryOp");
    556    }
    557 }
    558 
    559 static void showARM64VecBinOp(/*OUT*/const HChar** nm,
    560                               /*OUT*/const HChar** ar, ARM64VecBinOp op ) {
    561    switch (op) {
    562       case ARM64vecb_ADD64x2:      *nm = "add   ";    *ar = "2d";   return;
    563       case ARM64vecb_ADD32x4:      *nm = "add   ";    *ar = "4s";   return;
    564       case ARM64vecb_ADD16x8:      *nm = "add   ";    *ar = "8h";   return;
    565       case ARM64vecb_ADD8x16:      *nm = "add   ";    *ar = "16b";  return;
    566       case ARM64vecb_SUB64x2:      *nm = "sub   ";    *ar = "2d";   return;
    567       case ARM64vecb_SUB32x4:      *nm = "sub   ";    *ar = "4s";   return;
    568       case ARM64vecb_SUB16x8:      *nm = "sub   ";    *ar = "8h";   return;
    569       case ARM64vecb_SUB8x16:      *nm = "sub   ";    *ar = "16b";  return;
    570       case ARM64vecb_MUL32x4:      *nm = "mul   ";    *ar = "4s";   return;
    571       case ARM64vecb_MUL16x8:      *nm = "mul   ";    *ar = "8h";   return;
    572       case ARM64vecb_MUL8x16:      *nm = "mul   ";    *ar = "16b";  return;
    573       case ARM64vecb_FADD64x2:     *nm = "fadd  ";    *ar = "2d";   return;
    574       case ARM64vecb_FSUB64x2:     *nm = "fsub  ";    *ar = "2d";   return;
    575       case ARM64vecb_FMUL64x2:     *nm = "fmul  ";    *ar = "2d";   return;
    576       case ARM64vecb_FDIV64x2:     *nm = "fdiv  ";    *ar = "2d";   return;
    577       case ARM64vecb_FADD32x4:     *nm = "fadd  ";    *ar = "4s";   return;
    578       case ARM64vecb_FSUB32x4:     *nm = "fsub  ";    *ar = "4s";   return;
    579       case ARM64vecb_FMUL32x4:     *nm = "fmul  ";    *ar = "4s";   return;
    580       case ARM64vecb_FDIV32x4:     *nm = "fdiv  ";    *ar = "4s";   return;
    581       case ARM64vecb_FMAX64x2:     *nm = "fmax  ";    *ar = "2d";   return;
    582       case ARM64vecb_FMAX32x4:     *nm = "fmax  ";    *ar = "4s";   return;
    583       case ARM64vecb_FMIN64x2:     *nm = "fmin  ";    *ar = "2d";   return;
    584       case ARM64vecb_FMIN32x4:     *nm = "fmin  ";    *ar = "4s";   return;
    585       case ARM64vecb_UMAX32x4:     *nm = "umax  ";    *ar = "4s";   return;
    586       case ARM64vecb_UMAX16x8:     *nm = "umax  ";    *ar = "8h";   return;
    587       case ARM64vecb_UMAX8x16:     *nm = "umax  ";    *ar = "16b";  return;
    588       case ARM64vecb_UMIN32x4:     *nm = "umin  ";    *ar = "4s";   return;
    589       case ARM64vecb_UMIN16x8:     *nm = "umin  ";    *ar = "8h";   return;
    590       case ARM64vecb_UMIN8x16:     *nm = "umin  ";    *ar = "16b";  return;
    591       case ARM64vecb_SMAX32x4:     *nm = "smax  ";    *ar = "4s";   return;
    592       case ARM64vecb_SMAX16x8:     *nm = "smax  ";    *ar = "8h";   return;
    593       case ARM64vecb_SMAX8x16:     *nm = "smax  ";    *ar = "16b";  return;
    594       case ARM64vecb_SMIN32x4:     *nm = "smin  ";    *ar = "4s";   return;
    595       case ARM64vecb_SMIN16x8:     *nm = "smin  ";    *ar = "8h";   return;
    596       case ARM64vecb_SMIN8x16:     *nm = "smin  ";    *ar = "16b";  return;
    597       case ARM64vecb_AND:          *nm = "and   ";    *ar = "16b";  return;
    598       case ARM64vecb_ORR:          *nm = "orr   ";    *ar = "16b";  return;
    599       case ARM64vecb_XOR:          *nm = "eor   ";    *ar = "16b";  return;
    600       case ARM64vecb_CMEQ64x2:     *nm = "cmeq  ";    *ar = "2d";   return;
    601       case ARM64vecb_CMEQ32x4:     *nm = "cmeq  ";    *ar = "4s";   return;
    602       case ARM64vecb_CMEQ16x8:     *nm = "cmeq  ";    *ar = "8h";   return;
    603       case ARM64vecb_CMEQ8x16:     *nm = "cmeq  ";    *ar = "16b";  return;
    604       case ARM64vecb_CMHI64x2:     *nm = "cmhi  ";    *ar = "2d";   return;
    605       case ARM64vecb_CMHI32x4:     *nm = "cmhi  ";    *ar = "4s";   return;
    606       case ARM64vecb_CMHI16x8:     *nm = "cmhi  ";    *ar = "8h";   return;
    607       case ARM64vecb_CMHI8x16:     *nm = "cmhi  ";    *ar = "16b";  return;
    608       case ARM64vecb_CMGT64x2:     *nm = "cmgt  ";    *ar = "2d";   return;
    609       case ARM64vecb_CMGT32x4:     *nm = "cmgt  ";    *ar = "4s";   return;
    610       case ARM64vecb_CMGT16x8:     *nm = "cmgt  ";    *ar = "8h";   return;
    611       case ARM64vecb_CMGT8x16:     *nm = "cmgt  ";    *ar = "16b";  return;
    612       case ARM64vecb_FCMEQ64x2:    *nm = "fcmeq ";    *ar = "2d";   return;
    613       case ARM64vecb_FCMEQ32x4:    *nm = "fcmeq ";    *ar = "4s";   return;
    614       case ARM64vecb_FCMGE64x2:    *nm = "fcmge ";    *ar = "2d";   return;
    615       case ARM64vecb_FCMGE32x4:    *nm = "fcmge ";    *ar = "4s";   return;
    616       case ARM64vecb_FCMGT64x2:    *nm = "fcmgt ";    *ar = "2d";   return;
    617       case ARM64vecb_FCMGT32x4:    *nm = "fcmgt ";    *ar = "4s";   return;
    618       case ARM64vecb_TBL1:         *nm = "tbl   ";    *ar = "16b";  return;
    619       case ARM64vecb_UZP164x2:     *nm = "uzp1  ";    *ar = "2d";   return;
    620       case ARM64vecb_UZP132x4:     *nm = "uzp1  ";    *ar = "4s";   return;
    621       case ARM64vecb_UZP116x8:     *nm = "uzp1  ";    *ar = "8h";   return;
    622       case ARM64vecb_UZP18x16:     *nm = "uzp1  ";    *ar = "16b";  return;
    623       case ARM64vecb_UZP264x2:     *nm = "uzp2  ";    *ar = "2d";   return;
    624       case ARM64vecb_UZP232x4:     *nm = "uzp2  ";    *ar = "4s";   return;
    625       case ARM64vecb_UZP216x8:     *nm = "uzp2  ";    *ar = "8h";   return;
    626       case ARM64vecb_UZP28x16:     *nm = "uzp2  ";    *ar = "16b";  return;
    627       case ARM64vecb_ZIP132x4:     *nm = "zip1  ";    *ar = "4s";   return;
    628       case ARM64vecb_ZIP116x8:     *nm = "zip1  ";    *ar = "8h";   return;
    629       case ARM64vecb_ZIP18x16:     *nm = "zip1  ";    *ar = "16b";  return;
    630       case ARM64vecb_ZIP232x4:     *nm = "zip2  ";    *ar = "4s";   return;
    631       case ARM64vecb_ZIP216x8:     *nm = "zip2  ";    *ar = "8h";   return;
    632       case ARM64vecb_ZIP28x16:     *nm = "zip2  ";    *ar = "16b";  return;
    633       case ARM64vecb_PMUL8x16:     *nm = "pmul  ";    *ar = "16b";  return;
    634       case ARM64vecb_PMULL8x8:     *nm = "pmull ";    *ar = "8hbb"; return;
    635       case ARM64vecb_UMULL2DSS:    *nm = "umull ";    *ar = "2dss"; return;
    636       case ARM64vecb_UMULL4SHH:    *nm = "umull ";    *ar = "4shh"; return;
    637       case ARM64vecb_UMULL8HBB:    *nm = "umull ";    *ar = "8hbb"; return;
    638       case ARM64vecb_SMULL2DSS:    *nm = "smull ";    *ar = "2dss"; return;
    639       case ARM64vecb_SMULL4SHH:    *nm = "smull ";    *ar = "4shh"; return;
    640       case ARM64vecb_SMULL8HBB:    *nm = "smull ";    *ar = "8hbb"; return;
    641       case ARM64vecb_SQADD64x2:    *nm = "sqadd ";    *ar = "2d";   return;
    642       case ARM64vecb_SQADD32x4:    *nm = "sqadd ";    *ar = "4s";   return;
    643       case ARM64vecb_SQADD16x8:    *nm = "sqadd ";    *ar = "8h";   return;
    644       case ARM64vecb_SQADD8x16:    *nm = "sqadd ";    *ar = "16b";  return;
    645       case ARM64vecb_UQADD64x2:    *nm = "uqadd ";    *ar = "2d";   return;
    646       case ARM64vecb_UQADD32x4:    *nm = "uqadd ";    *ar = "4s";   return;
    647       case ARM64vecb_UQADD16x8:    *nm = "uqadd ";    *ar = "8h";   return;
    648       case ARM64vecb_UQADD8x16:    *nm = "uqadd ";    *ar = "16b";  return;
    649       case ARM64vecb_SQSUB64x2:    *nm = "sqsub ";    *ar = "2d";   return;
    650       case ARM64vecb_SQSUB32x4:    *nm = "sqsub ";    *ar = "4s";   return;
    651       case ARM64vecb_SQSUB16x8:    *nm = "sqsub ";    *ar = "8h";   return;
    652       case ARM64vecb_SQSUB8x16:    *nm = "sqsub ";    *ar = "16b";  return;
    653       case ARM64vecb_UQSUB64x2:    *nm = "uqsub ";    *ar = "2d";   return;
    654       case ARM64vecb_UQSUB32x4:    *nm = "uqsub ";    *ar = "4s";   return;
    655       case ARM64vecb_UQSUB16x8:    *nm = "uqsub ";    *ar = "8h";   return;
    656       case ARM64vecb_UQSUB8x16:    *nm = "uqsub ";    *ar = "16b";  return;
    657       case ARM64vecb_SQDMULL2DSS:  *nm = "sqdmull";   *ar = "2dss"; return;
    658       case ARM64vecb_SQDMULL4SHH:  *nm = "sqdmull";   *ar = "4shh"; return;
    659       case ARM64vecb_SQDMULH32x4:  *nm = "sqdmulh";   *ar = "4s";   return;
    660       case ARM64vecb_SQDMULH16x8:  *nm = "sqdmulh";   *ar = "8h";   return;
    661       case ARM64vecb_SQRDMULH32x4: *nm = "sqrdmulh";  *ar = "4s";   return;
    662       case ARM64vecb_SQRDMULH16x8: *nm = "sqrdmulh";  *ar = "8h";   return;
    663       case ARM64vecb_SQSHL64x2:    *nm = "sqshl ";    *ar = "2d";   return;
    664       case ARM64vecb_SQSHL32x4:    *nm = "sqshl ";    *ar = "4s";   return;
    665       case ARM64vecb_SQSHL16x8:    *nm = "sqshl ";    *ar = "8h";   return;
    666       case ARM64vecb_SQSHL8x16:    *nm = "sqshl ";    *ar = "16b";  return;
    667       case ARM64vecb_UQSHL64x2:    *nm = "uqshl ";    *ar = "2d";   return;
    668       case ARM64vecb_UQSHL32x4:    *nm = "uqshl ";    *ar = "4s";   return;
    669       case ARM64vecb_UQSHL16x8:    *nm = "uqshl ";    *ar = "8h";   return;
    670       case ARM64vecb_UQSHL8x16:    *nm = "uqshl ";    *ar = "16b";  return;
    671       case ARM64vecb_SQRSHL64x2:   *nm = "sqrshl";    *ar = "2d";   return;
    672       case ARM64vecb_SQRSHL32x4:   *nm = "sqrshl";    *ar = "4s";   return;
    673       case ARM64vecb_SQRSHL16x8:   *nm = "sqrshl";    *ar = "8h";   return;
    674       case ARM64vecb_SQRSHL8x16:   *nm = "sqrshl";    *ar = "16b";  return;
    675       case ARM64vecb_UQRSHL64x2:   *nm = "uqrshl";    *ar = "2d";   return;
    676       case ARM64vecb_UQRSHL32x4:   *nm = "uqrshl";    *ar = "4s";   return;
    677       case ARM64vecb_UQRSHL16x8:   *nm = "uqrshl";    *ar = "8h";   return;
    678       case ARM64vecb_UQRSHL8x16:   *nm = "uqrshl";    *ar = "16b";  return;
    679       case ARM64vecb_SSHL64x2:     *nm = "sshl  ";    *ar = "2d";   return;
    680       case ARM64vecb_SSHL32x4:     *nm = "sshl  ";    *ar = "4s";   return;
    681       case ARM64vecb_SSHL16x8:     *nm = "sshl  ";    *ar = "8h";   return;
    682       case ARM64vecb_SSHL8x16:     *nm = "sshl  ";    *ar = "16b";  return;
    683       case ARM64vecb_USHL64x2:     *nm = "ushl  ";    *ar = "2d";   return;
    684       case ARM64vecb_USHL32x4:     *nm = "ushl  ";    *ar = "4s";   return;
    685       case ARM64vecb_USHL16x8:     *nm = "ushl  ";    *ar = "8h";   return;
    686       case ARM64vecb_USHL8x16:     *nm = "ushl  ";    *ar = "16b";  return;
    687       case ARM64vecb_SRSHL64x2:    *nm = "srshl ";    *ar = "2d";   return;
    688       case ARM64vecb_SRSHL32x4:    *nm = "srshl ";    *ar = "4s";   return;
    689       case ARM64vecb_SRSHL16x8:    *nm = "srshl ";    *ar = "8h";   return;
    690       case ARM64vecb_SRSHL8x16:    *nm = "srshl ";    *ar = "16b";  return;
    691       case ARM64vecb_URSHL64x2:    *nm = "urshl ";    *ar = "2d";   return;
    692       case ARM64vecb_URSHL32x4:    *nm = "urshl ";    *ar = "4s";   return;
    693       case ARM64vecb_URSHL16x8:    *nm = "urshl ";    *ar = "8h";   return;
    694       case ARM64vecb_URSHL8x16:    *nm = "urshl ";    *ar = "16b";  return;
    695       case ARM64vecb_FRECPS64x2:   *nm = "frecps";    *ar = "2d";   return;
    696       case ARM64vecb_FRECPS32x4:   *nm = "frecps";    *ar = "4s";   return;
    697       case ARM64vecb_FRSQRTS64x2:  *nm = "frsqrts";   *ar = "2d";   return;
    698       case ARM64vecb_FRSQRTS32x4:  *nm = "frsqrts";   *ar = "4s";   return;
    699       default: vpanic("showARM64VecBinOp");
    700    }
    701 }
    702 
    703 static void showARM64VecModifyOp(/*OUT*/const HChar** nm,
    704                                  /*OUT*/const HChar** ar,
    705                                  ARM64VecModifyOp op ) {
    706    switch (op) {
    707       case ARM64vecmo_SUQADD64x2:   *nm = "suqadd";    *ar = "2d";   return;
    708       case ARM64vecmo_SUQADD32x4:   *nm = "suqadd";    *ar = "4s";   return;
    709       case ARM64vecmo_SUQADD16x8:   *nm = "suqadd";    *ar = "8h";   return;
    710       case ARM64vecmo_SUQADD8x16:   *nm = "suqadd";    *ar = "16b";  return;
    711       case ARM64vecmo_USQADD64x2:   *nm = "usqadd";    *ar = "2d";   return;
    712       case ARM64vecmo_USQADD32x4:   *nm = "usqadd";    *ar = "4s";   return;
    713       case ARM64vecmo_USQADD16x8:   *nm = "usqadd";    *ar = "8h";   return;
    714       case ARM64vecmo_USQADD8x16:   *nm = "usqadd";    *ar = "16b";  return;
    715       default: vpanic("showARM64VecModifyOp");
    716    }
    717 }
    718 
    719 static void showARM64VecUnaryOp(/*OUT*/const HChar** nm,
    720                                 /*OUT*/const HChar** ar, ARM64VecUnaryOp op )
    721 {
    722    switch (op) {
    723       case ARM64vecu_FNEG64x2:    *nm = "fneg ";   *ar = "2d";  return;
    724       case ARM64vecu_FNEG32x4:    *nm = "fneg ";   *ar = "4s";  return;
    725       case ARM64vecu_FABS64x2:    *nm = "fabs ";   *ar = "2d";  return;
    726       case ARM64vecu_FABS32x4:    *nm = "fabs ";   *ar = "4s";  return;
    727       case ARM64vecu_NOT:         *nm = "not  ";   *ar = "all"; return;
    728       case ARM64vecu_ABS64x2:     *nm = "abs  ";   *ar = "2d";  return;
    729       case ARM64vecu_ABS32x4:     *nm = "abs  ";   *ar = "4s";  return;
    730       case ARM64vecu_ABS16x8:     *nm = "abs  ";   *ar = "8h";  return;
    731       case ARM64vecu_ABS8x16:     *nm = "abs  ";   *ar = "16b"; return;
    732       case ARM64vecu_CLS32x4:     *nm = "cls  ";   *ar = "4s";  return;
    733       case ARM64vecu_CLS16x8:     *nm = "cls  ";   *ar = "8h";  return;
    734       case ARM64vecu_CLS8x16:     *nm = "cls  ";   *ar = "16b"; return;
    735       case ARM64vecu_CLZ32x4:     *nm = "clz  ";   *ar = "4s";  return;
    736       case ARM64vecu_CLZ16x8:     *nm = "clz  ";   *ar = "8h";  return;
    737       case ARM64vecu_CLZ8x16:     *nm = "clz  ";   *ar = "16b"; return;
    738       case ARM64vecu_CNT8x16:     *nm = "cnt  ";   *ar = "16b"; return;
    739       case ARM64vecu_RBIT:        *nm = "rbit ";   *ar = "16b"; return;
    740       case ARM64vecu_REV1616B:    *nm = "rev16";   *ar = "16b"; return;
    741       case ARM64vecu_REV3216B:    *nm = "rev32";   *ar = "16b"; return;
    742       case ARM64vecu_REV328H:     *nm = "rev32";   *ar = "8h";  return;
    743       case ARM64vecu_REV6416B:    *nm = "rev64";   *ar = "16b"; return;
    744       case ARM64vecu_REV648H:     *nm = "rev64";   *ar = "8h";  return;
    745       case ARM64vecu_REV644S:     *nm = "rev64";   *ar = "4s";  return;
    746       case ARM64vecu_URECPE32x4:  *nm = "urecpe";  *ar = "4s";  return;
    747       case ARM64vecu_URSQRTE32x4: *nm = "ursqrte"; *ar = "4s";  return;
    748       case ARM64vecu_FRECPE64x2:  *nm = "frecpe";  *ar = "2d";  return;
    749       case ARM64vecu_FRECPE32x4:  *nm = "frecpe";  *ar = "4s";  return;
    750       case ARM64vecu_FRSQRTE64x2: *nm = "frsqrte"; *ar = "2d";  return;
    751       case ARM64vecu_FRSQRTE32x4: *nm = "frsqrte"; *ar = "4s";  return;
    752       case ARM64vecu_FSQRT64x2:   *nm = "fsqrt";   *ar = "2d";  return;
    753       case ARM64vecu_FSQRT32x4:   *nm = "fsqrt";   *ar = "4s";  return;
    754       default: vpanic("showARM64VecUnaryOp");
    755    }
    756 }
    757 
    758 static void showARM64VecShiftImmOp(/*OUT*/const HChar** nm,
    759                                    /*OUT*/const HChar** ar,
    760                                    ARM64VecShiftImmOp op )
    761 {
    762    switch (op) {
    763       case ARM64vecshi_USHR64x2:    *nm = "ushr  ";   *ar = "2d";  return;
    764       case ARM64vecshi_USHR32x4:    *nm = "ushr  ";   *ar = "4s";  return;
    765       case ARM64vecshi_USHR16x8:    *nm = "ushr  ";   *ar = "8h";  return;
    766       case ARM64vecshi_USHR8x16:    *nm = "ushr  ";   *ar = "16b"; return;
    767       case ARM64vecshi_SSHR64x2:    *nm = "sshr  ";   *ar = "2d";  return;
    768       case ARM64vecshi_SSHR32x4:    *nm = "sshr  ";   *ar = "4s";  return;
    769       case ARM64vecshi_SSHR16x8:    *nm = "sshr  ";   *ar = "8h";  return;
    770       case ARM64vecshi_SSHR8x16:    *nm = "sshr  ";   *ar = "16b"; return;
    771       case ARM64vecshi_SHL64x2:     *nm = "shl   ";   *ar = "2d";  return;
    772       case ARM64vecshi_SHL32x4:     *nm = "shl   ";   *ar = "4s";  return;
    773       case ARM64vecshi_SHL16x8:     *nm = "shl   ";   *ar = "8h";  return;
    774       case ARM64vecshi_SHL8x16:     *nm = "shl   ";   *ar = "16b"; return;
    775       case ARM64vecshi_SQSHRN2SD:   *nm = "sqshrn";   *ar = "2sd"; return;
    776       case ARM64vecshi_SQSHRN4HS:   *nm = "sqshrn";   *ar = "4hs"; return;
    777       case ARM64vecshi_SQSHRN8BH:   *nm = "sqshrn";   *ar = "8bh"; return;
    778       case ARM64vecshi_UQSHRN2SD:   *nm = "uqshrn";   *ar = "2sd"; return;
    779       case ARM64vecshi_UQSHRN4HS:   *nm = "uqshrn";   *ar = "4hs"; return;
    780       case ARM64vecshi_UQSHRN8BH:   *nm = "uqshrn";   *ar = "8bh"; return;
    781       case ARM64vecshi_SQSHRUN2SD:  *nm = "sqshrun";  *ar = "2sd"; return;
    782       case ARM64vecshi_SQSHRUN4HS:  *nm = "sqshrun";  *ar = "4hs"; return;
    783       case ARM64vecshi_SQSHRUN8BH:  *nm = "sqshrun";  *ar = "8bh"; return;
    784       case ARM64vecshi_SQRSHRN2SD:  *nm = "sqrshrn";  *ar = "2sd"; return;
    785       case ARM64vecshi_SQRSHRN4HS:  *nm = "sqrshrn";  *ar = "4hs"; return;
    786       case ARM64vecshi_SQRSHRN8BH:  *nm = "sqrshrn";  *ar = "8bh"; return;
    787       case ARM64vecshi_UQRSHRN2SD:  *nm = "uqrshrn";  *ar = "2sd"; return;
    788       case ARM64vecshi_UQRSHRN4HS:  *nm = "uqrshrn";  *ar = "4hs"; return;
    789       case ARM64vecshi_UQRSHRN8BH:  *nm = "uqrshrn";  *ar = "8bh"; return;
    790       case ARM64vecshi_SQRSHRUN2SD: *nm = "sqrshrun"; *ar = "2sd"; return;
    791       case ARM64vecshi_SQRSHRUN4HS: *nm = "sqrshrun"; *ar = "4hs"; return;
    792       case ARM64vecshi_SQRSHRUN8BH: *nm = "sqrshrun"; *ar = "8bh"; return;
    793       case ARM64vecshi_UQSHL64x2:   *nm = "uqshl ";   *ar = "2d";  return;
    794       case ARM64vecshi_UQSHL32x4:   *nm = "uqshl ";   *ar = "4s";  return;
    795       case ARM64vecshi_UQSHL16x8:   *nm = "uqshl ";   *ar = "8h";  return;
    796       case ARM64vecshi_UQSHL8x16:   *nm = "uqshl ";   *ar = "16b"; return;
    797       case ARM64vecshi_SQSHL64x2:   *nm = "sqshl ";   *ar = "2d";  return;
    798       case ARM64vecshi_SQSHL32x4:   *nm = "sqshl ";   *ar = "4s";  return;
    799       case ARM64vecshi_SQSHL16x8:   *nm = "sqshl ";   *ar = "8h";  return;
    800       case ARM64vecshi_SQSHL8x16:   *nm = "sqshl ";   *ar = "16b"; return;
    801       case ARM64vecshi_SQSHLU64x2:  *nm = "sqshlu";   *ar = "2d";  return;
    802       case ARM64vecshi_SQSHLU32x4:  *nm = "sqshlu";   *ar = "4s";  return;
    803       case ARM64vecshi_SQSHLU16x8:  *nm = "sqshlu";   *ar = "8h";  return;
    804       case ARM64vecshi_SQSHLU8x16:  *nm = "sqshlu";   *ar = "16b"; return;
    805       default: vpanic("showARM64VecShiftImmOp");
    806    }
    807 }
    808 
    809 static const HChar* showARM64VecNarrowOp(ARM64VecNarrowOp op) {
    810    switch (op) {
    811       case ARM64vecna_XTN:    return "xtn   ";
    812       case ARM64vecna_SQXTN:  return "sqxtn ";
    813       case ARM64vecna_UQXTN:  return "uqxtn ";
    814       case ARM64vecna_SQXTUN: return "sqxtun";
    815       default: vpanic("showARM64VecNarrowOp");
    816    }
    817 }
    818 
    819 ARM64Instr* ARM64Instr_Arith ( HReg dst,
    820                                HReg argL, ARM64RIA* argR, Bool isAdd ) {
    821    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    822    i->tag                 = ARM64in_Arith;
    823    i->ARM64in.Arith.dst   = dst;
    824    i->ARM64in.Arith.argL  = argL;
    825    i->ARM64in.Arith.argR  = argR;
    826    i->ARM64in.Arith.isAdd = isAdd;
    827    return i;
    828 }
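
        /* An illustrative sketch, not part of the original file:
           "add x22, x23, #0x1000", built from the pieces above. */
        static inline ARM64Instr* example_add_x22_x23_0x1000 ( void ) {
           return ARM64Instr_Arith(hregARM64_X22(), hregARM64_X23(),
                                   ARM64RIA_I12(1/*imm12*/, 12/*shift*/),
                                   True/*isAdd*/);
        }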
    829 ARM64Instr* ARM64Instr_Cmp ( HReg argL, ARM64RIA* argR, Bool is64 ) {
    830    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    831    i->tag              = ARM64in_Cmp;
    832    i->ARM64in.Cmp.argL = argL;
    833    i->ARM64in.Cmp.argR = argR;
    834    i->ARM64in.Cmp.is64 = is64;
    835    return i;
    836 }
    837 ARM64Instr* ARM64Instr_Logic ( HReg dst,
    838                                HReg argL, ARM64RIL* argR, ARM64LogicOp op ) {
    839    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    840    i->tag                 = ARM64in_Logic;
    841    i->ARM64in.Logic.dst   = dst;
    842    i->ARM64in.Logic.argL  = argL;
    843    i->ARM64in.Logic.argR  = argR;
    844    i->ARM64in.Logic.op    = op;
    845    return i;
    846 }
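
        /* An illustrative sketch, not part of the original file:
           "and x0, x1, #0xFF", using the bitmask immediate N=1/immR=0/immS=7
           shown earlier. */
        static inline ARM64Instr* example_and_x0_x1_0xFF ( void ) {
           return ARM64Instr_Logic(hregARM64_X0(), hregARM64_X1(),
                                   ARM64RIL_I13(1, 0, 7), ARM64lo_AND);
        }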
    847 ARM64Instr* ARM64Instr_Test ( HReg argL, ARM64RIL* argR ) {
    848    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    849    i->tag               = ARM64in_Test;
    850    i->ARM64in.Test.argL = argL;
    851    i->ARM64in.Test.argR = argR;
    852    return i;
    853 }
    854 ARM64Instr* ARM64Instr_Shift ( HReg dst,
    855                                HReg argL, ARM64RI6* argR, ARM64ShiftOp op ) {
    856    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    857    i->tag                = ARM64in_Shift;
    858    i->ARM64in.Shift.dst  = dst;
    859    i->ARM64in.Shift.argL = argL;
    860    i->ARM64in.Shift.argR = argR;
    861    i->ARM64in.Shift.op   = op;
    862    return i;
    863 }
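
        /* An illustrative sketch, not part of the original file:
           "lsl x2, x3, #3". */
        static inline ARM64Instr* example_lsl_x2_x3_3 ( void ) {
           return ARM64Instr_Shift(hregARM64_X2(), hregARM64_X3(),
                                   ARM64RI6_I6(3), ARM64sh_SHL);
        }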
    864 ARM64Instr* ARM64Instr_Unary ( HReg dst, HReg src, ARM64UnaryOp op ) {
    865    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    866    i->tag               = ARM64in_Unary;
    867    i->ARM64in.Unary.dst = dst;
    868    i->ARM64in.Unary.src = src;
    869    i->ARM64in.Unary.op  = op;
    870    return i;
    871 }
    872 ARM64Instr* ARM64Instr_MovI ( HReg dst, HReg src ) {
    873    ARM64Instr* i      = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    874    i->tag             = ARM64in_MovI;
    875    i->ARM64in.MovI.dst = dst;
    876    i->ARM64in.MovI.src = src;
    877    vassert(hregClass(src) == HRcInt64);
    878    vassert(hregClass(dst) == HRcInt64);
    879    return i;
    880 }
    881 ARM64Instr* ARM64Instr_Imm64 ( HReg dst, ULong imm64 ) {
    882    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    883    i->tag                 = ARM64in_Imm64;
    884    i->ARM64in.Imm64.dst   = dst;
    885    i->ARM64in.Imm64.imm64 = imm64;
    886    return i;
    887 }
    888 ARM64Instr* ARM64Instr_LdSt64 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
    889    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    890    i->tag                   = ARM64in_LdSt64;
    891    i->ARM64in.LdSt64.isLoad = isLoad;
    892    i->ARM64in.LdSt64.rD     = rD;
    893    i->ARM64in.LdSt64.amode  = amode;
    894    return i;
    895 }
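
        /* An illustrative sketch, not part of the original file:
           "ldr x0, [x21, #24]", a 64-bit load from guest-state offset 24
           via the scaled RI12 amode. */
        static inline ARM64Instr* example_ldr_x0_gsp_24 ( void ) {
           return ARM64Instr_LdSt64(True/*isLoad*/, hregARM64_X0(),
                                    ARM64AMode_RI12(hregARM64_X21(), 3, 8));
        }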
    896 ARM64Instr* ARM64Instr_LdSt32 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
    897    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    898    i->tag                   = ARM64in_LdSt32;
    899    i->ARM64in.LdSt32.isLoad = isLoad;
    900    i->ARM64in.LdSt32.rD     = rD;
    901    i->ARM64in.LdSt32.amode  = amode;
    902    return i;
    903 }
    904 ARM64Instr* ARM64Instr_LdSt16 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
    905    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    906    i->tag                   = ARM64in_LdSt16;
    907    i->ARM64in.LdSt16.isLoad = isLoad;
    908    i->ARM64in.LdSt16.rD     = rD;
    909    i->ARM64in.LdSt16.amode  = amode;
    910    return i;
    911 }
    912 ARM64Instr* ARM64Instr_LdSt8 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
    913    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    914    i->tag                  = ARM64in_LdSt8;
    915    i->ARM64in.LdSt8.isLoad = isLoad;
    916    i->ARM64in.LdSt8.rD     = rD;
    917    i->ARM64in.LdSt8.amode  = amode;
    918    return i;
    919 }
    920 ARM64Instr* ARM64Instr_XDirect ( Addr64 dstGA, ARM64AMode* amPC,
    921                                  ARM64CondCode cond, Bool toFastEP ) {
    922    ARM64Instr* i               = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    923    i->tag                      = ARM64in_XDirect;
    924    i->ARM64in.XDirect.dstGA    = dstGA;
    925    i->ARM64in.XDirect.amPC     = amPC;
    926    i->ARM64in.XDirect.cond     = cond;
    927    i->ARM64in.XDirect.toFastEP = toFastEP;
    928    return i;
    929 }
    930 ARM64Instr* ARM64Instr_XIndir ( HReg dstGA, ARM64AMode* amPC,
    931                                 ARM64CondCode cond ) {
    932    ARM64Instr* i           = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    933    i->tag                  = ARM64in_XIndir;
    934    i->ARM64in.XIndir.dstGA = dstGA;
    935    i->ARM64in.XIndir.amPC  = amPC;
    936    i->ARM64in.XIndir.cond  = cond;
    937    return i;
    938 }
    939 ARM64Instr* ARM64Instr_XAssisted ( HReg dstGA, ARM64AMode* amPC,
    940                                    ARM64CondCode cond, IRJumpKind jk ) {
    941    ARM64Instr* i              = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    942    i->tag                     = ARM64in_XAssisted;
    943    i->ARM64in.XAssisted.dstGA = dstGA;
    944    i->ARM64in.XAssisted.amPC  = amPC;
    945    i->ARM64in.XAssisted.cond  = cond;
    946    i->ARM64in.XAssisted.jk    = jk;
    947    return i;
    948 }
    949 ARM64Instr* ARM64Instr_CSel ( HReg dst, HReg argL, HReg argR,
    950                               ARM64CondCode cond ) {
    951    ARM64Instr* i        = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    952    i->tag               = ARM64in_CSel;
    953    i->ARM64in.CSel.dst  = dst;
    954    i->ARM64in.CSel.argL = argL;
    955    i->ARM64in.CSel.argR = argR;
    956    i->ARM64in.CSel.cond = cond;
    957    return i;
    958 }
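
        /* An illustrative sketch, not part of the original file:
           "csel x0, x1, x2, eq", i.e. x0 = eq ? x1 : x2. */
        static inline ARM64Instr* example_csel_x0_x1_x2_eq ( void ) {
           return ARM64Instr_CSel(hregARM64_X0(), hregARM64_X1(),
                                  hregARM64_X2(), ARM64cc_EQ);
        }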
    959 ARM64Instr* ARM64Instr_Call ( ARM64CondCode cond, Addr64 target, Int nArgRegs,
    960                               RetLoc rloc ) {
    961    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    962    i->tag                   = ARM64in_Call;
    963    i->ARM64in.Call.cond     = cond;
    964    i->ARM64in.Call.target   = target;
    965    i->ARM64in.Call.nArgRegs = nArgRegs;
    966    i->ARM64in.Call.rloc     = rloc;
    967    vassert(is_sane_RetLoc(rloc));
    968    return i;
    969 }
    970 extern ARM64Instr* ARM64Instr_AddToSP ( Int simm ) {
    971    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    972    i->tag                  = ARM64in_AddToSP;
    973    i->ARM64in.AddToSP.simm = simm;
    974    vassert(-4096 < simm && simm < 4096);
    975    vassert(0 == (simm & 0xF));
    976    return i;
    977 }
    978 extern ARM64Instr* ARM64Instr_FromSP  ( HReg dst ) {
    979    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    980    i->tag                = ARM64in_FromSP;
    981    i->ARM64in.FromSP.dst = dst;
    982    return i;
    983 }
    984 ARM64Instr* ARM64Instr_Mul ( HReg dst, HReg argL, HReg argR,
    985                              ARM64MulOp op ) {
    986    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    987    i->tag              = ARM64in_Mul;
    988    i->ARM64in.Mul.dst  = dst;
    989    i->ARM64in.Mul.argL = argL;
    990    i->ARM64in.Mul.argR = argR;
    991    i->ARM64in.Mul.op   = op;
    992    return i;
    993 }
    994 ARM64Instr* ARM64Instr_LdrEX ( Int szB ) {
    995    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    996    i->tag               = ARM64in_LdrEX;
    997    i->ARM64in.LdrEX.szB = szB;
    998    vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
    999    return i;
   1000 }
   1001 ARM64Instr* ARM64Instr_StrEX ( Int szB ) {
   1002    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1003    i->tag               = ARM64in_StrEX;
   1004    i->ARM64in.StrEX.szB = szB;
   1005    vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
   1006    return i;
   1007 }
   1008 ARM64Instr* ARM64Instr_CAS ( Int szB ) {
   1009    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1010    i->tag             = ARM64in_CAS;
   1011    i->ARM64in.CAS.szB = szB;
   1012    vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
   1013    return i;
   1014 }
   1015 ARM64Instr* ARM64Instr_MFence ( void ) {
   1016    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1017    i->tag        = ARM64in_MFence;
   1018    return i;
   1019 }
   1020 ARM64Instr* ARM64Instr_ClrEX ( void ) {
   1021    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1022    i->tag        = ARM64in_ClrEX;
   1023    return i;
   1024 }
   1025 ARM64Instr* ARM64Instr_VLdStH ( Bool isLoad, HReg sD, HReg rN, UInt uimm12 ) {
   1026    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1027    i->tag                   = ARM64in_VLdStH;
   1028    i->ARM64in.VLdStH.isLoad = isLoad;
   1029    i->ARM64in.VLdStH.hD     = sD;
   1030    i->ARM64in.VLdStH.rN     = rN;
   1031    i->ARM64in.VLdStH.uimm12 = uimm12;
   1032    vassert(uimm12 < 8192 && 0 == (uimm12 & 1));
   1033    return i;
   1034 }
   1035 ARM64Instr* ARM64Instr_VLdStS ( Bool isLoad, HReg sD, HReg rN, UInt uimm12 ) {
   1036    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1037    i->tag                   = ARM64in_VLdStS;
   1038    i->ARM64in.VLdStS.isLoad = isLoad;
   1039    i->ARM64in.VLdStS.sD     = sD;
   1040    i->ARM64in.VLdStS.rN     = rN;
   1041    i->ARM64in.VLdStS.uimm12 = uimm12;
   1042    vassert(uimm12 < 16384 && 0 == (uimm12 & 3));
   1043    return i;
   1044 }
   1045 ARM64Instr* ARM64Instr_VLdStD ( Bool isLoad, HReg dD, HReg rN, UInt uimm12 ) {
   1046    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1047    i->tag                   = ARM64in_VLdStD;
   1048    i->ARM64in.VLdStD.isLoad = isLoad;
   1049    i->ARM64in.VLdStD.dD     = dD;
   1050    i->ARM64in.VLdStD.rN     = rN;
   1051    i->ARM64in.VLdStD.uimm12 = uimm12;
   1052    vassert(uimm12 < 32768 && 0 == (uimm12 & 7));
   1053    return i;
   1054 }
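
        /* An illustrative sketch, not part of the original file:
           "ldr d8, [x21, #16]".  Unlike ARM64AMode_RI12, the uimm12 here is
           the byte offset itself (hence the 8-alignment check above), not
           the offset divided by the access size. */
        static inline ARM64Instr* example_ldr_d8_gsp_16 ( void ) {
           return ARM64Instr_VLdStD(True/*isLoad*/, hregARM64_D8(),
                                    hregARM64_X21(), 16/*uimm12*/);
        }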
   1055 ARM64Instr* ARM64Instr_VLdStQ ( Bool isLoad, HReg rQ, HReg rN ) {
   1056    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1057    i->tag                   = ARM64in_VLdStQ;
   1058    i->ARM64in.VLdStQ.isLoad = isLoad;
   1059    i->ARM64in.VLdStQ.rQ     = rQ;
   1060    i->ARM64in.VLdStQ.rN     = rN;
   1061    return i;
   1062 }
   1063 ARM64Instr* ARM64Instr_VCvtI2F ( ARM64CvtOp how, HReg rD, HReg rS ) {
   1064    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1065    i->tag                 = ARM64in_VCvtI2F;
   1066    i->ARM64in.VCvtI2F.how = how;
   1067    i->ARM64in.VCvtI2F.rD  = rD;
   1068    i->ARM64in.VCvtI2F.rS  = rS;
   1069    return i;
   1070 }
   1071 ARM64Instr* ARM64Instr_VCvtF2I ( ARM64CvtOp how, HReg rD, HReg rS,
   1072                                  UChar armRM ) {
   1073    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1074    i->tag                   = ARM64in_VCvtF2I;
   1075    i->ARM64in.VCvtF2I.how   = how;
   1076    i->ARM64in.VCvtF2I.rD    = rD;
   1077    i->ARM64in.VCvtF2I.rS    = rS;
   1078    i->ARM64in.VCvtF2I.armRM = armRM;
   1079    vassert(armRM <= 3);
   1080    return i;
   1081 }
   1082 ARM64Instr* ARM64Instr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
   1083    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1084    i->tag                 = ARM64in_VCvtSD;
   1085    i->ARM64in.VCvtSD.sToD = sToD;
   1086    i->ARM64in.VCvtSD.dst  = dst;
   1087    i->ARM64in.VCvtSD.src  = src;
   1088    return i;
   1089 }
   1090 ARM64Instr* ARM64Instr_VCvtHS ( Bool hToS, HReg dst, HReg src ) {
   1091    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1092    i->tag                 = ARM64in_VCvtHS;
   1093    i->ARM64in.VCvtHS.hToS = hToS;
   1094    i->ARM64in.VCvtHS.dst  = dst;
   1095    i->ARM64in.VCvtHS.src  = src;
   1096    return i;
   1097 }
   1098 ARM64Instr* ARM64Instr_VCvtHD ( Bool hToD, HReg dst, HReg src ) {
   1099    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1100    i->tag                 = ARM64in_VCvtHD;
   1101    i->ARM64in.VCvtHD.hToD = hToD;
   1102    i->ARM64in.VCvtHD.dst  = dst;
   1103    i->ARM64in.VCvtHD.src  = src;
   1104    return i;
   1105 }
   1106 ARM64Instr* ARM64Instr_VUnaryD ( ARM64FpUnaryOp op, HReg dst, HReg src ) {
   1107    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1108    i->tag                 = ARM64in_VUnaryD;
   1109    i->ARM64in.VUnaryD.op  = op;
   1110    i->ARM64in.VUnaryD.dst = dst;
   1111    i->ARM64in.VUnaryD.src = src;
   1112    return i;
   1113 }
   1114 ARM64Instr* ARM64Instr_VUnaryS ( ARM64FpUnaryOp op, HReg dst, HReg src ) {
   1115    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1116    i->tag                 = ARM64in_VUnaryS;
   1117    i->ARM64in.VUnaryS.op  = op;
   1118    i->ARM64in.VUnaryS.dst = dst;
   1119    i->ARM64in.VUnaryS.src = src;
   1120    return i;
   1121 }
   1122 ARM64Instr* ARM64Instr_VBinD ( ARM64FpBinOp op,
   1123                                HReg dst, HReg argL, HReg argR ) {
   1124    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1125    i->tag                = ARM64in_VBinD;
   1126    i->ARM64in.VBinD.op   = op;
   1127    i->ARM64in.VBinD.dst  = dst;
   1128    i->ARM64in.VBinD.argL = argL;
   1129    i->ARM64in.VBinD.argR = argR;
   1130    return i;
   1131 }
   1132 ARM64Instr* ARM64Instr_VBinS ( ARM64FpBinOp op,
   1133                                HReg dst, HReg argL, HReg argR ) {
   1134    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1135    i->tag                = ARM64in_VBinS;
   1136    i->ARM64in.VBinS.op   = op;
   1137    i->ARM64in.VBinS.dst  = dst;
   1138    i->ARM64in.VBinS.argL = argL;
   1139    i->ARM64in.VBinS.argR = argR;
   1140    return i;
   1141 }
   1142 ARM64Instr* ARM64Instr_VCmpD ( HReg argL, HReg argR ) {
   1143    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1144    i->tag                = ARM64in_VCmpD;
   1145    i->ARM64in.VCmpD.argL = argL;
   1146    i->ARM64in.VCmpD.argR = argR;
   1147    return i;
   1148 }
   1149 ARM64Instr* ARM64Instr_VCmpS ( HReg argL, HReg argR ) {
   1150    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1151    i->tag                = ARM64in_VCmpS;
   1152    i->ARM64in.VCmpS.argL = argL;
   1153    i->ARM64in.VCmpS.argR = argR;
   1154    return i;
   1155 }
   1156 ARM64Instr* ARM64Instr_VFCSel ( HReg dst, HReg argL, HReg argR,
   1157                                 ARM64CondCode cond, Bool isD ) {
   1158    ARM64Instr* i          = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1159    i->tag                 = ARM64in_VFCSel;
   1160    i->ARM64in.VFCSel.dst  = dst;
   1161    i->ARM64in.VFCSel.argL = argL;
   1162    i->ARM64in.VFCSel.argR = argR;
   1163    i->ARM64in.VFCSel.cond = cond;
   1164    i->ARM64in.VFCSel.isD  = isD;
   1165    return i;
   1166 }
   1167 ARM64Instr* ARM64Instr_FPCR ( Bool toFPCR, HReg iReg ) {
   1168    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1169    i->tag                 = ARM64in_FPCR;
   1170    i->ARM64in.FPCR.toFPCR = toFPCR;
   1171    i->ARM64in.FPCR.iReg   = iReg;
   1172    return i;
   1173 }
   1174 ARM64Instr* ARM64Instr_FPSR ( Bool toFPSR, HReg iReg ) {
   1175    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1176    i->tag                 = ARM64in_FPSR;
   1177    i->ARM64in.FPSR.toFPSR = toFPSR;
   1178    i->ARM64in.FPSR.iReg   = iReg;
   1179    return i;
   1180 }
   1181 ARM64Instr* ARM64Instr_VBinV ( ARM64VecBinOp op,
   1182                                HReg dst, HReg argL, HReg argR ) {
   1183    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1184    i->tag                = ARM64in_VBinV;
   1185    i->ARM64in.VBinV.op   = op;
   1186    i->ARM64in.VBinV.dst  = dst;
   1187    i->ARM64in.VBinV.argL = argL;
   1188    i->ARM64in.VBinV.argR = argR;
   1189    return i;
   1190 }
   1191 ARM64Instr* ARM64Instr_VModifyV ( ARM64VecModifyOp op, HReg mod, HReg arg ) {
   1192    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1193    i->tag                  = ARM64in_VModifyV;
   1194    i->ARM64in.VModifyV.op  = op;
   1195    i->ARM64in.VModifyV.mod = mod;
   1196    i->ARM64in.VModifyV.arg = arg;
   1197    return i;
   1198 }
   1199 ARM64Instr* ARM64Instr_VUnaryV ( ARM64VecUnaryOp op, HReg dst, HReg arg ) {
   1200    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1201    i->tag                 = ARM64in_VUnaryV;
   1202    i->ARM64in.VUnaryV.op  = op;
   1203    i->ARM64in.VUnaryV.dst = dst;
   1204    i->ARM64in.VUnaryV.arg = arg;
   1205    return i;
   1206 }
   1207 ARM64Instr* ARM64Instr_VNarrowV ( ARM64VecNarrowOp op,
   1208                                   UInt dszBlg2, HReg dst, HReg src ) {
   1209    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1210    i->tag                      = ARM64in_VNarrowV;
   1211    i->ARM64in.VNarrowV.op      = op;
   1212    i->ARM64in.VNarrowV.dszBlg2 = dszBlg2;
   1213    i->ARM64in.VNarrowV.dst     = dst;
   1214    i->ARM64in.VNarrowV.src     = src;
   1215    vassert(dszBlg2 == 0 || dszBlg2 == 1 || dszBlg2 == 2);
   1216    return i;
   1217 }
   1218 ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftImmOp op,
   1219                                     HReg dst, HReg src, UInt amt ) {
   1220    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1221    i->tag                    = ARM64in_VShiftImmV;
   1222    i->ARM64in.VShiftImmV.op  = op;
   1223    i->ARM64in.VShiftImmV.dst = dst;
   1224    i->ARM64in.VShiftImmV.src = src;
   1225    i->ARM64in.VShiftImmV.amt = amt;
   1226    UInt minSh = 0;
   1227    UInt maxSh = 0;
   1228    switch (op) {
   1229       /* For right shifts, the allowed shift amounts are 1 .. lane_size.
   1230          For left shifts,  the allowed shift amounts are 0 .. lane_size-1.
   1231       */
   1232       case ARM64vecshi_USHR64x2: case ARM64vecshi_SSHR64x2:
   1233       case ARM64vecshi_UQSHRN2SD: case ARM64vecshi_SQSHRN2SD:
   1234       case ARM64vecshi_SQSHRUN2SD:
   1235       case ARM64vecshi_UQRSHRN2SD: case ARM64vecshi_SQRSHRN2SD:
   1236       case ARM64vecshi_SQRSHRUN2SD:
   1237          minSh = 1; maxSh = 64; break;
   1238       case ARM64vecshi_SHL64x2:
   1239       case ARM64vecshi_UQSHL64x2: case ARM64vecshi_SQSHL64x2:
   1240       case ARM64vecshi_SQSHLU64x2:
   1241          minSh = 0; maxSh = 63; break;
   1242       case ARM64vecshi_USHR32x4: case ARM64vecshi_SSHR32x4:
   1243       case ARM64vecshi_UQSHRN4HS: case ARM64vecshi_SQSHRN4HS:
   1244       case ARM64vecshi_SQSHRUN4HS:
   1245       case ARM64vecshi_UQRSHRN4HS: case ARM64vecshi_SQRSHRN4HS:
   1246       case ARM64vecshi_SQRSHRUN4HS:
   1247          minSh = 1; maxSh = 32; break;
   1248       case ARM64vecshi_SHL32x4:
   1249       case ARM64vecshi_UQSHL32x4: case ARM64vecshi_SQSHL32x4:
   1250       case ARM64vecshi_SQSHLU32x4:
   1251          minSh = 0; maxSh = 31; break;
   1252       case ARM64vecshi_USHR16x8: case ARM64vecshi_SSHR16x8:
   1253       case ARM64vecshi_UQSHRN8BH: case ARM64vecshi_SQSHRN8BH:
   1254       case ARM64vecshi_SQSHRUN8BH:
   1255       case ARM64vecshi_UQRSHRN8BH: case ARM64vecshi_SQRSHRN8BH:
   1256       case ARM64vecshi_SQRSHRUN8BH:
   1257          minSh = 1; maxSh = 16; break;
   1258       case ARM64vecshi_SHL16x8:
   1259       case ARM64vecshi_UQSHL16x8: case ARM64vecshi_SQSHL16x8:
   1260       case ARM64vecshi_SQSHLU16x8:
   1261          minSh = 0; maxSh = 15; break;
   1262       case ARM64vecshi_USHR8x16: case ARM64vecshi_SSHR8x16:
   1263          minSh = 1; maxSh = 8; break;
   1264       case ARM64vecshi_SHL8x16:
   1265       case ARM64vecshi_UQSHL8x16: case ARM64vecshi_SQSHL8x16:
   1266       case ARM64vecshi_SQSHLU8x16:
   1267          minSh = 0; maxSh = 7; break;
   1268       default:
   1269          vassert(0);
   1270    }
   1271    vassert(maxSh > 0);
   1272    vassert(amt >= minSh && amt <= maxSh);
   1273    return i;
   1274 }
   1275 ARM64Instr* ARM64Instr_VExtV ( HReg dst, HReg srcLo, HReg srcHi, UInt amtB ) {
   1276    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1277    i->tag                 = ARM64in_VExtV;
   1278    i->ARM64in.VExtV.dst   = dst;
   1279    i->ARM64in.VExtV.srcLo = srcLo;
   1280    i->ARM64in.VExtV.srcHi = srcHi;
   1281    i->ARM64in.VExtV.amtB  = amtB;
   1282    vassert(amtB >= 1 && amtB <= 15);
   1283    return i;
   1284 }
   1285 ARM64Instr* ARM64Instr_VImmQ (HReg rQ, UShort imm) {
   1286    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1287    i->tag               = ARM64in_VImmQ;
   1288    i->ARM64in.VImmQ.rQ  = rQ;
   1289    i->ARM64in.VImmQ.imm = imm;
   1290    /* Check that this is something that can actually be emitted. */
   1291    switch (imm) {
   1292       case 0x0000: case 0x0001: case 0x0003:
   1293       case 0x000F: case 0x003F: case 0x00FF: case 0xFFFF:
   1294          break;
   1295       default:
   1296          vassert(0);
   1297    }
   1298    return i;
   1299 }
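         /* As the "Bits16toBytes16" notation used by the printer below
            suggests, each of the 16 immediate bits appears to select the
            value of one byte of the 128-bit destination (0x00 if clear,
            0xFF if set); so, for instance, imm == 0x00FF would denote a
            vector whose low 8 bytes are all-ones and whose high 8 bytes
            are zero.  Only the bit patterns checked above can be emitted
            directly. */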
   1300 ARM64Instr* ARM64Instr_VDfromX ( HReg rD, HReg rX ) {
   1301    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1302    i->tag                = ARM64in_VDfromX;
   1303    i->ARM64in.VDfromX.rD = rD;
   1304    i->ARM64in.VDfromX.rX = rX;
   1305    return i;
   1306 }
   1307 ARM64Instr* ARM64Instr_VQfromX ( HReg rQ, HReg rXlo ) {
   1308    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1309    i->tag                  = ARM64in_VQfromX;
   1310    i->ARM64in.VQfromX.rQ   = rQ;
   1311    i->ARM64in.VQfromX.rXlo = rXlo;
   1312    return i;
   1313 }
   1314 ARM64Instr* ARM64Instr_VQfromXX ( HReg rQ, HReg rXhi, HReg rXlo ) {
   1315    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1316    i->tag                   = ARM64in_VQfromXX;
   1317    i->ARM64in.VQfromXX.rQ   = rQ;
   1318    i->ARM64in.VQfromXX.rXhi = rXhi;
   1319    i->ARM64in.VQfromXX.rXlo = rXlo;
   1320    return i;
   1321 }
   1322 ARM64Instr* ARM64Instr_VXfromQ ( HReg rX, HReg rQ, UInt laneNo ) {
   1323    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1324    i->tag                    = ARM64in_VXfromQ;
   1325    i->ARM64in.VXfromQ.rX     = rX;
   1326    i->ARM64in.VXfromQ.rQ     = rQ;
   1327    i->ARM64in.VXfromQ.laneNo = laneNo;
   1328    vassert(laneNo <= 1);
   1329    return i;
   1330 }
   1331 ARM64Instr* ARM64Instr_VXfromDorS ( HReg rX, HReg rDorS, Bool fromD ) {
   1332    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1333    i->tag                      = ARM64in_VXfromDorS;
   1334    i->ARM64in.VXfromDorS.rX    = rX;
   1335    i->ARM64in.VXfromDorS.rDorS = rDorS;
   1336    i->ARM64in.VXfromDorS.fromD = fromD;
   1337    return i;
   1338 }
   1339 ARM64Instr* ARM64Instr_VMov ( UInt szB, HReg dst, HReg src ) {
   1340    ARM64Instr* i       = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1341    i->tag              = ARM64in_VMov;
   1342    i->ARM64in.VMov.szB = szB;
   1343    i->ARM64in.VMov.dst = dst;
   1344    i->ARM64in.VMov.src = src;
   1345    switch (szB) {
   1346       case 16:
   1347         vassert(hregClass(src) == HRcVec128);
   1348         vassert(hregClass(dst) == HRcVec128);
   1349         break;
   1350       case 8:
   1351         vassert(hregClass(src) == HRcFlt64);
   1352         vassert(hregClass(dst) == HRcFlt64);
   1353         break;
   1354       default:
   1355         vpanic("ARM64Instr_VMov");
   1356    }
   1357    return i;
   1358 }
   1359 ARM64Instr* ARM64Instr_EvCheck ( ARM64AMode* amCounter,
   1360                                  ARM64AMode* amFailAddr ) {
   1361    ARM64Instr* i                 = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1362    i->tag                        = ARM64in_EvCheck;
   1363    i->ARM64in.EvCheck.amCounter  = amCounter;
   1364    i->ARM64in.EvCheck.amFailAddr = amFailAddr;
   1365    return i;
   1366 }
   1367 ARM64Instr* ARM64Instr_ProfInc ( void ) {
   1368    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1369    i->tag        = ARM64in_ProfInc;
   1370    return i;
   1371 }
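
         /* A rough sketch of how the constructors above are used: the
            instruction selector builds ARM64Instr nodes and appends them to
            the code list for the block under construction.  In the sketch
            below, the addInstr helper, the env value and the particular
            registers are illustrative assumptions rather than definitions
            from this file; the VMov call matches the constructor above,
            while the MovI call assumes the conventional (dst, src) argument
            order used by the constructors earlier in the file:

               // 64-bit integer register-to-register move
               addInstr(env, ARM64Instr_MovI(dst, src));

               // whole 128-bit vector register copy
               addInstr(env, ARM64Instr_VMov(16, dstQ, srcQ));

            Note that the constructors vassert early on operands the emitter
            cannot encode, e.g. ARM64Instr_VShiftImmV checks the shift amount
            against the per-lane-size limits and ARM64Instr_VImmQ accepts
            only a small set of immediates. */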
   1372 
   1373 /* ... */
   1374 
   1375 void ppARM64Instr ( const ARM64Instr* i ) {
   1376    switch (i->tag) {
   1377       case ARM64in_Arith:
   1378          vex_printf("%s    ", i->ARM64in.Arith.isAdd ? "add" : "sub");
   1379          ppHRegARM64(i->ARM64in.Arith.dst);
   1380          vex_printf(", ");
   1381          ppHRegARM64(i->ARM64in.Arith.argL);
   1382          vex_printf(", ");
   1383          ppARM64RIA(i->ARM64in.Arith.argR);
   1384          return;
   1385       case ARM64in_Cmp:
   1386          vex_printf("cmp%s ", i->ARM64in.Cmp.is64 ? "   " : "(w)" );
   1387          ppHRegARM64(i->ARM64in.Cmp.argL);
   1388          vex_printf(", ");
   1389          ppARM64RIA(i->ARM64in.Cmp.argR);
   1390          return;
   1391       case ARM64in_Logic:
   1392          vex_printf("%s    ", showARM64LogicOp(i->ARM64in.Logic.op));
   1393          ppHRegARM64(i->ARM64in.Logic.dst);
   1394          vex_printf(", ");
   1395          ppHRegARM64(i->ARM64in.Logic.argL);
   1396          vex_printf(", ");
   1397          ppARM64RIL(i->ARM64in.Logic.argR);
   1398          return;
   1399       case ARM64in_Test:
   1400          vex_printf("tst    ");
   1401          ppHRegARM64(i->ARM64in.Test.argL);
   1402          vex_printf(", ");
   1403          ppARM64RIL(i->ARM64in.Test.argR);
   1404          return;
   1405       case ARM64in_Shift:
   1406          vex_printf("%s    ", showARM64ShiftOp(i->ARM64in.Shift.op));
   1407          ppHRegARM64(i->ARM64in.Shift.dst);
   1408          vex_printf(", ");
   1409          ppHRegARM64(i->ARM64in.Shift.argL);
   1410          vex_printf(", ");
   1411          ppARM64RI6(i->ARM64in.Shift.argR);
   1412          return;
   1413       case ARM64in_Unary:
   1414          vex_printf("%s    ", showARM64UnaryOp(i->ARM64in.Unary.op));
   1415          ppHRegARM64(i->ARM64in.Unary.dst);
   1416          vex_printf(", ");
   1417          ppHRegARM64(i->ARM64in.Unary.src);
   1418          return;
   1419       case ARM64in_MovI:
   1420          vex_printf("mov    ");
   1421          ppHRegARM64(i->ARM64in.MovI.dst);
   1422          vex_printf(", ");
   1423          ppHRegARM64(i->ARM64in.MovI.src);
   1424          return;
   1425       case ARM64in_Imm64:
   1426          vex_printf("imm64  ");
   1427          ppHRegARM64(i->ARM64in.Imm64.dst);
   1428          vex_printf(", 0x%llx", i->ARM64in.Imm64.imm64);
   1429          return;
   1430       case ARM64in_LdSt64:
   1431          if (i->ARM64in.LdSt64.isLoad) {
   1432             vex_printf("ldr    ");
   1433             ppHRegARM64(i->ARM64in.LdSt64.rD);
   1434             vex_printf(", ");
   1435             ppARM64AMode(i->ARM64in.LdSt64.amode);
   1436          } else {
   1437             vex_printf("str    ");
   1438             ppARM64AMode(i->ARM64in.LdSt64.amode);
   1439             vex_printf(", ");
   1440             ppHRegARM64(i->ARM64in.LdSt64.rD);
   1441          }
   1442          return;
   1443       case ARM64in_LdSt32:
   1444          if (i->ARM64in.LdSt32.isLoad) {
   1445             vex_printf("ldruw  ");
   1446             ppHRegARM64(i->ARM64in.LdSt32.rD);
   1447             vex_printf(", ");
   1448             ppARM64AMode(i->ARM64in.LdSt32.amode);
   1449          } else {
   1450             vex_printf("strw   ");
   1451             ppARM64AMode(i->ARM64in.LdSt32.amode);
   1452             vex_printf(", ");
   1453             ppHRegARM64(i->ARM64in.LdSt32.rD);
   1454          }
   1455          return;
   1456       case ARM64in_LdSt16:
   1457          if (i->ARM64in.LdSt16.isLoad) {
   1458             vex_printf("ldruh  ");
   1459             ppHRegARM64(i->ARM64in.LdSt16.rD);
   1460             vex_printf(", ");
   1461             ppARM64AMode(i->ARM64in.LdSt16.amode);
   1462          } else {
   1463             vex_printf("strh   ");
   1464             ppARM64AMode(i->ARM64in.LdSt16.amode);
   1465             vex_printf(", ");
   1466             ppHRegARM64(i->ARM64in.LdSt16.rD);
   1467          }
   1468          return;
   1469       case ARM64in_LdSt8:
   1470          if (i->ARM64in.LdSt8.isLoad) {
   1471             vex_printf("ldrub  ");
   1472             ppHRegARM64(i->ARM64in.LdSt8.rD);
   1473             vex_printf(", ");
   1474             ppARM64AMode(i->ARM64in.LdSt8.amode);
   1475          } else {
   1476             vex_printf("strb   ");
   1477             ppARM64AMode(i->ARM64in.LdSt8.amode);
   1478             vex_printf(", ");
   1479             ppHRegARM64(i->ARM64in.LdSt8.rD);
   1480          }
   1481          return;
   1482       case ARM64in_XDirect:
   1483          vex_printf("(xDirect) ");
   1484          vex_printf("if (%%pstate.%s) { ",
   1485                     showARM64CondCode(i->ARM64in.XDirect.cond));
   1486          vex_printf("imm64 x9,0x%llx; ", i->ARM64in.XDirect.dstGA);
   1487          vex_printf("str x9,");
   1488          ppARM64AMode(i->ARM64in.XDirect.amPC);
   1489          vex_printf("; imm64-exactly4 x9,$disp_cp_chain_me_to_%sEP; ",
   1490                     i->ARM64in.XDirect.toFastEP ? "fast" : "slow");
   1491          vex_printf("blr x9 }");
   1492          return;
   1493       case ARM64in_XIndir:
   1494          vex_printf("(xIndir) ");
   1495          vex_printf("if (%%pstate.%s) { ",
   1496                     showARM64CondCode(i->ARM64in.XIndir.cond));
   1497          vex_printf("str ");
   1498          ppHRegARM64(i->ARM64in.XIndir.dstGA);
   1499          vex_printf(",");
   1500          ppARM64AMode(i->ARM64in.XIndir.amPC);
   1501          vex_printf("; imm64 x9,$disp_cp_xindir; ");
   1502          vex_printf("br x9 }");
   1503          return;
   1504       case ARM64in_XAssisted:
   1505          vex_printf("(xAssisted) ");
   1506          vex_printf("if (%%pstate.%s) { ",
   1507                     showARM64CondCode(i->ARM64in.XAssisted.cond));
   1508          vex_printf("str ");
   1509          ppHRegARM64(i->ARM64in.XAssisted.dstGA);
   1510          vex_printf(",");
   1511          ppARM64AMode(i->ARM64in.XAssisted.amPC);
   1512          vex_printf("; movw x21,$IRJumpKind_to_TRCVAL(%d); ",
   1513                     (Int)i->ARM64in.XAssisted.jk);
   1514          vex_printf("imm64 x9,$disp_cp_xassisted; ");
   1515          vex_printf("br x9 }");
   1516          return;
   1517       case ARM64in_CSel:
   1518          vex_printf("csel   ");
   1519          ppHRegARM64(i->ARM64in.CSel.dst);
   1520          vex_printf(", ");
   1521          ppHRegARM64(i->ARM64in.CSel.argL);
   1522          vex_printf(", ");
   1523          ppHRegARM64(i->ARM64in.CSel.argR);
   1524          vex_printf(", %s", showARM64CondCode(i->ARM64in.CSel.cond));
   1525          return;
   1526       case ARM64in_Call:
   1527          vex_printf("call%s ",
   1528                     i->ARM64in.Call.cond==ARM64cc_AL
   1529                        ? "  " : showARM64CondCode(i->ARM64in.Call.cond));
   1530          vex_printf("0x%llx [nArgRegs=%d, ",
   1531                     i->ARM64in.Call.target, i->ARM64in.Call.nArgRegs);
   1532          ppRetLoc(i->ARM64in.Call.rloc);
   1533          vex_printf("]");
   1534          return;
   1535       case ARM64in_AddToSP: {
   1536          Int simm = i->ARM64in.AddToSP.simm;
   1537          vex_printf("%s    xsp, xsp, #%d", simm < 0 ? "sub" : "add",
   1538                                            simm < 0 ? -simm : simm);
   1539          return;
   1540       }
   1541       case ARM64in_FromSP:
   1542          vex_printf("mov    ");
   1543          ppHRegARM64(i->ARM64in.FromSP.dst);
   1544          vex_printf(", xsp");
   1545          return;
   1546       case ARM64in_Mul:
   1547          vex_printf("%s  ", showARM64MulOp(i->ARM64in.Mul.op));
   1548          ppHRegARM64(i->ARM64in.Mul.dst);
   1549          vex_printf(", ");
   1550          ppHRegARM64(i->ARM64in.Mul.argL);
   1551          vex_printf(", ");
   1552          ppHRegARM64(i->ARM64in.Mul.argR);
   1553          return;
   1554 
   1555       case ARM64in_LdrEX: {
   1556          const HChar* sz = " ";
   1557          switch (i->ARM64in.LdrEX.szB) {
   1558             case 1: sz = "b"; break;
   1559             case 2: sz = "h"; break;
   1560             case 4: case 8: break;
   1561             default: vassert(0);
   1562          }
   1563          vex_printf("ldxr%s  %c2, [x4]",
   1564                     sz, i->ARM64in.LdrEX.szB == 8 ? 'x' : 'w');
   1565          return;
   1566       }
   1567       case ARM64in_StrEX: {
   1568          const HChar* sz = " ";
   1569          switch (i->ARM64in.StrEX.szB) {
   1570             case 1: sz = "b"; break;
   1571             case 2: sz = "h"; break;
   1572             case 4: case 8: break;
   1573             default: vassert(0);
   1574          }
   1575          vex_printf("stxr%s  w0, %c2, [x4]",
   1576                     sz, i->ARM64in.StrEX.szB == 8 ? 'x' : 'w');
   1577          return;
   1578       }
   1579       case ARM64in_CAS: {
   1580          vex_printf("x1 = cas(%dbit)(x3, x5 -> x7)", 8 * i->ARM64in.CAS.szB);
   1581          return;
   1582       }
   1583       case ARM64in_MFence:
   1584          vex_printf("(mfence) dsb sy; dmb sy; isb");
   1585          return;
   1586       case ARM64in_ClrEX:
   1587          vex_printf("clrex #15");
   1588          return;
   1589       case ARM64in_VLdStH:
   1590          if (i->ARM64in.VLdStH.isLoad) {
   1591             vex_printf("ldr    ");
   1592             ppHRegARM64asHreg(i->ARM64in.VLdStH.hD);
   1593             vex_printf(", %u(", i->ARM64in.VLdStH.uimm12);
   1594             ppHRegARM64(i->ARM64in.VLdStH.rN);
   1595             vex_printf(")");
   1596          } else {
   1597             vex_printf("str    ");
   1598             vex_printf("%u(", i->ARM64in.VLdStH.uimm12);
   1599             ppHRegARM64(i->ARM64in.VLdStH.rN);
   1600             vex_printf("), ");
   1601             ppHRegARM64asHreg(i->ARM64in.VLdStH.hD);
   1602          }
   1603          return;
   1604       case ARM64in_VLdStS:
   1605          if (i->ARM64in.VLdStS.isLoad) {
   1606             vex_printf("ldr    ");
   1607             ppHRegARM64asSreg(i->ARM64in.VLdStS.sD);
   1608             vex_printf(", %u(", i->ARM64in.VLdStS.uimm12);
   1609             ppHRegARM64(i->ARM64in.VLdStS.rN);
   1610             vex_printf(")");
   1611          } else {
   1612             vex_printf("str    ");
   1613             vex_printf("%u(", i->ARM64in.VLdStS.uimm12);
   1614             ppHRegARM64(i->ARM64in.VLdStS.rN);
   1615             vex_printf("), ");
   1616             ppHRegARM64asSreg(i->ARM64in.VLdStS.sD);
   1617          }
   1618          return;
   1619       case ARM64in_VLdStD:
   1620          if (i->ARM64in.VLdStD.isLoad) {
   1621             vex_printf("ldr    ");
   1622             ppHRegARM64(i->ARM64in.VLdStD.dD);
   1623             vex_printf(", %u(", i->ARM64in.VLdStD.uimm12);
   1624             ppHRegARM64(i->ARM64in.VLdStD.rN);
   1625             vex_printf(")");
   1626          } else {
   1627             vex_printf("str    ");
   1628             vex_printf("%u(", i->ARM64in.VLdStD.uimm12);
   1629             ppHRegARM64(i->ARM64in.VLdStD.rN);
   1630             vex_printf("), ");
   1631             ppHRegARM64(i->ARM64in.VLdStD.dD);
   1632          }
   1633          return;
   1634       case ARM64in_VLdStQ:
   1635          if (i->ARM64in.VLdStQ.isLoad)
   1636             vex_printf("ld1.2d {");
   1637          else
   1638             vex_printf("st1.2d {");
   1639          ppHRegARM64(i->ARM64in.VLdStQ.rQ);
   1640          vex_printf("}, [");
   1641          ppHRegARM64(i->ARM64in.VLdStQ.rN);
   1642          vex_printf("]");
   1643          return;
   1644       case ARM64in_VCvtI2F: {
   1645          HChar syn  = '?';
   1646          UInt  fszB = 0;
   1647          UInt  iszB = 0;
   1648          characteriseARM64CvtOp(&syn, &fszB, &iszB, i->ARM64in.VCvtI2F.how);
   1649          vex_printf("%ccvtf  ", syn);
   1650          ppHRegARM64(i->ARM64in.VCvtI2F.rD);
   1651          vex_printf("(%c-reg), ", fszB == 4 ? 'S' : 'D');
   1652          ppHRegARM64(i->ARM64in.VCvtI2F.rS);
   1653          vex_printf("(%c-reg)", iszB == 4 ? 'W' : 'X');
   1654          return;
   1655       }
   1656       case ARM64in_VCvtF2I: {
   1657          HChar syn  = '?';
   1658          UInt  fszB = 0;
   1659          UInt  iszB = 0;
   1660          HChar rmo  = '?';
   1661          characteriseARM64CvtOp(&syn, &fszB, &iszB, i->ARM64in.VCvtF2I.how);
   1662          UChar armRM = i->ARM64in.VCvtF2I.armRM;
   1663          if (armRM < 4) rmo = "npmz"[armRM];
   1664          vex_printf("fcvt%c%c ", rmo, syn);
   1665          ppHRegARM64(i->ARM64in.VCvtF2I.rD);
   1666          vex_printf("(%c-reg), ", iszB == 4 ? 'W' : 'X');
   1667          ppHRegARM64(i->ARM64in.VCvtF2I.rS);
   1668          vex_printf("(%c-reg)", fszB == 4 ? 'S' : 'D');
   1669          return;
   1670       }
   1671       case ARM64in_VCvtSD:
   1672          vex_printf("fcvt%s ", i->ARM64in.VCvtSD.sToD ? "s2d" : "d2s");
   1673          if (i->ARM64in.VCvtSD.sToD) {
   1674             ppHRegARM64(i->ARM64in.VCvtSD.dst);
   1675             vex_printf(", ");
   1676             ppHRegARM64asSreg(i->ARM64in.VCvtSD.src);
   1677          } else {
   1678             ppHRegARM64asSreg(i->ARM64in.VCvtSD.dst);
   1679             vex_printf(", ");
   1680             ppHRegARM64(i->ARM64in.VCvtSD.src);
   1681          }
   1682          return;
   1683       case ARM64in_VCvtHS:
   1684          vex_printf("fcvt%s ", i->ARM64in.VCvtHS.hToS ? "h2s" : "s2h");
   1685          if (i->ARM64in.VCvtHS.hToS) {
   1686             ppHRegARM64asSreg(i->ARM64in.VCvtHS.dst);
   1687             vex_printf(", ");
   1688             ppHRegARM64asHreg(i->ARM64in.VCvtHS.src);
   1689          } else {
   1690             ppHRegARM64asHreg(i->ARM64in.VCvtHS.dst);
   1691             vex_printf(", ");
   1692             ppHRegARM64asSreg(i->ARM64in.VCvtHS.src);
   1693          }
   1694          return;
   1695       case ARM64in_VCvtHD:
   1696          vex_printf("fcvt%s ", i->ARM64in.VCvtHD.hToD ? "h2d" : "d2h");
   1697          if (i->ARM64in.VCvtHD.hToD) {
   1698             ppHRegARM64(i->ARM64in.VCvtHD.dst);
   1699             vex_printf(", ");
   1700             ppHRegARM64asHreg(i->ARM64in.VCvtHD.src);
   1701          } else {
   1702             ppHRegARM64asHreg(i->ARM64in.VCvtHD.dst);
   1703             vex_printf(", ");
   1704             ppHRegARM64(i->ARM64in.VCvtHD.src);
   1705          }
   1706          return;
   1707       case ARM64in_VUnaryD:
   1708          vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryD.op));
   1709          ppHRegARM64(i->ARM64in.VUnaryD.dst);
   1710          vex_printf(", ");
   1711          ppHRegARM64(i->ARM64in.VUnaryD.src);
   1712          return;
   1713       case ARM64in_VUnaryS:
   1714          vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryS.op));
   1715          ppHRegARM64asSreg(i->ARM64in.VUnaryS.dst);
   1716          vex_printf(", ");
   1717          ppHRegARM64asSreg(i->ARM64in.VUnaryS.src);
   1718          return;
   1719       case ARM64in_VBinD:
   1720          vex_printf("f%s   ", showARM64FpBinOp(i->ARM64in.VBinD.op));
   1721          ppHRegARM64(i->ARM64in.VBinD.dst);
   1722          vex_printf(", ");
   1723          ppHRegARM64(i->ARM64in.VBinD.argL);
   1724          vex_printf(", ");
   1725          ppHRegARM64(i->ARM64in.VBinD.argR);
   1726          return;
   1727       case ARM64in_VBinS:
   1728          vex_printf("f%s   ", showARM64FpBinOp(i->ARM64in.VBinS.op));
   1729          ppHRegARM64asSreg(i->ARM64in.VBinS.dst);
   1730          vex_printf(", ");
   1731          ppHRegARM64asSreg(i->ARM64in.VBinS.argL);
   1732          vex_printf(", ");
   1733          ppHRegARM64asSreg(i->ARM64in.VBinS.argR);
   1734          return;
   1735       case ARM64in_VCmpD:
   1736          vex_printf("fcmp   ");
   1737          ppHRegARM64(i->ARM64in.VCmpD.argL);
   1738          vex_printf(", ");
   1739          ppHRegARM64(i->ARM64in.VCmpD.argR);
   1740          return;
   1741       case ARM64in_VCmpS:
   1742          vex_printf("fcmp   ");
   1743          ppHRegARM64asSreg(i->ARM64in.VCmpS.argL);
   1744          vex_printf(", ");
   1745          ppHRegARM64asSreg(i->ARM64in.VCmpS.argR);
   1746          return;
   1747       case ARM64in_VFCSel: {
   1748          void (*ppHRegARM64fp)(HReg)
   1749             = (i->ARM64in.VFCSel.isD ? ppHRegARM64 : ppHRegARM64asSreg);
   1750          vex_printf("fcsel  ");
   1751          ppHRegARM64fp(i->ARM64in.VFCSel.dst);
   1752          vex_printf(", ");
   1753          ppHRegARM64fp(i->ARM64in.VFCSel.argL);
   1754          vex_printf(", ");
   1755          ppHRegARM64fp(i->ARM64in.VFCSel.argR);
   1756          vex_printf(", %s", showARM64CondCode(i->ARM64in.VFCSel.cond));
   1757          return;
   1758       }
   1759       case ARM64in_FPCR:
   1760          if (i->ARM64in.FPCR.toFPCR) {
   1761             vex_printf("msr    fpcr, ");
   1762             ppHRegARM64(i->ARM64in.FPCR.iReg);
   1763          } else {
   1764             vex_printf("mrs    ");
   1765             ppHRegARM64(i->ARM64in.FPCR.iReg);
   1766             vex_printf(", fpcr");
   1767          }
   1768          return;
   1769       case ARM64in_FPSR:
   1770          if (i->ARM64in.FPSR.toFPSR) {
   1771             vex_printf("msr    fpsr, ");
   1772             ppHRegARM64(i->ARM64in.FPSR.iReg);
   1773          } else {
   1774             vex_printf("mrs    ");
   1775             ppHRegARM64(i->ARM64in.FPSR.iReg);
   1776             vex_printf(", fpsr");
   1777          }
   1778          return;
   1779       case ARM64in_VBinV: {
   1780          const HChar* nm = "??";
   1781          const HChar* ar = "??";
   1782          showARM64VecBinOp(&nm, &ar, i->ARM64in.VBinV.op);
   1783          vex_printf("%s ", nm);
   1784          ppHRegARM64(i->ARM64in.VBinV.dst);
   1785          vex_printf(".%s, ", ar);
   1786          ppHRegARM64(i->ARM64in.VBinV.argL);
   1787          vex_printf(".%s, ", ar);
   1788          ppHRegARM64(i->ARM64in.VBinV.argR);
   1789          vex_printf(".%s", ar);
   1790          return;
   1791       }
   1792       case ARM64in_VModifyV: {
   1793          const HChar* nm = "??";
   1794          const HChar* ar = "??";
   1795          showARM64VecModifyOp(&nm, &ar, i->ARM64in.VModifyV.op);
   1796          vex_printf("%s ", nm);
   1797          ppHRegARM64(i->ARM64in.VModifyV.mod);
   1798          vex_printf(".%s, ", ar);
   1799          ppHRegARM64(i->ARM64in.VModifyV.arg);
   1800          vex_printf(".%s", ar);
   1801          return;
   1802       }
   1803       case ARM64in_VUnaryV: {
   1804          const HChar* nm = "??";
   1805          const HChar* ar = "??";
   1806          showARM64VecUnaryOp(&nm, &ar, i->ARM64in.VUnaryV.op);
   1807          vex_printf("%s  ", nm);
   1808          ppHRegARM64(i->ARM64in.VUnaryV.dst);
   1809          vex_printf(".%s, ", ar);
   1810          ppHRegARM64(i->ARM64in.VUnaryV.arg);
   1811          vex_printf(".%s", ar);
   1812          return;
   1813       }
   1814       case ARM64in_VNarrowV: {
   1815          UInt dszBlg2 = i->ARM64in.VNarrowV.dszBlg2;
   1816          const HChar* darr[3] = { "8b", "4h", "2s" };
   1817          const HChar* sarr[3] = { "8h", "4s", "2d" };
   1818          const HChar* nm = showARM64VecNarrowOp(i->ARM64in.VNarrowV.op);
   1819          vex_printf("%s ", nm);
   1820          ppHRegARM64(i->ARM64in.VNarrowV.dst);
   1821          vex_printf(".%s, ", dszBlg2 < 3 ? darr[dszBlg2] : "??");
   1822          ppHRegARM64(i->ARM64in.VNarrowV.src);
   1823          vex_printf(".%s", dszBlg2 < 3 ? sarr[dszBlg2] : "??");
   1824          return;
   1825       }
   1826       case ARM64in_VShiftImmV: {
   1827          const HChar* nm = "??";
   1828          const HChar* ar = "??";
   1829          showARM64VecShiftImmOp(&nm, &ar, i->ARM64in.VShiftImmV.op);
   1830          vex_printf("%s ", nm);
   1831          ppHRegARM64(i->ARM64in.VShiftImmV.dst);
   1832          vex_printf(".%s, ", ar);
   1833          ppHRegARM64(i->ARM64in.VShiftImmV.src);
   1834          vex_printf(".%s, #%u", ar, i->ARM64in.VShiftImmV.amt);
   1835          return;
   1836       }
   1837       case ARM64in_VExtV: {
   1838          vex_printf("ext    ");
   1839          ppHRegARM64(i->ARM64in.VExtV.dst);
   1840          vex_printf(".16b, ");
   1841          ppHRegARM64(i->ARM64in.VExtV.srcLo);
   1842          vex_printf(".16b, ");
   1843          ppHRegARM64(i->ARM64in.VExtV.srcHi);
   1844          vex_printf(".16b, #%u", i->ARM64in.VExtV.amtB);
   1845          return;
   1846       }
   1847       case ARM64in_VImmQ:
   1848          vex_printf("qimm   ");
   1849          ppHRegARM64(i->ARM64in.VImmQ.rQ);
   1850          vex_printf(", Bits16toBytes16(0x%x)", (UInt)i->ARM64in.VImmQ.imm);
   1851          return;
   1852       case ARM64in_VDfromX:
   1853          vex_printf("fmov   ");
   1854          ppHRegARM64(i->ARM64in.VDfromX.rD);
   1855          vex_printf(", ");
   1856          ppHRegARM64(i->ARM64in.VDfromX.rX);
   1857          return;
   1858       case ARM64in_VQfromX:
   1859          vex_printf("fmov   ");
   1860          ppHRegARM64(i->ARM64in.VQfromX.rQ);
   1861          vex_printf(".d[0], ");
   1862          ppHRegARM64(i->ARM64in.VQfromX.rXlo);
   1863          return;
   1864       case ARM64in_VQfromXX:
   1865          vex_printf("qFromXX ");
   1866          ppHRegARM64(i->ARM64in.VQfromXX.rQ);
   1867          vex_printf(", ");
   1868          ppHRegARM64(i->ARM64in.VQfromXX.rXhi);
   1869          vex_printf(", ");
   1870          ppHRegARM64(i->ARM64in.VQfromXX.rXlo);
   1871          return;
   1872       case ARM64in_VXfromQ:
   1873          vex_printf("fmov   ");
   1874          ppHRegARM64(i->ARM64in.VXfromQ.rX);
   1875          vex_printf(", ");
   1876          ppHRegARM64(i->ARM64in.VXfromQ.rQ);
   1877          vex_printf(".d[%u]", i->ARM64in.VXfromQ.laneNo);
   1878          return;
   1879       case ARM64in_VXfromDorS:
   1880          vex_printf("fmov   ");
   1881          ppHRegARM64(i->ARM64in.VXfromDorS.rX);
   1882          vex_printf("(%c-reg), ", i->ARM64in.VXfromDorS.fromD ? 'X':'W');
   1883          ppHRegARM64(i->ARM64in.VXfromDorS.rDorS);
   1884          vex_printf("(%c-reg)", i->ARM64in.VXfromDorS.fromD ? 'D' : 'S');
   1885          return;
   1886       case ARM64in_VMov: {
   1887          UChar aux = '?';
   1888          switch (i->ARM64in.VMov.szB) {
   1889             case 16: aux = 'q'; break;
   1890             case 8:  aux = 'd'; break;
   1891             case 4:  aux = 's'; break;
   1892             default: break;
   1893          }
   1894          vex_printf("mov(%c) ", aux);
   1895          ppHRegARM64(i->ARM64in.VMov.dst);
   1896          vex_printf(", ");
   1897          ppHRegARM64(i->ARM64in.VMov.src);
   1898          return;
   1899       }
   1900       case ARM64in_EvCheck:
   1901          vex_printf("(evCheck) ldr w9,");
   1902          ppARM64AMode(i->ARM64in.EvCheck.amCounter);
   1903          vex_printf("; subs w9,w9,$1; str w9,");
   1904          ppARM64AMode(i->ARM64in.EvCheck.amCounter);
   1905          vex_printf("; bpl nofail; ldr x9,");
   1906          ppARM64AMode(i->ARM64in.EvCheck.amFailAddr);
   1907          vex_printf("; br x9; nofail:");
   1908          return;
   1909       case ARM64in_ProfInc:
   1910          vex_printf("(profInc) imm64-fixed4 x9,$NotKnownYet; "
    1911                     "ldr x8,[x9]; add x8,x8,#1; str x8,[x9]");
   1912          return;
   1913       default:
   1914          vex_printf("ppARM64Instr: unhandled case (tag %d)", (Int)i->tag);
   1915          vpanic("ppARM64Instr(1)");
   1916          return;
   1917    }
   1918 }
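
         /* Example of the output format (illustrative: the exact register
            names come from ppHRegARM64 and ppARM64RIA, which are defined
            elsewhere in this file).  A 64-bit ARM64in_Cmp of two real X
            registers prints as something like

               cmp    x21, x22

            while the 32-bit form is flagged in the mnemonic:

               cmp(w) x21, x22
         */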
   1919 
   1920 
   1921 /* --------- Helpers for register allocation. --------- */
   1922 
   1923 void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
   1924 {
   1925    vassert(mode64 == True);
   1926    initHRegUsage(u);
   1927    switch (i->tag) {
   1928       case ARM64in_Arith:
   1929          addHRegUse(u, HRmWrite, i->ARM64in.Arith.dst);
   1930          addHRegUse(u, HRmRead, i->ARM64in.Arith.argL);
   1931          addRegUsage_ARM64RIA(u, i->ARM64in.Arith.argR);
   1932          return;
   1933       case ARM64in_Cmp:
   1934          addHRegUse(u, HRmRead, i->ARM64in.Cmp.argL);
   1935          addRegUsage_ARM64RIA(u, i->ARM64in.Cmp.argR);
   1936          return;
   1937       case ARM64in_Logic:
   1938          addHRegUse(u, HRmWrite, i->ARM64in.Logic.dst);
   1939          addHRegUse(u, HRmRead, i->ARM64in.Logic.argL);
   1940          addRegUsage_ARM64RIL(u, i->ARM64in.Logic.argR);
   1941          return;
   1942       case ARM64in_Test:
   1943          addHRegUse(u, HRmRead, i->ARM64in.Test.argL);
   1944          addRegUsage_ARM64RIL(u, i->ARM64in.Test.argR);
   1945          return;
   1946       case ARM64in_Shift:
   1947          addHRegUse(u, HRmWrite, i->ARM64in.Shift.dst);
   1948          addHRegUse(u, HRmRead, i->ARM64in.Shift.argL);
   1949          addRegUsage_ARM64RI6(u, i->ARM64in.Shift.argR);
   1950          return;
   1951       case ARM64in_Unary:
   1952          addHRegUse(u, HRmWrite, i->ARM64in.Unary.dst);
   1953          addHRegUse(u, HRmRead, i->ARM64in.Unary.src);
   1954          return;
   1955       case ARM64in_MovI:
   1956          addHRegUse(u, HRmWrite, i->ARM64in.MovI.dst);
   1957          addHRegUse(u, HRmRead,  i->ARM64in.MovI.src);
   1958          return;
   1959       case ARM64in_Imm64:
   1960          addHRegUse(u, HRmWrite, i->ARM64in.Imm64.dst);
   1961          return;
   1962       case ARM64in_LdSt64:
   1963          addRegUsage_ARM64AMode(u, i->ARM64in.LdSt64.amode);
   1964          if (i->ARM64in.LdSt64.isLoad) {
   1965             addHRegUse(u, HRmWrite, i->ARM64in.LdSt64.rD);
   1966          } else {
   1967             addHRegUse(u, HRmRead, i->ARM64in.LdSt64.rD);
   1968          }
   1969          return;
   1970       case ARM64in_LdSt32:
   1971          addRegUsage_ARM64AMode(u, i->ARM64in.LdSt32.amode);
   1972          if (i->ARM64in.LdSt32.isLoad) {
   1973             addHRegUse(u, HRmWrite, i->ARM64in.LdSt32.rD);
   1974          } else {
   1975             addHRegUse(u, HRmRead, i->ARM64in.LdSt32.rD);
   1976          }
   1977          return;
   1978       case ARM64in_LdSt16:
   1979          addRegUsage_ARM64AMode(u, i->ARM64in.LdSt16.amode);
   1980          if (i->ARM64in.LdSt16.isLoad) {
   1981             addHRegUse(u, HRmWrite, i->ARM64in.LdSt16.rD);
   1982          } else {
   1983             addHRegUse(u, HRmRead, i->ARM64in.LdSt16.rD);
   1984          }
   1985          return;
   1986       case ARM64in_LdSt8:
   1987          addRegUsage_ARM64AMode(u, i->ARM64in.LdSt8.amode);
   1988          if (i->ARM64in.LdSt8.isLoad) {
   1989             addHRegUse(u, HRmWrite, i->ARM64in.LdSt8.rD);
   1990          } else {
   1991             addHRegUse(u, HRmRead, i->ARM64in.LdSt8.rD);
   1992          }
   1993          return;
   1994       /* XDirect/XIndir/XAssisted are also a bit subtle.  They
   1995          conditionally exit the block.  Hence we only need to list (1)
   1996          the registers that they read, and (2) the registers that they
   1997          write in the case where the block is not exited.  (2) is
   1998          empty, hence only (1) is relevant here. */
   1999       case ARM64in_XDirect:
   2000          addRegUsage_ARM64AMode(u, i->ARM64in.XDirect.amPC);
   2001          return;
   2002       case ARM64in_XIndir:
   2003          addHRegUse(u, HRmRead, i->ARM64in.XIndir.dstGA);
   2004          addRegUsage_ARM64AMode(u, i->ARM64in.XIndir.amPC);
   2005          return;
   2006       case ARM64in_XAssisted:
   2007          addHRegUse(u, HRmRead, i->ARM64in.XAssisted.dstGA);
   2008          addRegUsage_ARM64AMode(u, i->ARM64in.XAssisted.amPC);
   2009          return;
   2010       case ARM64in_CSel:
   2011          addHRegUse(u, HRmWrite, i->ARM64in.CSel.dst);
   2012          addHRegUse(u, HRmRead,  i->ARM64in.CSel.argL);
   2013          addHRegUse(u, HRmRead,  i->ARM64in.CSel.argR);
   2014          return;
   2015       case ARM64in_Call:
   2016          /* logic and comments copied/modified from x86 back end */
   2017          /* This is a bit subtle. */
   2018          /* First off, claim it trashes all the caller-saved regs
   2019             which fall within the register allocator's jurisdiction.
   2020             These I believe to be x0 to x7 and the 128-bit vector
   2021             registers in use, q16 .. q20. */
   2022          addHRegUse(u, HRmWrite, hregARM64_X0());
   2023          addHRegUse(u, HRmWrite, hregARM64_X1());
   2024          addHRegUse(u, HRmWrite, hregARM64_X2());
   2025          addHRegUse(u, HRmWrite, hregARM64_X3());
   2026          addHRegUse(u, HRmWrite, hregARM64_X4());
   2027          addHRegUse(u, HRmWrite, hregARM64_X5());
   2028          addHRegUse(u, HRmWrite, hregARM64_X6());
   2029          addHRegUse(u, HRmWrite, hregARM64_X7());
   2030          addHRegUse(u, HRmWrite, hregARM64_Q16());
   2031          addHRegUse(u, HRmWrite, hregARM64_Q17());
   2032          addHRegUse(u, HRmWrite, hregARM64_Q18());
   2033          addHRegUse(u, HRmWrite, hregARM64_Q19());
   2034          addHRegUse(u, HRmWrite, hregARM64_Q20());
   2035          /* Now we have to state any parameter-carrying registers
   2036             which might be read.  This depends on nArgRegs. */
    2037          switch (i->ARM64in.Call.nArgRegs) {
   2038             case 8: addHRegUse(u, HRmRead, hregARM64_X7()); /*fallthru*/
   2039             case 7: addHRegUse(u, HRmRead, hregARM64_X6()); /*fallthru*/
   2040             case 6: addHRegUse(u, HRmRead, hregARM64_X5()); /*fallthru*/
   2041             case 5: addHRegUse(u, HRmRead, hregARM64_X4()); /*fallthru*/
   2042             case 4: addHRegUse(u, HRmRead, hregARM64_X3()); /*fallthru*/
   2043             case 3: addHRegUse(u, HRmRead, hregARM64_X2()); /*fallthru*/
   2044             case 2: addHRegUse(u, HRmRead, hregARM64_X1()); /*fallthru*/
   2045             case 1: addHRegUse(u, HRmRead, hregARM64_X0()); break;
   2046             case 0: break;
   2047             default: vpanic("getRegUsage_ARM64:Call:regparms");
   2048          }
   2049          /* Finally, there is the issue that the insn trashes a
   2050             register because the literal target address has to be
   2051             loaded into a register.  However, we reserve x9 for that
   2052             purpose so there's no further complexity here.  Stating x9
   2053             as trashed is pointless since it's not under the control
   2054             of the allocator, but what the hell. */
   2055          addHRegUse(u, HRmWrite, hregARM64_X9());
   2056          return;
   2057       case ARM64in_AddToSP:
   2058          /* Only changes SP, but regalloc doesn't control that, hence
   2059             we don't care. */
   2060          return;
   2061       case ARM64in_FromSP:
   2062          addHRegUse(u, HRmWrite, i->ARM64in.FromSP.dst);
   2063          return;
   2064       case ARM64in_Mul:
   2065          addHRegUse(u, HRmWrite, i->ARM64in.Mul.dst);
   2066          addHRegUse(u, HRmRead,  i->ARM64in.Mul.argL);
   2067          addHRegUse(u, HRmRead,  i->ARM64in.Mul.argR);
   2068          return;
   2069       case ARM64in_LdrEX:
   2070          addHRegUse(u, HRmRead, hregARM64_X4());
   2071          addHRegUse(u, HRmWrite, hregARM64_X2());
   2072          return;
   2073       case ARM64in_StrEX:
   2074          addHRegUse(u, HRmRead, hregARM64_X4());
   2075          addHRegUse(u, HRmWrite, hregARM64_X0());
   2076          addHRegUse(u, HRmRead, hregARM64_X2());
   2077          return;
   2078       case ARM64in_CAS:
   2079          addHRegUse(u, HRmRead, hregARM64_X3());
   2080          addHRegUse(u, HRmRead, hregARM64_X5());
   2081          addHRegUse(u, HRmRead, hregARM64_X7());
   2082          addHRegUse(u, HRmWrite, hregARM64_X1());
   2083          /* Pointless to state this since X8 is not available to RA. */
   2084          addHRegUse(u, HRmWrite, hregARM64_X8());
    2085          return;
   2086       case ARM64in_MFence:
   2087          return;
   2088       case ARM64in_ClrEX:
   2089          return;
   2090       case ARM64in_VLdStH:
   2091          addHRegUse(u, HRmRead, i->ARM64in.VLdStH.rN);
   2092          if (i->ARM64in.VLdStH.isLoad) {
   2093             addHRegUse(u, HRmWrite, i->ARM64in.VLdStH.hD);
   2094          } else {
   2095             addHRegUse(u, HRmRead, i->ARM64in.VLdStH.hD);
   2096          }
   2097          return;
   2098       case ARM64in_VLdStS:
   2099          addHRegUse(u, HRmRead, i->ARM64in.VLdStS.rN);
   2100          if (i->ARM64in.VLdStS.isLoad) {
   2101             addHRegUse(u, HRmWrite, i->ARM64in.VLdStS.sD);
   2102          } else {
   2103             addHRegUse(u, HRmRead, i->ARM64in.VLdStS.sD);
   2104          }
   2105          return;
   2106       case ARM64in_VLdStD:
   2107          addHRegUse(u, HRmRead, i->ARM64in.VLdStD.rN);
   2108          if (i->ARM64in.VLdStD.isLoad) {
   2109             addHRegUse(u, HRmWrite, i->ARM64in.VLdStD.dD);
   2110          } else {
   2111             addHRegUse(u, HRmRead, i->ARM64in.VLdStD.dD);
   2112          }
   2113          return;
   2114       case ARM64in_VLdStQ:
   2115          addHRegUse(u, HRmRead, i->ARM64in.VLdStQ.rN);
   2116          if (i->ARM64in.VLdStQ.isLoad)
   2117             addHRegUse(u, HRmWrite, i->ARM64in.VLdStQ.rQ);
   2118          else
   2119             addHRegUse(u, HRmRead, i->ARM64in.VLdStQ.rQ);
   2120          return;
   2121       case ARM64in_VCvtI2F:
   2122          addHRegUse(u, HRmRead, i->ARM64in.VCvtI2F.rS);
   2123          addHRegUse(u, HRmWrite, i->ARM64in.VCvtI2F.rD);
   2124          return;
   2125       case ARM64in_VCvtF2I:
   2126          addHRegUse(u, HRmRead, i->ARM64in.VCvtF2I.rS);
   2127          addHRegUse(u, HRmWrite, i->ARM64in.VCvtF2I.rD);
   2128          return;
   2129       case ARM64in_VCvtSD:
   2130          addHRegUse(u, HRmWrite, i->ARM64in.VCvtSD.dst);
   2131          addHRegUse(u, HRmRead,  i->ARM64in.VCvtSD.src);
   2132          return;
   2133       case ARM64in_VCvtHS:
   2134          addHRegUse(u, HRmWrite, i->ARM64in.VCvtHS.dst);
   2135          addHRegUse(u, HRmRead,  i->ARM64in.VCvtHS.src);
   2136          return;
   2137       case ARM64in_VCvtHD:
   2138          addHRegUse(u, HRmWrite, i->ARM64in.VCvtHD.dst);
   2139          addHRegUse(u, HRmRead,  i->ARM64in.VCvtHD.src);
   2140          return;
   2141       case ARM64in_VUnaryD:
   2142          addHRegUse(u, HRmWrite, i->ARM64in.VUnaryD.dst);
   2143          addHRegUse(u, HRmRead, i->ARM64in.VUnaryD.src);
   2144          return;
   2145       case ARM64in_VUnaryS:
   2146          addHRegUse(u, HRmWrite, i->ARM64in.VUnaryS.dst);
   2147          addHRegUse(u, HRmRead, i->ARM64in.VUnaryS.src);
   2148          return;
   2149       case ARM64in_VBinD:
   2150          addHRegUse(u, HRmWrite, i->ARM64in.VBinD.dst);
   2151          addHRegUse(u, HRmRead, i->ARM64in.VBinD.argL);
   2152          addHRegUse(u, HRmRead, i->ARM64in.VBinD.argR);
   2153          return;
   2154       case ARM64in_VBinS:
   2155          addHRegUse(u, HRmWrite, i->ARM64in.VBinS.dst);
   2156          addHRegUse(u, HRmRead, i->ARM64in.VBinS.argL);
   2157          addHRegUse(u, HRmRead, i->ARM64in.VBinS.argR);
   2158          return;
   2159       case ARM64in_VCmpD:
   2160          addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argL);
   2161          addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argR);
   2162          return;
   2163       case ARM64in_VCmpS:
   2164          addHRegUse(u, HRmRead, i->ARM64in.VCmpS.argL);
   2165          addHRegUse(u, HRmRead, i->ARM64in.VCmpS.argR);
   2166          return;
   2167       case ARM64in_VFCSel:
   2168          addHRegUse(u, HRmRead, i->ARM64in.VFCSel.argL);
   2169          addHRegUse(u, HRmRead, i->ARM64in.VFCSel.argR);
   2170          addHRegUse(u, HRmWrite, i->ARM64in.VFCSel.dst);
   2171          return;
   2172       case ARM64in_FPCR:
   2173          if (i->ARM64in.FPCR.toFPCR)
   2174             addHRegUse(u, HRmRead, i->ARM64in.FPCR.iReg);
   2175          else
   2176             addHRegUse(u, HRmWrite, i->ARM64in.FPCR.iReg);
   2177          return;
   2178       case ARM64in_FPSR:
   2179          if (i->ARM64in.FPSR.toFPSR)
   2180             addHRegUse(u, HRmRead, i->ARM64in.FPSR.iReg);
   2181          else
   2182             addHRegUse(u, HRmWrite, i->ARM64in.FPSR.iReg);
   2183          return;
   2184       case ARM64in_VBinV:
   2185          addHRegUse(u, HRmWrite, i->ARM64in.VBinV.dst);
   2186          addHRegUse(u, HRmRead, i->ARM64in.VBinV.argL);
   2187          addHRegUse(u, HRmRead, i->ARM64in.VBinV.argR);
   2188          return;
   2189       case ARM64in_VModifyV:
   2190          addHRegUse(u, HRmWrite, i->ARM64in.VModifyV.mod);
   2191          addHRegUse(u, HRmRead, i->ARM64in.VModifyV.mod);
   2192          addHRegUse(u, HRmRead, i->ARM64in.VModifyV.arg);
   2193          return;
   2194       case ARM64in_VUnaryV:
   2195          addHRegUse(u, HRmWrite, i->ARM64in.VUnaryV.dst);
   2196          addHRegUse(u, HRmRead, i->ARM64in.VUnaryV.arg);
   2197          return;
   2198       case ARM64in_VNarrowV:
   2199          addHRegUse(u, HRmWrite, i->ARM64in.VNarrowV.dst);
   2200          addHRegUse(u, HRmRead, i->ARM64in.VNarrowV.src);
   2201          return;
   2202       case ARM64in_VShiftImmV:
   2203          addHRegUse(u, HRmWrite, i->ARM64in.VShiftImmV.dst);
   2204          addHRegUse(u, HRmRead, i->ARM64in.VShiftImmV.src);
   2205          return;
   2206       case ARM64in_VExtV:
   2207          addHRegUse(u, HRmWrite, i->ARM64in.VExtV.dst);
   2208          addHRegUse(u, HRmRead, i->ARM64in.VExtV.srcLo);
   2209          addHRegUse(u, HRmRead, i->ARM64in.VExtV.srcHi);
   2210          return;
   2211       case ARM64in_VImmQ:
   2212          addHRegUse(u, HRmWrite, i->ARM64in.VImmQ.rQ);
   2213          return;
   2214       case ARM64in_VDfromX:
   2215          addHRegUse(u, HRmWrite, i->ARM64in.VDfromX.rD);
   2216          addHRegUse(u, HRmRead,  i->ARM64in.VDfromX.rX);
   2217          return;
   2218       case ARM64in_VQfromX:
   2219          addHRegUse(u, HRmWrite, i->ARM64in.VQfromX.rQ);
   2220          addHRegUse(u, HRmRead,  i->ARM64in.VQfromX.rXlo);
   2221          return;
   2222       case ARM64in_VQfromXX:
   2223          addHRegUse(u, HRmWrite, i->ARM64in.VQfromXX.rQ);
   2224          addHRegUse(u, HRmRead,  i->ARM64in.VQfromXX.rXhi);
   2225          addHRegUse(u, HRmRead,  i->ARM64in.VQfromXX.rXlo);
   2226          return;
   2227       case ARM64in_VXfromQ:
   2228          addHRegUse(u, HRmWrite, i->ARM64in.VXfromQ.rX);
   2229          addHRegUse(u, HRmRead,  i->ARM64in.VXfromQ.rQ);
   2230          return;
   2231       case ARM64in_VXfromDorS:
   2232          addHRegUse(u, HRmWrite, i->ARM64in.VXfromDorS.rX);
   2233          addHRegUse(u, HRmRead,  i->ARM64in.VXfromDorS.rDorS);
   2234          return;
   2235       case ARM64in_VMov:
   2236          addHRegUse(u, HRmWrite, i->ARM64in.VMov.dst);
   2237          addHRegUse(u, HRmRead,  i->ARM64in.VMov.src);
   2238          return;
   2239       case ARM64in_EvCheck:
   2240          /* We expect both amodes only to mention x21, so this is in
   2241             fact pointless, since x21 isn't allocatable, but
   2242             anyway.. */
   2243          addRegUsage_ARM64AMode(u, i->ARM64in.EvCheck.amCounter);
   2244          addRegUsage_ARM64AMode(u, i->ARM64in.EvCheck.amFailAddr);
   2245          addHRegUse(u, HRmWrite, hregARM64_X9()); /* also unavail to RA */
   2246          return;
   2247       case ARM64in_ProfInc:
   2248          /* Again, pointless to actually state these since neither
   2249             is available to RA. */
   2250          addHRegUse(u, HRmWrite, hregARM64_X9()); /* unavail to RA */
   2251          addHRegUse(u, HRmWrite, hregARM64_X8()); /* unavail to RA */
   2252          return;
   2253       default:
   2254          ppARM64Instr(i);
   2255          vpanic("getRegUsage_ARM64Instr");
   2256    }
   2257 }
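
         /* A sketch of how the usage information computed above is consumed.
            The HRegUsage field names used here are assumptions about
            host_generic_regs.h, not definitions from this file:

               HRegUsage u;
               getRegUsage_ARM64Instr(&u, instr, True);   // mode64 == True
               for (UInt k = 0; k < u.n_vRegs; k++) {
                  HReg     r  = u.vRegs[k];   // virtual reg mentioned by instr
                  HRegMode rm = u.vMode[k];   // HRmRead, HRmWrite or HRmModify
                  // ... the allocator binds r to a real register, honouring rm
               }

            An operand that is both read and written -- for example the .mod
            register of ARM64in_VModifyV above -- is added with both HRmRead
            and HRmWrite; the generic layer is expected to fold that pair into
            a single modify. */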
   2258 
   2259 
   2260 void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
   2261 {
   2262    vassert(mode64 == True);
   2263    switch (i->tag) {
   2264       case ARM64in_Arith:
   2265          i->ARM64in.Arith.dst = lookupHRegRemap(m, i->ARM64in.Arith.dst);
   2266          i->ARM64in.Arith.argL = lookupHRegRemap(m, i->ARM64in.Arith.argL);
   2267          mapRegs_ARM64RIA(m, i->ARM64in.Arith.argR);
   2268          return;
   2269       case ARM64in_Cmp:
   2270          i->ARM64in.Cmp.argL = lookupHRegRemap(m, i->ARM64in.Cmp.argL);
   2271          mapRegs_ARM64RIA(m, i->ARM64in.Cmp.argR);
   2272          return;
   2273       case ARM64in_Logic:
   2274          i->ARM64in.Logic.dst = lookupHRegRemap(m, i->ARM64in.Logic.dst);
   2275          i->ARM64in.Logic.argL = lookupHRegRemap(m, i->ARM64in.Logic.argL);
   2276          mapRegs_ARM64RIL(m, i->ARM64in.Logic.argR);
   2277          return;
   2278       case ARM64in_Test:
   2279          i->ARM64in.Test.argL = lookupHRegRemap(m, i->ARM64in.Test.argL);
    2280          mapRegs_ARM64RIL(m, i->ARM64in.Test.argR);
   2281          return;
   2282       case ARM64in_Shift:
   2283          i->ARM64in.Shift.dst = lookupHRegRemap(m, i->ARM64in.Shift.dst);
   2284          i->ARM64in.Shift.argL = lookupHRegRemap(m, i->ARM64in.Shift.argL);
   2285          mapRegs_ARM64RI6(m, i->ARM64in.Shift.argR);
   2286          return;
   2287       case ARM64in_Unary:
   2288          i->ARM64in.Unary.dst = lookupHRegRemap(m, i->ARM64in.Unary.dst);
   2289          i->ARM64in.Unary.src = lookupHRegRemap(m, i->ARM64in.Unary.src);
   2290          return;
   2291       case ARM64in_MovI:
   2292          i->ARM64in.MovI.dst = lookupHRegRemap(m, i->ARM64in.MovI.dst);
   2293          i->ARM64in.MovI.src = lookupHRegRemap(m, i->ARM64in.MovI.src);
   2294          return;
   2295       case ARM64in_Imm64:
   2296          i->ARM64in.Imm64.dst = lookupHRegRemap(m, i->ARM64in.Imm64.dst);
   2297          return;
   2298       case ARM64in_LdSt64:
   2299          i->ARM64in.LdSt64.rD = lookupHRegRemap(m, i->ARM64in.LdSt64.rD);
   2300          mapRegs_ARM64AMode(m, i->ARM64in.LdSt64.amode);
   2301          return;
   2302       case ARM64in_LdSt32:
   2303          i->ARM64in.LdSt32.rD = lookupHRegRemap(m, i->ARM64in.LdSt32.rD);
   2304          mapRegs_ARM64AMode(m, i->ARM64in.LdSt32.amode);
   2305          return;
   2306       case ARM64in_LdSt16:
   2307          i->ARM64in.LdSt16.rD = lookupHRegRemap(m, i->ARM64in.LdSt16.rD);
   2308          mapRegs_ARM64AMode(m, i->ARM64in.LdSt16.amode);
   2309          return;
   2310       case ARM64in_LdSt8:
   2311          i->ARM64in.LdSt8.rD = lookupHRegRemap(m, i->ARM64in.LdSt8.rD);
   2312          mapRegs_ARM64AMode(m, i->ARM64in.LdSt8.amode);
   2313          return;
   2314       case ARM64in_XDirect:
   2315          mapRegs_ARM64AMode(m, i->ARM64in.XDirect.amPC);
   2316          return;
   2317       case ARM64in_XIndir:
   2318          i->ARM64in.XIndir.dstGA
   2319             = lookupHRegRemap(m, i->ARM64in.XIndir.dstGA);
   2320          mapRegs_ARM64AMode(m, i->ARM64in.XIndir.amPC);
   2321          return;
   2322       case ARM64in_XAssisted:
   2323          i->ARM64in.XAssisted.dstGA
   2324             = lookupHRegRemap(m, i->ARM64in.XAssisted.dstGA);
   2325          mapRegs_ARM64AMode(m, i->ARM64in.XAssisted.amPC);
   2326          return;
   2327       case ARM64in_CSel:
   2328          i->ARM64in.CSel.dst  = lookupHRegRemap(m, i->ARM64in.CSel.dst);
   2329          i->ARM64in.CSel.argL = lookupHRegRemap(m, i->ARM64in.CSel.argL);
   2330          i->ARM64in.CSel.argR = lookupHRegRemap(m, i->ARM64in.CSel.argR);
   2331          return;
   2332       case ARM64in_Call:
   2333          return;
   2334       case ARM64in_AddToSP:
   2335          return;
   2336       case ARM64in_FromSP:
   2337          i->ARM64in.FromSP.dst = lookupHRegRemap(m, i->ARM64in.FromSP.dst);
   2338          return;
   2339       case ARM64in_Mul:
   2340          i->ARM64in.Mul.dst  = lookupHRegRemap(m, i->ARM64in.Mul.dst);
   2341          i->ARM64in.Mul.argL = lookupHRegRemap(m, i->ARM64in.Mul.argL);
   2342          i->ARM64in.Mul.argR = lookupHRegRemap(m, i->ARM64in.Mul.argR);
    2343          return;
   2344       case ARM64in_LdrEX:
   2345          return;
   2346       case ARM64in_StrEX:
   2347          return;
   2348       case ARM64in_CAS:
   2349          return;
   2350       case ARM64in_MFence:
   2351          return;
   2352       case ARM64in_ClrEX:
   2353          return;
   2354       case ARM64in_VLdStH:
   2355          i->ARM64in.VLdStH.hD = lookupHRegRemap(m, i->ARM64in.VLdStH.hD);
   2356          i->ARM64in.VLdStH.rN = lookupHRegRemap(m, i->ARM64in.VLdStH.rN);
   2357          return;
   2358       case ARM64in_VLdStS:
   2359          i->ARM64in.VLdStS.sD = lookupHRegRemap(m, i->ARM64in.VLdStS.sD);
   2360          i->ARM64in.VLdStS.rN = lookupHRegRemap(m, i->ARM64in.VLdStS.rN);
   2361          return;
   2362       case ARM64in_VLdStD:
   2363          i->ARM64in.VLdStD.dD = lookupHRegRemap(m, i->ARM64in.VLdStD.dD);
   2364          i->ARM64in.VLdStD.rN = lookupHRegRemap(m, i->ARM64in.VLdStD.rN);
   2365          return;
   2366       case ARM64in_VLdStQ:
   2367          i->ARM64in.VLdStQ.rQ = lookupHRegRemap(m, i->ARM64in.VLdStQ.rQ);
   2368          i->ARM64in.VLdStQ.rN = lookupHRegRemap(m, i->ARM64in.VLdStQ.rN);
   2369          return;
   2370       case ARM64in_VCvtI2F:
   2371          i->ARM64in.VCvtI2F.rS = lookupHRegRemap(m, i->ARM64in.VCvtI2F.rS);
   2372          i->ARM64in.VCvtI2F.rD = lookupHRegRemap(m, i->ARM64in.VCvtI2F.rD);
   2373          return;
   2374       case ARM64in_VCvtF2I:
   2375          i->ARM64in.VCvtF2I.rS = lookupHRegRemap(m, i->ARM64in.VCvtF2I.rS);
   2376          i->ARM64in.VCvtF2I.rD = lookupHRegRemap(m, i->ARM64in.VCvtF2I.rD);
   2377          return;
   2378       case ARM64in_VCvtSD:
   2379          i->ARM64in.VCvtSD.dst = lookupHRegRemap(m, i->ARM64in.VCvtSD.dst);
   2380          i->ARM64in.VCvtSD.src = lookupHRegRemap(m, i->ARM64in.VCvtSD.src);
   2381          return;
   2382       case ARM64in_VCvtHS:
   2383          i->ARM64in.VCvtHS.dst = lookupHRegRemap(m, i->ARM64in.VCvtHS.dst);
   2384          i->ARM64in.VCvtHS.src = lookupHRegRemap(m, i->ARM64in.VCvtHS.src);
   2385          return;
   2386       case ARM64in_VCvtHD:
   2387          i->ARM64in.VCvtHD.dst = lookupHRegRemap(m, i->ARM64in.VCvtHD.dst);
   2388          i->ARM64in.VCvtHD.src = lookupHRegRemap(m, i->ARM64in.VCvtHD.src);
   2389          return;
   2390       case ARM64in_VUnaryD:
   2391          i->ARM64in.VUnaryD.dst = lookupHRegRemap(m, i->ARM64in.VUnaryD.dst);
   2392          i->ARM64in.VUnaryD.src = lookupHRegRemap(m, i->ARM64in.VUnaryD.src);
   2393          return;
   2394       case ARM64in_VUnaryS:
   2395          i->ARM64in.VUnaryS.dst = lookupHRegRemap(m, i->ARM64in.VUnaryS.dst);
   2396          i->ARM64in.VUnaryS.src = lookupHRegRemap(m, i->ARM64in.VUnaryS.src);
   2397          return;
   2398       case ARM64in_VBinD:
   2399          i->ARM64in.VBinD.dst  = lookupHRegRemap(m, i->ARM64in.VBinD.dst);
   2400          i->ARM64in.VBinD.argL = lookupHRegRemap(m, i->ARM64in.VBinD.argL);
   2401          i->ARM64in.VBinD.argR = lookupHRegRemap(m, i->ARM64in.VBinD.argR);
   2402          return;
   2403       case ARM64in_VBinS:
   2404          i->ARM64in.VBinS.dst  = lookupHRegRemap(m, i->ARM64in.VBinS.dst);
   2405          i->ARM64in.VBinS.argL = lookupHRegRemap(m, i->ARM64in.VBinS.argL);
   2406          i->ARM64in.VBinS.argR = lookupHRegRemap(m, i->ARM64in.VBinS.argR);
   2407          return;
   2408       case ARM64in_VCmpD:
   2409          i->ARM64in.VCmpD.argL = lookupHRegRemap(m, i->ARM64in.VCmpD.argL);
   2410          i->ARM64in.VCmpD.argR = lookupHRegRemap(m, i->ARM64in.VCmpD.argR);
   2411          return;
   2412       case ARM64in_VCmpS:
   2413          i->ARM64in.VCmpS.argL = lookupHRegRemap(m, i->ARM64in.VCmpS.argL);
   2414          i->ARM64in.VCmpS.argR = lookupHRegRemap(m, i->ARM64in.VCmpS.argR);
   2415          return;
   2416       case ARM64in_VFCSel:
   2417          i->ARM64in.VFCSel.argL = lookupHRegRemap(m, i->ARM64in.VFCSel.argL);
   2418          i->ARM64in.VFCSel.argR = lookupHRegRemap(m, i->ARM64in.VFCSel.argR);
   2419          i->ARM64in.VFCSel.dst  = lookupHRegRemap(m, i->ARM64in.VFCSel.dst);
   2420          return;
   2421       case ARM64in_FPCR:
   2422          i->ARM64in.FPCR.iReg = lookupHRegRemap(m, i->ARM64in.FPCR.iReg);
   2423          return;
   2424       case ARM64in_FPSR:
   2425          i->ARM64in.FPSR.iReg = lookupHRegRemap(m, i->ARM64in.FPSR.iReg);
   2426          return;
   2427       case ARM64in_VBinV:
   2428          i->ARM64in.VBinV.dst  = lookupHRegRemap(m, i->ARM64in.VBinV.dst);
   2429          i->ARM64in.VBinV.argL = lookupHRegRemap(m, i->ARM64in.VBinV.argL);
   2430          i->ARM64in.VBinV.argR = lookupHRegRemap(m, i->ARM64in.VBinV.argR);
   2431          return;
   2432       case ARM64in_VModifyV:
   2433          i->ARM64in.VModifyV.mod = lookupHRegRemap(m, i->ARM64in.VModifyV.mod);
   2434          i->ARM64in.VModifyV.arg = lookupHRegRemap(m, i->ARM64in.VModifyV.arg);
   2435          return;
   2436       case ARM64in_VUnaryV:
   2437          i->ARM64in.VUnaryV.dst = lookupHRegRemap(m, i->ARM64in.VUnaryV.dst);
   2438          i->ARM64in.VUnaryV.arg = lookupHRegRemap(m, i->ARM64in.VUnaryV.arg);
   2439          return;
   2440       case ARM64in_VNarrowV:
   2441          i->ARM64in.VNarrowV.dst = lookupHRegRemap(m, i->ARM64in.VNarrowV.dst);
   2442          i->ARM64in.VNarrowV.src = lookupHRegRemap(m, i->ARM64in.VNarrowV.src);
   2443          return;
   2444       case ARM64in_VShiftImmV:
   2445          i->ARM64in.VShiftImmV.dst
   2446             = lookupHRegRemap(m, i->ARM64in.VShiftImmV.dst);
   2447          i->ARM64in.VShiftImmV.src
   2448             = lookupHRegRemap(m, i->ARM64in.VShiftImmV.src);
   2449          return;
   2450       case ARM64in_VExtV:
   2451          i->ARM64in.VExtV.dst = lookupHRegRemap(m, i->ARM64in.VExtV.dst);
   2452          i->ARM64in.VExtV.srcLo = lookupHRegRemap(m, i->ARM64in.VExtV.srcLo);
   2453          i->ARM64in.VExtV.srcHi = lookupHRegRemap(m, i->ARM64in.VExtV.srcHi);
   2454          return;
   2455       case ARM64in_VImmQ:
   2456          i->ARM64in.VImmQ.rQ = lookupHRegRemap(m, i->ARM64in.VImmQ.rQ);
   2457          return;
   2458       case ARM64in_VDfromX:
   2459          i->ARM64in.VDfromX.rD
   2460             = lookupHRegRemap(m, i->ARM64in.VDfromX.rD);
   2461          i->ARM64in.VDfromX.rX
   2462             = lookupHRegRemap(m, i->ARM64in.VDfromX.rX);
   2463          return;
   2464       case ARM64in_VQfromX:
   2465          i->ARM64in.VQfromX.rQ
   2466             = lookupHRegRemap(m, i->ARM64in.VQfromX.rQ);
   2467          i->ARM64in.VQfromX.rXlo
   2468             = lookupHRegRemap(m, i->ARM64in.VQfromX.rXlo);
   2469          return;
   2470       case ARM64in_VQfromXX:
   2471          i->ARM64in.VQfromXX.rQ
   2472             = lookupHRegRemap(m, i->ARM64in.VQfromXX.rQ);
   2473          i->ARM64in.VQfromXX.rXhi
   2474             = lookupHRegRemap(m, i->ARM64in.VQfromXX.rXhi);
   2475          i->ARM64in.VQfromXX.rXlo
   2476             = lookupHRegRemap(m, i->ARM64in.VQfromXX.rXlo);
   2477          return;
   2478       case ARM64in_VXfromQ:
   2479          i->ARM64in.VXfromQ.rX
   2480             = lookupHRegRemap(m, i->ARM64in.VXfromQ.rX);
   2481          i->ARM64in.VXfromQ.rQ
   2482             = lookupHRegRemap(m, i->ARM64in.VXfromQ.rQ);
   2483          return;
   2484       case ARM64in_VXfromDorS:
   2485          i->ARM64in.VXfromDorS.rX
   2486             = lookupHRegRemap(m, i->ARM64in.VXfromDorS.rX);
   2487          i->ARM64in.VXfromDorS.rDorS
   2488             = lookupHRegRemap(m, i->ARM64in.VXfromDorS.rDorS);
   2489          return;
   2490       case ARM64in_VMov:
   2491          i->ARM64in.VMov.dst = lookupHRegRemap(m, i->ARM64in.VMov.dst);
   2492          i->ARM64in.VMov.src = lookupHRegRemap(m, i->ARM64in.VMov.src);
   2493          return;
   2494       case ARM64in_EvCheck:
   2495          /* We expect both amodes only to mention x21, so this is in
   2496             fact pointless, since x21 isn't allocatable, but
   2497             anyway.. */
   2498          mapRegs_ARM64AMode(m, i->ARM64in.EvCheck.amCounter);
   2499          mapRegs_ARM64AMode(m, i->ARM64in.EvCheck.amFailAddr);
   2500          return;
   2501       case ARM64in_ProfInc:
   2502          /* hardwires x8 and x9 -- nothing to modify. */
   2503          return;
   2504       default:
   2505          ppARM64Instr(i);
   2506          vpanic("mapRegs_ARM64Instr");
   2507    }
   2508 }
   2509 
   2510 /* Figure out if i represents a reg-reg move, and if so assign the
   2511    source and destination to *src and *dst.  If in doubt say No.  Used
   2512    by the register allocator to do move coalescing.
   2513 */
   2514 Bool isMove_ARM64Instr ( const ARM64Instr* i, HReg* src, HReg* dst )
   2515 {
   2516    switch (i->tag) {
   2517       case ARM64in_MovI:
   2518          *src = i->ARM64in.MovI.src;
   2519          *dst = i->ARM64in.MovI.dst;
   2520          return True;
   2521       case ARM64in_VMov:
   2522          *src = i->ARM64in.VMov.src;
   2523          *dst = i->ARM64in.VMov.dst;
   2524          return True;
   2525       default:
   2526          break;
   2527    }
   2528 
   2529    return False;
   2530 }
   2531 
   2532 
   2533 /* Generate arm spill/reload instructions under the direction of the
   2534    register allocator.  Note it's critical these don't write the
   2535    condition codes. */
   2536 
   2537 void genSpill_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
   2538                       HReg rreg, Int offsetB, Bool mode64 )
   2539 {
   2540    HRegClass rclass;
   2541    vassert(offsetB >= 0);
   2542    vassert(!hregIsVirtual(rreg));
   2543    vassert(mode64 == True);
   2544    *i1 = *i2 = NULL;
   2545    rclass = hregClass(rreg);
   2546    switch (rclass) {
   2547       case HRcInt64:
   2548          vassert(0 == (offsetB & 7));
   2549          offsetB >>= 3;
   2550          vassert(offsetB < 4096);
   2551          *i1 = ARM64Instr_LdSt64(
   2552                   False/*!isLoad*/,
   2553                   rreg,
   2554                   ARM64AMode_RI12(hregARM64_X21(), offsetB, 8)
   2555                );
   2556          return;
   2557       case HRcFlt64:
   2558          vassert(0 == (offsetB & 7));
   2559          vassert(offsetB >= 0 && offsetB < 32768);
   2560          *i1 = ARM64Instr_VLdStD(False/*!isLoad*/,
   2561                                  rreg, hregARM64_X21(), offsetB);
   2562          return;
   2563       case HRcVec128: {
   2564          HReg x21  = hregARM64_X21();  // baseblock
   2565          HReg x9   = hregARM64_X9();   // spill temporary
   2566          vassert(0 == (offsetB & 15)); // check sane alignment
   2567          vassert(offsetB < 4096);
   2568          *i1 = ARM64Instr_Arith(x9, x21, ARM64RIA_I12(offsetB, 0), True);
   2569          *i2 = ARM64Instr_VLdStQ(False/*!isLoad*/, rreg, x9);
   2570          return;
   2571       }
   2572       default:
   2573          ppHRegClass(rclass);
    2574          vpanic("genSpill_ARM64: unimplemented regclass");
   2575    }
   2576 }
   2577 
   2578 void genReload_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
   2579                        HReg rreg, Int offsetB, Bool mode64 )
   2580 {
   2581    HRegClass rclass;
   2582    vassert(offsetB >= 0);
   2583    vassert(!hregIsVirtual(rreg));
   2584    vassert(mode64 == True);
   2585    *i1 = *i2 = NULL;
   2586    rclass = hregClass(rreg);
   2587    switch (rclass) {
   2588       case HRcInt64:
   2589          vassert(0 == (offsetB & 7));
   2590          offsetB >>= 3;
   2591          vassert(offsetB < 4096);
   2592          *i1 = ARM64Instr_LdSt64(
   2593                   True/*isLoad*/,
   2594                   rreg,
   2595                   ARM64AMode_RI12(hregARM64_X21(), offsetB, 8)
   2596                );
   2597          return;
   2598       case HRcFlt64:
   2599          vassert(0 == (offsetB & 7));
   2600          vassert(offsetB >= 0 && offsetB < 32768);
   2601          *i1 = ARM64Instr_VLdStD(True/*isLoad*/,
   2602                                  rreg, hregARM64_X21(), offsetB);
   2603          return;
   2604       case HRcVec128: {
   2605          HReg x21  = hregARM64_X21();  // baseblock
   2606          HReg x9   = hregARM64_X9();   // spill temporary
   2607          vassert(0 == (offsetB & 15)); // check sane alignment
   2608          vassert(offsetB < 4096);
   2609          *i1 = ARM64Instr_Arith(x9, x21, ARM64RIA_I12(offsetB, 0), True);
   2610          *i2 = ARM64Instr_VLdStQ(True/*isLoad*/, rreg, x9);
   2611          return;
   2612       }
   2613       default:
   2614          ppHRegClass(rclass);
    2615          vpanic("genReload_ARM64: unimplemented regclass");
   2616    }
   2617 }
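/* Worked examples (editor's addition, not in the original source), assuming
   the baseblock pointer is in x21 as above:
     - spilling an HRcInt64 rreg at offsetB == 24 becomes a single
       "STR <rreg>, [x21, #24]", via ARM64AMode_RI12(x21, 3, 8);
     - spilling an HRcVec128 rreg at offsetB == 784 becomes the pair
       "ADD x9, x21, #784" then "STR <qreg>, [x9]", since the Q-register
       load/store form only takes a plain register address. */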
   2618 
   2619 
   2620 /* Emit an instruction into buf and return the number of bytes used.
   2621    Note that buf is not the insn's final place, and therefore it is
   2622    imperative to emit position-independent code. */
   2623 
   2624 static inline UInt iregEnc ( HReg r )
   2625 {
   2626    UInt n;
   2627    vassert(hregClass(r) == HRcInt64);
   2628    vassert(!hregIsVirtual(r));
   2629    n = hregEncoding(r);
   2630    vassert(n <= 30);
   2631    return n;
   2632 }
   2633 
   2634 static inline UInt dregEnc ( HReg r )
   2635 {
   2636    UInt n;
   2637    vassert(hregClass(r) == HRcFlt64);
   2638    vassert(!hregIsVirtual(r));
   2639    n = hregEncoding(r);
   2640    vassert(n <= 31);
   2641    return n;
   2642 }
   2643 
   2644 static inline UInt qregEnc ( HReg r )
   2645 {
   2646    UInt n;
   2647    vassert(hregClass(r) == HRcVec128);
   2648    vassert(!hregIsVirtual(r));
   2649    n = hregEncoding(r);
   2650    vassert(n <= 31);
   2651    return n;
   2652 }
   2653 
   2654 #define BITS4(zzb3,zzb2,zzb1,zzb0) \
   2655    (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
   2656 
   2657 #define X00  BITS4(0,0, 0,0)
   2658 #define X01  BITS4(0,0, 0,1)
   2659 #define X10  BITS4(0,0, 1,0)
   2660 #define X11  BITS4(0,0, 1,1)
   2661 
   2662 #define X000 BITS4(0, 0,0,0)
   2663 #define X001 BITS4(0, 0,0,1)
   2664 #define X010 BITS4(0, 0,1,0)
   2665 #define X011 BITS4(0, 0,1,1)
   2666 #define X100 BITS4(0, 1,0,0)
   2667 #define X101 BITS4(0, 1,0,1)
   2668 #define X110 BITS4(0, 1,1,0)
   2669 #define X111 BITS4(0, 1,1,1)
   2670 
   2671 #define X0000 BITS4(0,0,0,0)
   2672 #define X0001 BITS4(0,0,0,1)
   2673 #define X0010 BITS4(0,0,1,0)
   2674 #define X0011 BITS4(0,0,1,1)
   2675 
   2676 #define BITS8(zzb7,zzb6,zzb5,zzb4,zzb3,zzb2,zzb1,zzb0) \
   2677   ((BITS4(zzb7,zzb6,zzb5,zzb4) << 4) | BITS4(zzb3,zzb2,zzb1,zzb0))
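/* Editor's note: these macros simply name bit patterns, so the constants
   below can be read off directly against the encodings in the ARMv8
   reference, e.g. BITS4(1,0,1,1) == 0xB and
   BITS8(0,1,0,1,1,0,0,0) == X01011000 == 0x58. */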
   2678 
   2679 #define X00000   BITS8(0,0,0, 0,0,0,0,0)
   2680 #define X00001   BITS8(0,0,0, 0,0,0,0,1)
   2681 #define X00110   BITS8(0,0,0, 0,0,1,1,0)
   2682 #define X00111   BITS8(0,0,0, 0,0,1,1,1)
   2683 #define X01000   BITS8(0,0,0, 0,1,0,0,0)
   2684 #define X10000   BITS8(0,0,0, 1,0,0,0,0)
   2685 #define X11000   BITS8(0,0,0, 1,1,0,0,0)
   2686 #define X11110   BITS8(0,0,0, 1,1,1,1,0)
   2687 #define X11111   BITS8(0,0,0, 1,1,1,1,1)
   2688 
   2689 #define X000000  BITS8(0,0, 0,0,0,0,0,0)
   2690 #define X000001  BITS8(0,0, 0,0,0,0,0,1)
   2691 #define X000010  BITS8(0,0, 0,0,0,0,1,0)
   2692 #define X000011  BITS8(0,0, 0,0,0,0,1,1)
   2693 #define X000100  BITS8(0,0, 0,0,0,1,0,0)
   2694 #define X000110  BITS8(0,0, 0,0,0,1,1,0)
   2695 #define X000111  BITS8(0,0, 0,0,0,1,1,1)
   2696 #define X001000  BITS8(0,0, 0,0,1,0,0,0)
   2697 #define X001001  BITS8(0,0, 0,0,1,0,0,1)
   2698 #define X001010  BITS8(0,0, 0,0,1,0,1,0)
   2699 #define X001011  BITS8(0,0, 0,0,1,0,1,1)
   2700 #define X001101  BITS8(0,0, 0,0,1,1,0,1)
   2701 #define X001110  BITS8(0,0, 0,0,1,1,1,0)
   2702 #define X001111  BITS8(0,0, 0,0,1,1,1,1)
   2703 #define X010000  BITS8(0,0, 0,1,0,0,0,0)
   2704 #define X010001  BITS8(0,0, 0,1,0,0,0,1)
   2705 #define X010010  BITS8(0,0, 0,1,0,0,1,0)
   2706 #define X010011  BITS8(0,0, 0,1,0,0,1,1)
   2707 #define X010101  BITS8(0,0, 0,1,0,1,0,1)
   2708 #define X010110  BITS8(0,0, 0,1,0,1,1,0)
   2709 #define X010111  BITS8(0,0, 0,1,0,1,1,1)
   2710 #define X011001  BITS8(0,0, 0,1,1,0,0,1)
   2711 #define X011010  BITS8(0,0, 0,1,1,0,1,0)
   2712 #define X011011  BITS8(0,0, 0,1,1,0,1,1)
   2713 #define X011101  BITS8(0,0, 0,1,1,1,0,1)
   2714 #define X011110  BITS8(0,0, 0,1,1,1,1,0)
   2715 #define X011111  BITS8(0,0, 0,1,1,1,1,1)
   2716 #define X100001  BITS8(0,0, 1,0,0,0,0,1)
   2717 #define X100011  BITS8(0,0, 1,0,0,0,1,1)
   2718 #define X100100  BITS8(0,0, 1,0,0,1,0,0)
   2719 #define X100101  BITS8(0,0, 1,0,0,1,0,1)
   2720 #define X100110  BITS8(0,0, 1,0,0,1,1,0)
   2721 #define X100111  BITS8(0,0, 1,0,0,1,1,1)
   2722 #define X101101  BITS8(0,0, 1,0,1,1,0,1)
   2723 #define X101110  BITS8(0,0, 1,0,1,1,1,0)
   2724 #define X110000  BITS8(0,0, 1,1,0,0,0,0)
   2725 #define X110001  BITS8(0,0, 1,1,0,0,0,1)
   2726 #define X110010  BITS8(0,0, 1,1,0,0,1,0)
   2727 #define X110100  BITS8(0,0, 1,1,0,1,0,0)
   2728 #define X110101  BITS8(0,0, 1,1,0,1,0,1)
   2729 #define X110110  BITS8(0,0, 1,1,0,1,1,0)
   2730 #define X110111  BITS8(0,0, 1,1,0,1,1,1)
   2731 #define X111000  BITS8(0,0, 1,1,1,0,0,0)
   2732 #define X111001  BITS8(0,0, 1,1,1,0,0,1)
   2733 #define X111101  BITS8(0,0, 1,1,1,1,0,1)
   2734 #define X111110  BITS8(0,0, 1,1,1,1,1,0)
   2735 #define X111111  BITS8(0,0, 1,1,1,1,1,1)
   2736 
   2737 #define X0001000  BITS8(0, 0,0,0,1,0,0,0)
   2738 #define X0010000  BITS8(0, 0,0,1,0,0,0,0)
   2739 #define X0100000  BITS8(0, 0,1,0,0,0,0,0)
   2740 #define X1000000  BITS8(0, 1,0,0,0,0,0,0)
   2741 
   2742 #define X00100000  BITS8(0,0,1,0,0,0,0,0)
   2743 #define X00100001  BITS8(0,0,1,0,0,0,0,1)
   2744 #define X00100010  BITS8(0,0,1,0,0,0,1,0)
   2745 #define X00100011  BITS8(0,0,1,0,0,0,1,1)
   2746 #define X01010000  BITS8(0,1,0,1,0,0,0,0)
   2747 #define X01010001  BITS8(0,1,0,1,0,0,0,1)
   2748 #define X01010100  BITS8(0,1,0,1,0,1,0,0)
   2749 #define X01011000  BITS8(0,1,0,1,1,0,0,0)
   2750 #define X01100000  BITS8(0,1,1,0,0,0,0,0)
   2751 #define X01100001  BITS8(0,1,1,0,0,0,0,1)
   2752 #define X01100010  BITS8(0,1,1,0,0,0,1,0)
   2753 #define X01100011  BITS8(0,1,1,0,0,0,1,1)
   2754 #define X01110000  BITS8(0,1,1,1,0,0,0,0)
   2755 #define X01110001  BITS8(0,1,1,1,0,0,0,1)
   2756 #define X01110010  BITS8(0,1,1,1,0,0,1,0)
   2757 #define X01110011  BITS8(0,1,1,1,0,0,1,1)
   2758 #define X01110100  BITS8(0,1,1,1,0,1,0,0)
   2759 #define X01110101  BITS8(0,1,1,1,0,1,0,1)
   2760 #define X01110110  BITS8(0,1,1,1,0,1,1,0)
   2761 #define X01110111  BITS8(0,1,1,1,0,1,1,1)
   2762 #define X11000001  BITS8(1,1,0,0,0,0,0,1)
   2763 #define X11000011  BITS8(1,1,0,0,0,0,1,1)
   2764 #define X11010100  BITS8(1,1,0,1,0,1,0,0)
   2765 #define X11010110  BITS8(1,1,0,1,0,1,1,0)
   2766 #define X11011000  BITS8(1,1,0,1,1,0,0,0)
   2767 #define X11011010  BITS8(1,1,0,1,1,0,1,0)
   2768 #define X11011110  BITS8(1,1,0,1,1,1,1,0)
   2769 #define X11100010  BITS8(1,1,1,0,0,0,1,0)
   2770 #define X11110001  BITS8(1,1,1,1,0,0,0,1)
   2771 #define X11110011  BITS8(1,1,1,1,0,0,1,1)
   2772 #define X11110101  BITS8(1,1,1,1,0,1,0,1)
   2773 #define X11110111  BITS8(1,1,1,1,0,1,1,1)
   2774 
   2775 
   2776 /* --- 4 fields --- */
   2777 
   2778 static inline UInt X_8_19_1_4 ( UInt f1, UInt f2, UInt f3, UInt f4 ) {
   2779    vassert(8+19+1+4 == 32);
   2780    vassert(f1 < (1<<8));
   2781    vassert(f2 < (1<<19));
   2782    vassert(f3 < (1<<1));
   2783    vassert(f4 < (1<<4));
   2784    UInt w = 0;
   2785    w = (w <<  8) | f1;
   2786    w = (w << 19) | f2;
   2787    w = (w <<  1) | f3;
   2788    w = (w <<  4) | f4;
   2789    return w;
   2790 }
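/* Illustrative example (editor's addition): the conditional-branch patching
   below uses this packer, e.g.
      X_8_19_1_4(X01010100, 3, 0, 1) == 0x54000061
   which is B.NE .+12 (0101 0100 | imm19 = 3 | 0 | cond = 0001 (NE)),
   a forward skip of three instructions. */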
   2791 
   2792 /* --- 5 fields --- */
   2793 
   2794 static inline UInt X_3_6_2_16_5 ( UInt f1, UInt f2,
   2795                                   UInt f3, UInt f4, UInt f5 ) {
   2796    vassert(3+6+2+16+5 == 32);
   2797    vassert(f1 < (1<<3));
   2798    vassert(f2 < (1<<6));
   2799    vassert(f3 < (1<<2));
   2800    vassert(f4 < (1<<16));
   2801    vassert(f5 < (1<<5));
   2802    UInt w = 0;
   2803    w = (w <<  3) | f1;
   2804    w = (w <<  6) | f2;
   2805    w = (w <<  2) | f3;
   2806    w = (w << 16) | f4;
   2807    w = (w <<  5) | f5;
   2808    return w;
   2809 }
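/* Illustrative example (editor's addition): imm64_to_ireg below uses this
   packer for MOVZ/MOVK, e.g.
      X_3_6_2_16_5(X110, X100101, X00, 0, 9) == 0xD2800009
   which is MOVZ x9, #0, LSL #0. */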
   2810 
   2811 /* --- 6 fields --- */
   2812 
   2813 static inline UInt X_2_6_2_12_5_5 ( UInt f1, UInt f2, UInt f3,
   2814                                     UInt f4, UInt f5, UInt f6 ) {
   2815    vassert(2+6+2+12+5+5 == 32);
   2816    vassert(f1 < (1<<2));
   2817    vassert(f2 < (1<<6));
   2818    vassert(f3 < (1<<2));
   2819    vassert(f4 < (1<<12));
   2820    vassert(f5 < (1<<5));
   2821    vassert(f6 < (1<<5));
   2822    UInt w = 0;
   2823    w = (w <<  2) | f1;
   2824    w = (w <<  6) | f2;
   2825    w = (w <<  2) | f3;
   2826    w = (w << 12) | f4;
   2827    w = (w <<  5) | f5;
   2828    w = (w <<  5) | f6;
   2829    return w;
   2830 }
   2831 
   2832 static inline UInt X_3_8_5_6_5_5 ( UInt f1, UInt f2, UInt f3,
   2833                                    UInt f4, UInt f5, UInt f6 ) {
   2834    vassert(3+8+5+6+5+5 == 32);
   2835    vassert(f1 < (1<<3));
   2836    vassert(f2 < (1<<8));
   2837    vassert(f3 < (1<<5));
   2838    vassert(f4 < (1<<6));
   2839    vassert(f5 < (1<<5));
   2840    vassert(f6 < (1<<5));
   2841    UInt w = 0;
   2842    w = (w <<  3) | f1;
   2843    w = (w <<  8) | f2;
   2844    w = (w <<  5) | f3;
   2845    w = (w <<  6) | f4;
   2846    w = (w <<  5) | f5;
   2847    w = (w <<  5) | f6;
   2848    return w;
   2849 }
   2850 
   2851 static inline UInt X_3_5_8_6_5_5 ( UInt f1, UInt f2, UInt f3,
   2852                                    UInt f4, UInt f5, UInt f6 ) {
    2853    vassert(3+5+8+6+5+5 == 32);
   2854    vassert(f1 < (1<<3));
   2855    vassert(f2 < (1<<5));
   2856    vassert(f3 < (1<<8));
   2857    vassert(f4 < (1<<6));
   2858    vassert(f5 < (1<<5));
   2859    vassert(f6 < (1<<5));
   2860    UInt w = 0;
   2861    w = (w <<  3) | f1;
   2862    w = (w <<  5) | f2;
   2863    w = (w <<  8) | f3;
   2864    w = (w <<  6) | f4;
   2865    w = (w <<  5) | f5;
   2866    w = (w <<  5) | f6;
   2867    return w;
   2868 }
   2869 
   2870 static inline UInt X_3_6_7_6_5_5 ( UInt f1, UInt f2, UInt f3,
   2871                                    UInt f4, UInt f5, UInt f6 ) {
   2872    vassert(3+6+7+6+5+5 == 32);
   2873    vassert(f1 < (1<<3));
   2874    vassert(f2 < (1<<6));
   2875    vassert(f3 < (1<<7));
   2876    vassert(f4 < (1<<6));
   2877    vassert(f5 < (1<<5));
   2878    vassert(f6 < (1<<5));
   2879    UInt w = 0;
   2880    w = (w <<  3) | f1;
   2881    w = (w <<  6) | f2;
   2882    w = (w <<  7) | f3;
   2883    w = (w <<  6) | f4;
   2884    w = (w <<  5) | f5;
   2885    w = (w <<  5) | f6;
   2886    return w;
   2887 }
   2888 
   2889 /* --- 7 fields --- */
   2890 
   2891 static inline UInt X_2_6_3_9_2_5_5 ( UInt f1, UInt f2, UInt f3,
   2892                                      UInt f4, UInt f5, UInt f6, UInt f7 ) {
   2893    vassert(2+6+3+9+2+5+5 == 32);
   2894    vassert(f1 < (1<<2));
   2895    vassert(f2 < (1<<6));
   2896    vassert(f3 < (1<<3));
   2897    vassert(f4 < (1<<9));
   2898    vassert(f5 < (1<<2));
   2899    vassert(f6 < (1<<5));
   2900    vassert(f7 < (1<<5));
   2901    UInt w = 0;
   2902    w = (w << 2) | f1;
   2903    w = (w << 6) | f2;
   2904    w = (w << 3) | f3;
   2905    w = (w << 9) | f4;
   2906    w = (w << 2) | f5;
   2907    w = (w << 5) | f6;
   2908    w = (w << 5) | f7;
   2909    return w;
   2910 }
   2911 
   2912 static inline UInt X_3_6_1_6_6_5_5 ( UInt f1, UInt f2, UInt f3,
   2913                                      UInt f4, UInt f5, UInt f6, UInt f7 ) {
   2914    vassert(3+6+1+6+6+5+5 == 32);
   2915    vassert(f1 < (1<<3));
   2916    vassert(f2 < (1<<6));
   2917    vassert(f3 < (1<<1));
   2918    vassert(f4 < (1<<6));
   2919    vassert(f5 < (1<<6));
   2920    vassert(f6 < (1<<5));
   2921    vassert(f7 < (1<<5));
   2922    UInt w = 0;
   2923    w = (w << 3) | f1;
   2924    w = (w << 6) | f2;
   2925    w = (w << 1) | f3;
   2926    w = (w << 6) | f4;
   2927    w = (w << 6) | f5;
   2928    w = (w << 5) | f6;
   2929    w = (w << 5) | f7;
   2930    return w;
   2931 }
   2932 
   2933 
   2934 //ZZ #define X0000  BITS4(0,0,0,0)
   2935 //ZZ #define X0001  BITS4(0,0,0,1)
   2936 //ZZ #define X0010  BITS4(0,0,1,0)
   2937 //ZZ #define X0011  BITS4(0,0,1,1)
   2938 //ZZ #define X0100  BITS4(0,1,0,0)
   2939 //ZZ #define X0101  BITS4(0,1,0,1)
   2940 //ZZ #define X0110  BITS4(0,1,1,0)
   2941 //ZZ #define X0111  BITS4(0,1,1,1)
   2942 //ZZ #define X1000  BITS4(1,0,0,0)
   2943 //ZZ #define X1001  BITS4(1,0,0,1)
   2944 //ZZ #define X1010  BITS4(1,0,1,0)
   2945 //ZZ #define X1011  BITS4(1,0,1,1)
   2946 //ZZ #define X1100  BITS4(1,1,0,0)
   2947 //ZZ #define X1101  BITS4(1,1,0,1)
   2948 //ZZ #define X1110  BITS4(1,1,1,0)
   2949 //ZZ #define X1111  BITS4(1,1,1,1)
   2950 /*
   2951 #define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
   2952    ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
   2953     (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
   2954     (((zzx3) & 0xF) << 12))
   2955 
   2956 #define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2)        \
   2957    ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
   2958     (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
   2959     (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8))
   2960 
   2961 #define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0)        \
   2962    ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
   2963     (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
   2964     (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) <<  0))
   2965 
   2966 #define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
   2967   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
   2968    (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
   2969    (((zzx0) & 0xF) << 0))
   2970 
   2971 #define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0)  \
   2972    ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
   2973     (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
   2974     (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8) |  \
   2975     (((zzx1) & 0xF) <<  4) | (((zzx0) & 0xF) <<  0))
   2976 
   2977 #define XX______(zzx7,zzx6) \
   2978    ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24))
   2979 */
   2980 
   2981 
   2982 /* Get an immediate into a register, using only that register. */
   2983 static UInt* imm64_to_ireg ( UInt* p, Int xD, ULong imm64 )
   2984 {
   2985    if (imm64 == 0) {
   2986       // This has to be special-cased, since the logic below
   2987       // will leave the register unchanged in this case.
   2988       // MOVZ xD, #0, LSL #0
   2989       *p++ = X_3_6_2_16_5(X110, X100101, X00, 0/*imm16*/, xD);
   2990       return p;
   2991    }
   2992 
    2993    // There must be at least one nonzero halfword.  Find the
    2994    // lowest-numbered such halfword, and use MOVZ to install it
    2995    // and zero out the rest of the register.
   2996    UShort h[4];
   2997    h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
   2998    h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
   2999    h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
   3000    h[0] = (UShort)((imm64 >>  0) & 0xFFFF);
   3001 
   3002    UInt i;
   3003    for (i = 0; i < 4; i++) {
   3004       if (h[i] != 0)
   3005          break;
   3006    }
   3007    vassert(i < 4);
   3008 
   3009    // MOVZ xD, h[i], LSL (16*i)
   3010    *p++ = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
   3011 
   3012    // Work on upwards through h[i], using MOVK to stuff in any
   3013    // remaining nonzero elements.
   3014    i++;
   3015    for (; i < 4; i++) {
   3016       if (h[i] == 0)
   3017          continue;
   3018       // MOVK xD, h[i], LSL (16*i)
   3019       *p++ = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
   3020    }
   3021 
   3022    return p;
   3023 }
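/* Worked example (editor's addition, not in the original source):
   imm64_to_ireg(p, 9, 0x0000444400001111ULL) emits just two instructions,
      MOVZ x9, #0x1111, LSL #0
      MOVK x9, #0x4444, LSL #32
   since the two all-zero halfwords need no MOVK. */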
   3024 
   3025 /* Get an immediate into a register, using only that register, and
   3026    generating exactly 4 instructions, regardless of the value of the
   3027    immediate. This is used when generating sections of code that need
   3028    to be patched later, so as to guarantee a specific size. */
   3029 static UInt* imm64_to_ireg_EXACTLY4 ( UInt* p, Int xD, ULong imm64 )
   3030 {
   3031    UShort h[4];
   3032    h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
   3033    h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
   3034    h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
   3035    h[0] = (UShort)((imm64 >>  0) & 0xFFFF);
   3036    // Work on upwards through h[i], using MOVK to stuff in the
   3037    // remaining elements.
   3038    UInt i;
   3039    for (i = 0; i < 4; i++) {
   3040       if (i == 0) {
   3041          // MOVZ xD, h[0], LSL (16*0)
   3042          *p++ = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
   3043       } else {
   3044          // MOVK xD, h[i], LSL (16*i)
   3045          *p++ = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
   3046       }
   3047    }
   3048    return p;
   3049 }
   3050 
   3051 /* Check whether p points at a 4-insn sequence cooked up by
   3052    imm64_to_ireg_EXACTLY4(). */
   3053 static Bool is_imm64_to_ireg_EXACTLY4 ( UInt* p, Int xD, ULong imm64 )
   3054 {
   3055    UShort h[4];
   3056    h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
   3057    h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
   3058    h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
   3059    h[0] = (UShort)((imm64 >>  0) & 0xFFFF);
    3060    // Check upwards through h[i], expecting the MOVZ/MOVK sequence
    3061    // that imm64_to_ireg_EXACTLY4 would have emitted.
   3062    UInt i;
   3063    for (i = 0; i < 4; i++) {
   3064       UInt expected;
   3065       if (i == 0) {
   3066          // MOVZ xD, h[0], LSL (16*0)
   3067          expected = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
   3068       } else {
   3069          // MOVK xD, h[i], LSL (16*i)
   3070          expected = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
   3071       }
   3072       if (p[i] != expected)
   3073          return False;
   3074    }
   3075    return True;
   3076 }
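/* Worked example (editor's addition): for the same immediate as above,
   imm64_to_ireg_EXACTLY4(p, 9, 0x0000444400001111ULL) instead emits the
   fixed-length sequence
      MOVZ x9, #0x1111, LSL #0
      MOVK x9, #0x0000, LSL #16
      MOVK x9, #0x4444, LSL #32
      MOVK x9, #0x0000, LSL #48
   so that the patching code can rely on the sequence being exactly four
   instructions long. */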
   3077 
   3078 
    3079 /* Generate an 8-bit store or 8-to-64 unsigned widening load from/to
   3080    rD, using the given amode for the address. */
   3081 static UInt* do_load_or_store8 ( UInt* p,
   3082                                  Bool isLoad, UInt wD, ARM64AMode* am )
   3083 {
   3084    vassert(wD <= 30);
   3085    if (am->tag == ARM64am_RI9) {
   3086       /* STURB Wd, [Xn|SP + simm9]:  00 111000 000 simm9 00 n d
   3087          LDURB Wd, [Xn|SP + simm9]:  00 111000 010 simm9 00 n d
   3088       */
   3089       Int simm9 = am->ARM64am.RI9.simm9;
   3090       vassert(-256 <= simm9 && simm9 <= 255);
   3091       UInt instr = X_2_6_3_9_2_5_5(X00, X111000, isLoad ? X010 : X000,
   3092                                    simm9 & 0x1FF, X00,
   3093                                    iregEnc(am->ARM64am.RI9.reg), wD);
   3094       *p++ = instr;
   3095       return p;
   3096    }
   3097    if (am->tag == ARM64am_RI12) {
   3098       /* STRB Wd, [Xn|SP + uimm12 * 1]:  00 111 001 00 imm12 n d
   3099          LDRB Wd, [Xn|SP + uimm12 * 1]:  00 111 001 01 imm12 n d
   3100       */
   3101       UInt uimm12 = am->ARM64am.RI12.uimm12;
   3102       UInt scale  = am->ARM64am.RI12.szB;
   3103       vassert(scale == 1); /* failure of this is serious.  Do not ignore. */
   3104       UInt xN    = iregEnc(am->ARM64am.RI12.reg);
   3105       vassert(xN <= 30);
   3106       UInt instr = X_2_6_2_12_5_5(X00, X111001, isLoad ? X01 : X00,
   3107                                   uimm12, xN, wD);
   3108       *p++ = instr;
   3109       return p;
   3110    }
   3111    if (am->tag == ARM64am_RR) {
    3112       /* STRB Wd, [Xn|SP, Xm]: 00 111 000 001 m 011 0 10 n d
    3113          LDRB Wd, [Xn|SP, Xm]: 00 111 000 011 m 011 0 10 n d
   3114       */
   3115       UInt xN = iregEnc(am->ARM64am.RR.base);
   3116       UInt xM = iregEnc(am->ARM64am.RR.index);
   3117       vassert(xN <= 30);
   3118       UInt instr = X_3_8_5_6_5_5(X001, isLoad ? X11000011 : X11000001,
   3119                                  xM, X011010, xN, wD);
   3120       *p++ = instr;
   3121       return p;
   3122    }
   3123    vpanic("do_load_or_store8");
   3124    vassert(0);
   3125 }
   3126 
   3127 
   3128 /* Generate a 16 bit store or 16-to-64 unsigned widening load from/to
   3129    rD, using the given amode for the address. */
   3130 static UInt* do_load_or_store16 ( UInt* p,
   3131                                   Bool isLoad, UInt wD, ARM64AMode* am )
   3132 {
   3133    vassert(wD <= 30);
   3134    if (am->tag == ARM64am_RI9) {
   3135       /* STURH Wd, [Xn|SP + simm9]:  01 111000 000 simm9 00 n d
   3136          LDURH Wd, [Xn|SP + simm9]:  01 111000 010 simm9 00 n d
   3137       */
   3138       Int simm9 = am->ARM64am.RI9.simm9;
   3139       vassert(-256 <= simm9 && simm9 <= 255);
   3140       UInt instr = X_2_6_3_9_2_5_5(X01, X111000, isLoad ? X010 : X000,
   3141                                    simm9 & 0x1FF, X00,
   3142                                    iregEnc(am->ARM64am.RI9.reg), wD);
   3143       *p++ = instr;
   3144       return p;
   3145    }
   3146    if (am->tag == ARM64am_RI12) {
   3147       /* STRH Wd, [Xn|SP + uimm12 * 2]:  01 111 001 00 imm12 n d
   3148          LDRH Wd, [Xn|SP + uimm12 * 2]:  01 111 001 01 imm12 n d
   3149       */
   3150       UInt uimm12 = am->ARM64am.RI12.uimm12;
   3151       UInt scale  = am->ARM64am.RI12.szB;
   3152       vassert(scale == 2); /* failure of this is serious.  Do not ignore. */
   3153       UInt xN    = iregEnc(am->ARM64am.RI12.reg);
   3154       vassert(xN <= 30);
   3155       UInt instr = X_2_6_2_12_5_5(X01, X111001, isLoad ? X01 : X00,
   3156                                   uimm12, xN, wD);
   3157       *p++ = instr;
   3158       return p;
   3159    }
   3160    if (am->tag == ARM64am_RR) {
    3161       /* STRH Wd, [Xn|SP, Xm]: 01 111 000 001 m 011 0 10 n d
    3162          LDRH Wd, [Xn|SP, Xm]: 01 111 000 011 m 011 0 10 n d
   3163       */
   3164       UInt xN = iregEnc(am->ARM64am.RR.base);
   3165       UInt xM = iregEnc(am->ARM64am.RR.index);
   3166       vassert(xN <= 30);
   3167       UInt instr = X_3_8_5_6_5_5(X011, isLoad ? X11000011 : X11000001,
   3168                                  xM, X011010, xN, wD);
   3169       *p++ = instr;
   3170       return p;
   3171    }
   3172    vpanic("do_load_or_store16");
   3173    vassert(0);
   3174 }
   3175 
   3176 
   3177 /* Generate a 32 bit store or 32-to-64 unsigned widening load from/to
   3178    rD, using the given amode for the address. */
   3179 static UInt* do_load_or_store32 ( UInt* p,
   3180                                   Bool isLoad, UInt wD, ARM64AMode* am )
   3181 {
   3182    vassert(wD <= 30);
   3183    if (am->tag == ARM64am_RI9) {
   3184       /* STUR Wd, [Xn|SP + simm9]:  10 111000 000 simm9 00 n d
   3185          LDUR Wd, [Xn|SP + simm9]:  10 111000 010 simm9 00 n d
   3186       */
   3187       Int simm9 = am->ARM64am.RI9.simm9;
   3188       vassert(-256 <= simm9 && simm9 <= 255);
   3189       UInt instr = X_2_6_3_9_2_5_5(X10, X111000, isLoad ? X010 : X000,
   3190                                    simm9 & 0x1FF, X00,
   3191                                    iregEnc(am->ARM64am.RI9.reg), wD);
   3192       *p++ = instr;
   3193       return p;
   3194    }
   3195    if (am->tag == ARM64am_RI12) {
   3196       /* STR Wd, [Xn|SP + uimm12 * 4]:  10 111 001 00 imm12 n d
   3197          LDR Wd, [Xn|SP + uimm12 * 4]:  10 111 001 01 imm12 n d
   3198       */
   3199       UInt uimm12 = am->ARM64am.RI12.uimm12;
   3200       UInt scale  = am->ARM64am.RI12.szB;
   3201       vassert(scale == 4); /* failure of this is serious.  Do not ignore. */
   3202       UInt xN    = iregEnc(am->ARM64am.RI12.reg);
   3203       vassert(xN <= 30);
   3204       UInt instr = X_2_6_2_12_5_5(X10, X111001, isLoad ? X01 : X00,
   3205                                   uimm12, xN, wD);
   3206       *p++ = instr;
   3207       return p;
   3208    }
   3209    if (am->tag == ARM64am_RR) {
   3210       /* STR Wd, [Xn|SP, Xm]: 10 111 000 001 m 011 0 10 n d
   3211          LDR Wd, [Xn|SP, Xm]: 10 111 000 011 m 011 0 10 n d
   3212       */
   3213       UInt xN = iregEnc(am->ARM64am.RR.base);
   3214       UInt xM = iregEnc(am->ARM64am.RR.index);
   3215       vassert(xN <= 30);
   3216       UInt instr = X_3_8_5_6_5_5(X101, isLoad ? X11000011 : X11000001,
   3217                                  xM, X011010, xN, wD);
   3218       *p++ = instr;
   3219       return p;
   3220    }
   3221    vpanic("do_load_or_store32");
   3222    vassert(0);
   3223 }
   3224 
   3225 
   3226 /* Generate a 64 bit load or store to/from xD, using the given amode
   3227    for the address. */
   3228 static UInt* do_load_or_store64 ( UInt* p,
   3229                                   Bool isLoad, UInt xD, ARM64AMode* am )
   3230 {
   3231    /* In all these cases, Rn can't be 31 since that means SP. */
   3232    vassert(xD <= 30);
   3233    if (am->tag == ARM64am_RI9) {
   3234       /* STUR Xd, [Xn|SP + simm9]:  11 111000 000 simm9 00 n d
   3235          LDUR Xd, [Xn|SP + simm9]:  11 111000 010 simm9 00 n d
   3236       */
   3237       Int simm9 = am->ARM64am.RI9.simm9;
   3238       vassert(-256 <= simm9 && simm9 <= 255);
   3239       UInt xN = iregEnc(am->ARM64am.RI9.reg);
   3240       vassert(xN <= 30);
   3241       UInt instr = X_2_6_3_9_2_5_5(X11, X111000, isLoad ? X010 : X000,
   3242                                    simm9 & 0x1FF, X00, xN, xD);
   3243       *p++ = instr;
   3244       return p;
   3245    }
   3246    if (am->tag == ARM64am_RI12) {
   3247       /* STR Xd, [Xn|SP + uimm12 * 8]:  11 111 001 00 imm12 n d
   3248          LDR Xd, [Xn|SP + uimm12 * 8]:  11 111 001 01 imm12 n d
   3249       */
   3250       UInt uimm12 = am->ARM64am.RI12.uimm12;
   3251       UInt scale  = am->ARM64am.RI12.szB;
   3252       vassert(scale == 8); /* failure of this is serious.  Do not ignore. */
   3253       UInt xN    = iregEnc(am->ARM64am.RI12.reg);
   3254       vassert(xN <= 30);
   3255       UInt instr = X_2_6_2_12_5_5(X11, X111001, isLoad ? X01 : X00,
   3256                                   uimm12, xN, xD);
   3257       *p++ = instr;
   3258       return p;
   3259    }
   3260    if (am->tag == ARM64am_RR) {
   3261       /* STR Xd, [Xn|SP, Xm]: 11 111 000 001 m 011 0 10 n d
   3262          LDR Xd, [Xn|SP, Xm]: 11 111 000 011 m 011 0 10 n d
   3263       */
   3264       UInt xN = iregEnc(am->ARM64am.RR.base);
   3265       UInt xM = iregEnc(am->ARM64am.RR.index);
   3266       vassert(xN <= 30);
   3267       UInt instr = X_3_8_5_6_5_5(X111, isLoad ? X11000011 : X11000001,
   3268                                  xM, X011010, xN, xD);
   3269       *p++ = instr;
   3270       return p;
   3271    }
   3272    vpanic("do_load_or_store64");
   3273    vassert(0);
   3274 }
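/* Illustrative example (editor's addition): with an ARM64am_RI12 amode of
   base x21, uimm12 == 2, szB == 8, a 64-bit load of x0 assembles as
      X_2_6_2_12_5_5(X11, X111001, X01, 2, 21, 0) == 0xF9400AA0
   i.e. LDR x0, [x21, #16].  The 8/16/32-bit helpers above differ only in
   the size bits and the required szB scaling. */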
   3275 
   3276 
   3277 /* Emit an instruction into buf and return the number of bytes used.
   3278    Note that buf is not the insn's final place, and therefore it is
   3279    imperative to emit position-independent code.  If the emitted
   3280    instruction was a profiler inc, set *is_profInc to True, else
   3281    leave it unchanged. */
   3282 
   3283 Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
   3284                       UChar* buf, Int nbuf, const ARM64Instr* i,
   3285                       Bool mode64, VexEndness endness_host,
   3286                       const void* disp_cp_chain_me_to_slowEP,
   3287                       const void* disp_cp_chain_me_to_fastEP,
   3288                       const void* disp_cp_xindir,
   3289                       const void* disp_cp_xassisted )
   3290 {
   3291    UInt* p = (UInt*)buf;
   3292    vassert(nbuf >= 32);
   3293    vassert(mode64 == True);
   3294    vassert(0 == (((HWord)buf) & 3));
   3295 
   3296    switch (i->tag) {
   3297       case ARM64in_Arith: {
   3298          UInt      rD   = iregEnc(i->ARM64in.Arith.dst);
   3299          UInt      rN   = iregEnc(i->ARM64in.Arith.argL);
   3300          ARM64RIA* argR = i->ARM64in.Arith.argR;
   3301          switch (argR->tag) {
   3302             case ARM64riA_I12:
   3303                *p++ = X_2_6_2_12_5_5(
   3304                          i->ARM64in.Arith.isAdd ? X10 : X11,
   3305                          X010001,
   3306                          argR->ARM64riA.I12.shift == 12 ? X01 : X00,
   3307                          argR->ARM64riA.I12.imm12, rN, rD
   3308                       );
   3309                break;
   3310             case ARM64riA_R: {
   3311                UInt rM = iregEnc(i->ARM64in.Arith.argR->ARM64riA.R.reg);
   3312                *p++ = X_3_8_5_6_5_5(
   3313                          i->ARM64in.Arith.isAdd ? X100 : X110,
   3314                          X01011000, rM, X000000, rN, rD
   3315                       );
   3316                break;
   3317             }
   3318             default:
   3319                goto bad;
   3320          }
   3321          goto done;
   3322       }
   3323       case ARM64in_Cmp: {
   3324          UInt      rD   = 31; /* XZR, we are going to dump the result */
   3325          UInt      rN   = iregEnc(i->ARM64in.Cmp.argL);
   3326          ARM64RIA* argR = i->ARM64in.Cmp.argR;
   3327          Bool      is64 = i->ARM64in.Cmp.is64;
   3328          switch (argR->tag) {
   3329             case ARM64riA_I12:
   3330                /* 1 11 10001 sh imm12 Rn Rd = SUBS Xd, Xn, #imm */
   3331                /* 0 11 10001 sh imm12 Rn Rd = SUBS Wd, Wn, #imm */
   3332                *p++ = X_2_6_2_12_5_5(
   3333                          is64 ? X11 : X01, X110001,
   3334                          argR->ARM64riA.I12.shift == 12 ? X01 : X00,
   3335                          argR->ARM64riA.I12.imm12, rN, rD);
   3336                break;
   3337             case ARM64riA_R: {
   3338                /* 1 11 01011 00 0 Rm 000000 Rn Rd = SUBS Xd, Xn, Xm */
   3339                /* 0 11 01011 00 0 Rm 000000 Rn Rd = SUBS Wd, Wn, Wm */
   3340                UInt rM = iregEnc(i->ARM64in.Cmp.argR->ARM64riA.R.reg);
   3341                *p++ = X_3_8_5_6_5_5(is64 ? X111 : X011,
   3342                                     X01011000, rM, X000000, rN, rD);
   3343                break;
   3344             }
   3345             default:
   3346                goto bad;
   3347          }
   3348          goto done;
   3349       }
   3350       case ARM64in_Logic: {
   3351          UInt      rD   = iregEnc(i->ARM64in.Logic.dst);
   3352          UInt      rN   = iregEnc(i->ARM64in.Logic.argL);
   3353          ARM64RIL* argR = i->ARM64in.Logic.argR;
   3354          UInt      opc  = 0; /* invalid */
   3355          vassert(rD < 31);
   3356          vassert(rN < 31);
   3357          switch (i->ARM64in.Logic.op) {
   3358             case ARM64lo_OR:  opc = X101; break;
   3359             case ARM64lo_AND: opc = X100; break;
   3360             case ARM64lo_XOR: opc = X110; break;
   3361             default: break;
   3362          }
   3363          vassert(opc != 0);
   3364          switch (argR->tag) {
   3365             case ARM64riL_I13: {
   3366                /* 1 01 100100 N immR immS Rn Rd = ORR <Xd|Sp>, Xn, #imm */
   3367                /* 1 00 100100 N immR immS Rn Rd = AND <Xd|Sp>, Xn, #imm */
   3368                /* 1 10 100100 N immR immS Rn Rd = EOR <Xd|Sp>, Xn, #imm */
   3369                *p++ = X_3_6_1_6_6_5_5(
   3370                          opc, X100100, argR->ARM64riL.I13.bitN,
   3371                          argR->ARM64riL.I13.immR, argR->ARM64riL.I13.immS,
   3372                          rN, rD
   3373                       );
   3374                break;
   3375             }
   3376             case ARM64riL_R: {
   3377                /* 1 01 01010 00 0 m 000000 n d = ORR Xd, Xn, Xm */
   3378                /* 1 00 01010 00 0 m 000000 n d = AND Xd, Xn, Xm */
   3379                /* 1 10 01010 00 0 m 000000 n d = EOR Xd, Xn, Xm */
   3380                UInt rM = iregEnc(argR->ARM64riL.R.reg);
   3381                vassert(rM < 31);
   3382                *p++ = X_3_8_5_6_5_5(opc, X01010000, rM, X000000, rN, rD);
   3383                break;
   3384             }
   3385             default:
   3386                goto bad;
   3387          }
   3388          goto done;
   3389       }
   3390       case ARM64in_Test: {
   3391          UInt      rD   = 31; /* XZR, we are going to dump the result */
   3392          UInt      rN   = iregEnc(i->ARM64in.Test.argL);
   3393          ARM64RIL* argR = i->ARM64in.Test.argR;
   3394          switch (argR->tag) {
   3395             case ARM64riL_I13: {
   3396                /* 1 11 100100 N immR immS Rn Rd = ANDS Xd, Xn, #imm */
   3397                *p++ = X_3_6_1_6_6_5_5(
   3398                          X111, X100100, argR->ARM64riL.I13.bitN,
   3399                          argR->ARM64riL.I13.immR, argR->ARM64riL.I13.immS,
   3400                          rN, rD
   3401                       );
   3402                break;
   3403             }
   3404             default:
   3405                goto bad;
   3406          }
   3407          goto done;
   3408       }
   3409       case ARM64in_Shift: {
   3410          UInt      rD   = iregEnc(i->ARM64in.Shift.dst);
   3411          UInt      rN   = iregEnc(i->ARM64in.Shift.argL);
   3412          ARM64RI6* argR = i->ARM64in.Shift.argR;
   3413          vassert(rD < 31);
   3414          vassert(rN < 31);
   3415          switch (argR->tag) {
   3416             case ARM64ri6_I6: {
    3417                /* 110 1001101 (64-sh) (63-sh) nn dd   LSL Xd, Xn, sh */
   3418                /* 110 1001101 sh      63      nn dd   LSR Xd, Xn, sh */
   3419                /* 100 1001101 sh      63      nn dd   ASR Xd, Xn, sh */
   3420                UInt sh = argR->ARM64ri6.I6.imm6;
   3421                vassert(sh > 0 && sh < 64);
   3422                switch (i->ARM64in.Shift.op) {
   3423                   case ARM64sh_SHL:
   3424                      *p++ = X_3_6_1_6_6_5_5(X110, X100110,
   3425                                             1, 64-sh, 63-sh, rN, rD);
   3426                      break;
   3427                   case ARM64sh_SHR:
   3428                      *p++ = X_3_6_1_6_6_5_5(X110, X100110, 1, sh, 63, rN, rD);
   3429                      break;
   3430                   case ARM64sh_SAR:
   3431                      *p++ = X_3_6_1_6_6_5_5(X100, X100110, 1, sh, 63, rN, rD);
   3432                      break;
   3433                   default:
   3434                      vassert(0);
   3435                }
   3436                break;
   3437             }
   3438             case ARM64ri6_R: {
   3439                /* 100 1101 0110 mm 001000 nn dd   LSL Xd, Xn, Xm */
   3440                /* 100 1101 0110 mm 001001 nn dd   LSR Xd, Xn, Xm */
   3441                /* 100 1101 0110 mm 001010 nn dd   ASR Xd, Xn, Xm */
   3442                UInt rM = iregEnc(argR->ARM64ri6.R.reg);
   3443                vassert(rM < 31);
   3444                UInt subOpc = 0;
   3445                switch (i->ARM64in.Shift.op) {
   3446                   case ARM64sh_SHL: subOpc = X001000; break;
   3447                   case ARM64sh_SHR: subOpc = X001001; break;
   3448                   case ARM64sh_SAR: subOpc = X001010; break;
   3449                   default: vassert(0);
   3450                }
   3451                *p++ = X_3_8_5_6_5_5(X100, X11010110, rM, subOpc, rN, rD);
   3452                break;
   3453             }
   3454             default:
   3455                vassert(0);
   3456          }
   3457          goto done;
   3458       }
   3459       case ARM64in_Unary: {
   3460          UInt rDst = iregEnc(i->ARM64in.Unary.dst);
   3461          UInt rSrc = iregEnc(i->ARM64in.Unary.src);
   3462          switch (i->ARM64in.Unary.op) {
   3463             case ARM64un_CLZ:
   3464                /* 1 10 1101 0110 00000 00010 0 nn dd   CLZ Xd, Xn */
   3465                /* 1 10 1101 0110 00000 00010 1 nn dd   CLS Xd, Xn (unimp) */
   3466                *p++ = X_3_8_5_6_5_5(X110,
   3467                                     X11010110, X00000, X000100, rSrc, rDst);
   3468                goto done;
   3469             case ARM64un_NEG:
   3470                /* 1 10 01011 000 m 000000 11111 d  NEG Xd,Xm */
   3471                /* 0 10 01011 000 m 000000 11111 d  NEG Wd,Wm (unimp) */
   3472                *p++ = X_3_8_5_6_5_5(X110,
   3473                                     X01011000, rSrc, X000000, X11111, rDst);
   3474                goto done;
   3475             case ARM64un_NOT: {
   3476                /* 1 01 01010 00 1 m 000000 11111 d   MVN Xd,Xm */
   3477                *p++ = X_3_8_5_6_5_5(X101,
   3478                                     X01010001, rSrc, X000000, X11111, rDst);
   3479                goto done;
   3480             }
   3481             default:
   3482                break;
   3483          }
   3484          goto bad;
   3485       }
   3486       case ARM64in_MovI: {
   3487          /* We generate the "preferred form", ORR Xd, XZR, Xm
   3488             101 01010 00 0 m 000000 11111 d
   3489          */
   3490          UInt instr = 0xAA0003E0;
   3491          UInt d     = iregEnc(i->ARM64in.MovI.dst);
   3492          UInt m     = iregEnc(i->ARM64in.MovI.src);
   3493          *p++ = instr | ((m & 31) << 16) | ((d & 31) << 0);
   3494          goto done;
   3495       }
   3496       case ARM64in_Imm64: {
   3497          p = imm64_to_ireg( p, iregEnc(i->ARM64in.Imm64.dst),
   3498                                i->ARM64in.Imm64.imm64 );
   3499          goto done;
   3500       }
   3501       case ARM64in_LdSt64: {
   3502          p = do_load_or_store64( p, i->ARM64in.LdSt64.isLoad,
   3503                                  iregEnc(i->ARM64in.LdSt64.rD),
   3504                                  i->ARM64in.LdSt64.amode );
   3505          goto done;
   3506       }
   3507       case ARM64in_LdSt32: {
   3508          p = do_load_or_store32( p, i->ARM64in.LdSt32.isLoad,
   3509                                  iregEnc(i->ARM64in.LdSt32.rD),
   3510                                  i->ARM64in.LdSt32.amode );
   3511          goto done;
   3512       }
   3513       case ARM64in_LdSt16: {
   3514          p = do_load_or_store16( p, i->ARM64in.LdSt16.isLoad,
   3515                                  iregEnc(i->ARM64in.LdSt16.rD),
   3516                                  i->ARM64in.LdSt16.amode );
   3517          goto done;
   3518       }
   3519       case ARM64in_LdSt8: {
   3520          p = do_load_or_store8( p, i->ARM64in.LdSt8.isLoad,
   3521                                 iregEnc(i->ARM64in.LdSt8.rD),
   3522                                 i->ARM64in.LdSt8.amode );
   3523          goto done;
   3524       }
   3525 
   3526       case ARM64in_XDirect: {
   3527          /* NB: what goes on here has to be very closely coordinated
   3528             with chainXDirect_ARM64 and unchainXDirect_ARM64 below. */
   3529          /* We're generating chain-me requests here, so we need to be
   3530             sure this is actually allowed -- no-redir translations
   3531             can't use chain-me's.  Hence: */
   3532          vassert(disp_cp_chain_me_to_slowEP != NULL);
   3533          vassert(disp_cp_chain_me_to_fastEP != NULL);
   3534 
   3535          /* Use ptmp for backpatching conditional jumps. */
   3536          UInt* ptmp = NULL;
   3537 
   3538          /* First off, if this is conditional, create a conditional
   3539             jump over the rest of it.  Or at least, leave a space for
   3540             it that we will shortly fill in. */
   3541          if (i->ARM64in.XDirect.cond != ARM64cc_AL) {
   3542             vassert(i->ARM64in.XDirect.cond != ARM64cc_NV);
   3543             ptmp = p;
   3544             *p++ = 0;
   3545          }
   3546 
   3547          /* Update the guest PC. */
   3548          /* imm64 x9, dstGA */
   3549          /* str   x9, amPC */
   3550          p = imm64_to_ireg(p, /*x*/9, i->ARM64in.XDirect.dstGA);
   3551          p = do_load_or_store64(p, False/*!isLoad*/,
   3552                                 /*x*/9, i->ARM64in.XDirect.amPC);
   3553 
   3554          /* --- FIRST PATCHABLE BYTE follows --- */
   3555          /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
   3556             calling to) backs up the return address, so as to find the
   3557             address of the first patchable byte.  So: don't change the
   3558             number of instructions (5) below. */
    3559          /* movz x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[15:0] */
    3560          /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[31:16], lsl 16 */
   3561          /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[47:32], lsl 32 */
   3562          /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[63:48], lsl 48 */
   3563          /* blr  x9 */
   3564          const void* disp_cp_chain_me
   3565                   = i->ARM64in.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
   3566                                                 : disp_cp_chain_me_to_slowEP;
   3567          p = imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)disp_cp_chain_me);
   3568          *p++ = 0xD63F0120;
   3569          /* --- END of PATCHABLE BYTES --- */
   3570 
   3571          /* Fix up the conditional jump, if there was one. */
   3572          if (i->ARM64in.XDirect.cond != ARM64cc_AL) {
   3573             Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
   3574             vassert(delta > 0 && delta < 40);
   3575             vassert((delta & 3) == 0);
   3576             UInt notCond = 1 ^ (UInt)i->ARM64in.XDirect.cond;
   3577             vassert(notCond <= 13); /* Neither AL nor NV */
   3578             vassert(ptmp != NULL);
   3579             delta = delta >> 2;
   3580             *ptmp = X_8_19_1_4(X01010100, delta & ((1<<19)-1), 0, notCond);
   3581          }
   3582          goto done;
   3583       }
   3584 
   3585       case ARM64in_XIndir: {
   3586          // XIndir is more or less the same as XAssisted, except
   3587          // we don't have a trc value to hand back, so there's no
    3588          // write to x21
   3589          /* Use ptmp for backpatching conditional jumps. */
   3590          //UInt* ptmp = NULL;
   3591 
   3592          /* First off, if this is conditional, create a conditional
   3593             jump over the rest of it.  Or at least, leave a space for
   3594             it that we will shortly fill in. */
   3595          if (i->ARM64in.XIndir.cond != ARM64cc_AL) {
   3596             vassert(0); //ATC
   3597 //ZZ             vassert(i->ARMin.XIndir.cond != ARMcc_NV);
   3598 //ZZ             ptmp = p;
   3599 //ZZ             *p++ = 0;
   3600          }
   3601 
   3602          /* Update the guest PC. */
   3603          /* str r-dstGA, amPC */
   3604          p = do_load_or_store64(p, False/*!isLoad*/,
   3605                                 iregEnc(i->ARM64in.XIndir.dstGA),
   3606                                 i->ARM64in.XIndir.amPC);
   3607 
   3608          /* imm64 x9, VG_(disp_cp_xindir) */
   3609          /* br    x9 */
   3610          p = imm64_to_ireg(p, /*x*/9, (Addr)disp_cp_xindir);
   3611          *p++ = 0xD61F0120; /* br x9 */
   3612 
   3613          /* Fix up the conditional jump, if there was one. */
   3614          if (i->ARM64in.XIndir.cond != ARM64cc_AL) {
   3615             vassert(0); //ATC
   3616 //ZZ             Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
   3617 //ZZ             vassert(delta > 0 && delta < 40);
   3618 //ZZ             vassert((delta & 3) == 0);
   3619 //ZZ             UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
   3620 //ZZ             vassert(notCond <= 13); /* Neither AL nor NV */
   3621 //ZZ             delta = (delta >> 2) - 2;
   3622 //ZZ             *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
   3623          }
   3624          goto done;
   3625       }
   3626 
   3627       case ARM64in_XAssisted: {
   3628          /* Use ptmp for backpatching conditional jumps. */
   3629          UInt* ptmp = NULL;
   3630 
   3631          /* First off, if this is conditional, create a conditional
   3632             jump over the rest of it.  Or at least, leave a space for
   3633             it that we will shortly fill in.  I think this can only
   3634             ever happen when VEX is driven by the switchbacker. */
   3635          if (i->ARM64in.XAssisted.cond != ARM64cc_AL) {
    3636             vassert(i->ARM64in.XAssisted.cond != ARM64cc_NV);
   3637             ptmp = p;
   3638             *p++ = 0;
   3639          }
   3640 
   3641          /* Update the guest PC. */
   3642          /* str r-dstGA, amPC */
   3643          p = do_load_or_store64(p, False/*!isLoad*/,
   3644                                 iregEnc(i->ARM64in.XAssisted.dstGA),
   3645                                 i->ARM64in.XAssisted.amPC);
   3646 
    3647          /* imm64 x21, $magic_number */
   3648          UInt trcval = 0;
   3649          switch (i->ARM64in.XAssisted.jk) {
   3650             case Ijk_ClientReq:   trcval = VEX_TRC_JMP_CLIENTREQ;   break;
   3651             case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
   3652             //case Ijk_Sys_int128:  trcval = VEX_TRC_JMP_SYS_INT128;  break;
   3653             case Ijk_Yield:       trcval = VEX_TRC_JMP_YIELD;       break;
   3654             //case Ijk_EmWarn:      trcval = VEX_TRC_JMP_EMWARN;      break;
   3655             //case Ijk_MapFail:     trcval = VEX_TRC_JMP_MAPFAIL;     break;
   3656             case Ijk_NoDecode:    trcval = VEX_TRC_JMP_NODECODE;    break;
   3657             case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
   3658             case Ijk_FlushDCache: trcval = VEX_TRC_JMP_FLUSHDCACHE; break;
   3659             case Ijk_NoRedir:     trcval = VEX_TRC_JMP_NOREDIR;     break;
   3660             case Ijk_SigTRAP:     trcval = VEX_TRC_JMP_SIGTRAP;     break;
   3661             //case Ijk_SigSEGV:     trcval = VEX_TRC_JMP_SIGSEGV;     break;
   3662             case Ijk_Boring:      trcval = VEX_TRC_JMP_BORING;      break;
   3663             /* We don't expect to see the following being assisted. */
   3664             //case Ijk_Ret:
   3665             //case Ijk_Call:
   3666             /* fallthrough */
   3667             default:
   3668                ppIRJumpKind(i->ARM64in.XAssisted.jk);
   3669                vpanic("emit_ARM64Instr.ARM64in_XAssisted: "
   3670                       "unexpected jump kind");
   3671          }
   3672          vassert(trcval != 0);
   3673          p = imm64_to_ireg(p, /*x*/21, (ULong)trcval);
   3674 
   3675          /* imm64 x9, VG_(disp_cp_xassisted) */
   3676          /* br    x9 */
   3677          p = imm64_to_ireg(p, /*x*/9, (Addr)disp_cp_xassisted);
   3678          *p++ = 0xD61F0120; /* br x9 */
   3679 
   3680          /* Fix up the conditional jump, if there was one. */
   3681          if (i->ARM64in.XAssisted.cond != ARM64cc_AL) {
   3682             Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
   3683             vassert(delta > 0 && delta < 40);
   3684             vassert((delta & 3) == 0);
    3685             UInt notCond = 1 ^ (UInt)i->ARM64in.XAssisted.cond;
   3686             vassert(notCond <= 13); /* Neither AL nor NV */
   3687             vassert(ptmp != NULL);
   3688             delta = delta >> 2;
   3689             *ptmp = X_8_19_1_4(X01010100, delta & ((1<<19)-1), 0, notCond);
   3690          }
   3691          goto done;
   3692       }
   3693 
   3694       case ARM64in_CSel: {
   3695          /* 100 1101 0100 mm cond 00 nn dd = CSEL Xd, Xn, Xm, cond */
   3696          UInt dd   = iregEnc(i->ARM64in.CSel.dst);
   3697          UInt nn   = iregEnc(i->ARM64in.CSel.argL);
   3698          UInt mm   = iregEnc(i->ARM64in.CSel.argR);
   3699          UInt cond = (UInt)i->ARM64in.CSel.cond;
   3700          vassert(dd < 31 && nn < 31 && mm < 31 && cond < 16);
   3701          *p++ = X_3_8_5_6_5_5(X100, X11010100, mm, cond << 2, nn, dd);
   3702          goto done;
   3703       }
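               /* For instance (illustrative only): CSEL X2, X3, X4, EQ in
                  the case above has dd=2, nn=3, mm=4, cond=0, so the
                  emitted word would be
                     X_3_8_5_6_5_5(X100, X11010100, 4, 0 << 2, 3, 2)
                        == 0x9A840062,
                  which is the architectural encoding of
                  "csel x2, x3, x4, eq". */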
   3704 
   3705       case ARM64in_Call: {
   3706          /* We'll use x9 as a scratch register to put the target
   3707             address in. */
   3708          if (i->ARM64in.Call.cond != ARM64cc_AL
   3709              && i->ARM64in.Call.rloc.pri != RLPri_None) {
   3710             /* The call might not happen (it isn't unconditional) and
   3711                it returns a result.  In this case we will need to
   3712                generate a control flow diamond to put 0x555..555 in
   3713                the return register(s) in the case where the call
   3714                doesn't happen.  If this ever becomes necessary, maybe
   3715                copy code from the 32-bit ARM equivalent.  Until that
   3716                day, just give up. */
   3717             goto bad;
   3718          }
   3719 
   3720          UInt* ptmp = NULL;
   3721          if (i->ARM64in.Call.cond != ARM64cc_AL) {
   3722             /* Create a hole to put a conditional branch in.  We'll
   3723                patch it once we know the branch length. */
   3724             ptmp = p;
   3725             *p++ = 0;
   3726          }
   3727 
   3728          // x9 = &target
   3729          p = imm64_to_ireg( (UInt*)p, /*x*/9, (ULong)i->ARM64in.Call.target );
   3730          // blr x9
   3731          *p++ = 0xD63F0120;
   3732 
   3733          // Patch the hole if necessary
   3734          if (i->ARM64in.Call.cond != ARM64cc_AL) {
   3735             ULong dist = (ULong)(p - ptmp);
   3736             /* imm64_to_ireg produces between 1 and 4 insns, and
   3737                then there's the BLR itself.  Hence: */
   3738             vassert(dist >= 2 && dist <= 5);
   3739             vassert(ptmp != NULL);
   3740             // 01010100 simm19 0 cond = B.cond (here + simm19 << 2)
   3741             *ptmp = X_8_19_1_4(X01010100, dist, 0,
   3742                                1 ^ (UInt)i->ARM64in.Call.cond);
   3743          } else {
   3744             vassert(ptmp == NULL);
   3745          }
   3746 
   3747          goto done;
   3748       }
   3749 
   3750       case ARM64in_AddToSP: {
   3751          /* 10,0 10001 00 imm12 11111 11111  ADD xsp, xsp, #imm12
   3752             11,0 10001 00 imm12 11111 11111  SUB xsp, xsp, #imm12
   3753          */
   3754          Int simm12 = i->ARM64in.AddToSP.simm;
   3755          vassert(-4096 < simm12 && simm12 < 4096);
   3756          vassert(0 == (simm12 & 0xF));
   3757          if (simm12 >= 0) {
   3758             *p++ = X_2_6_2_12_5_5(X10, X010001, X00, simm12, X11111, X11111);
   3759          } else {
   3760             *p++ = X_2_6_2_12_5_5(X11, X010001, X00, -simm12, X11111, X11111);
   3761          }
   3762          goto done;
   3763       }
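               /* Illustration (not emitted here): AddToSP with simm == -48
                  takes the SUB form above, producing
                     X_2_6_2_12_5_5(X11, X010001, X00, 48, X11111, X11111)
                        == 0xD100C3FF,
                  i.e. "sub sp, sp, #0x30". */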
   3764 
   3765       case ARM64in_FromSP: {
   3766          /* 10,0 10001 00 0..(12)..0 11111 dd  MOV Xd, xsp */
   3767          UInt dd = iregEnc(i->ARM64in.FromSP.dst);
   3768          vassert(dd < 31);
   3769          *p++ = X_2_6_2_12_5_5(X10, X010001, X00, 0, X11111, dd);
   3770          goto done;
   3771       }
   3772 
   3773       case ARM64in_Mul: {
   3774          /* 100 11011 110 mm 011111 nn dd   UMULH Xd, Xn,Xm
   3775             100 11011 010 mm 011111 nn dd   SMULH Xd, Xn,Xm
   3776             100 11011 000 mm 011111 nn dd   MUL   Xd, Xn,Xm
   3777          */
   3778          UInt dd = iregEnc(i->ARM64in.Mul.dst);
   3779          UInt nn = iregEnc(i->ARM64in.Mul.argL);
   3780          UInt mm = iregEnc(i->ARM64in.Mul.argR);
   3781          vassert(dd < 31 && nn < 31 && mm < 31);
   3782          switch (i->ARM64in.Mul.op) {
   3783             case ARM64mul_ZX:
   3784                *p++ = X_3_8_5_6_5_5(X100, X11011110, mm, X011111, nn, dd);
   3785                goto done;
   3786             case ARM64mul_SX:
   3787                *p++ = X_3_8_5_6_5_5(X100, X11011010, mm, X011111, nn, dd);
   3788                goto done;
   3789             case ARM64mul_PLAIN:
   3790                *p++ = X_3_8_5_6_5_5(X100, X11011000, mm, X011111, nn, dd);
   3791                goto done;
   3792             default:
   3793                vassert(0);
   3794          }
   3795          goto bad;
   3796       }
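               /* E.g. (illustrative only): UMULH X0, X1, X2 takes the
                  ARM64mul_ZX arm above and would be
                     X_3_8_5_6_5_5(X100, X11011110, 2, X011111, 1, 0)
                        == 0x9BC27C20. */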
   3797       case ARM64in_LdrEX: {
   3798          /* 085F7C82   ldxrb w2, [x4]
   3799             485F7C82   ldxrh w2, [x4]
   3800             885F7C82   ldxr  w2, [x4]
   3801             C85F7C82   ldxr  x2, [x4]
   3802          */
   3803          switch (i->ARM64in.LdrEX.szB) {
   3804             case 1: *p++ = 0x085F7C82; goto done;
   3805             case 2: *p++ = 0x485F7C82; goto done;
   3806             case 4: *p++ = 0x885F7C82; goto done;
   3807             case 8: *p++ = 0xC85F7C82; goto done;
   3808             default: break;
   3809          }
   3810          goto bad;
   3811       }
   3812       case ARM64in_StrEX: {
   3813          /* 08007C82   stxrb w0, w2, [x4]
   3814             48007C82   stxrh w0, w2, [x4]
   3815             88007C82   stxr  w0, w2, [x4]
   3816             C8007C82   stxr  w0, x2, [x4]
   3817          */
   3818          switch (i->ARM64in.StrEX.szB) {
   3819             case 1: *p++ = 0x08007C82; goto done;
   3820             case 2: *p++ = 0x48007C82; goto done;
   3821             case 4: *p++ = 0x88007C82; goto done;
   3822             case 8: *p++ = 0xC8007C82; goto done;
   3823             default: break;
   3824          }
   3825          goto bad;
   3826       }
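               /* Note that LdrEX and StrEX use fixed registers: the address
                  is always x4, the data always x2/w2, and the StrEX status
                  result always w0, as baked into the literal words above.
                  The instruction selector is presumably what marshals the
                  operands into those registers. */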
   3827       case ARM64in_CAS: {
   3828          /* This isn't simple.  For an explanation see the comment in
    3829             host_arm64_defs.h on the definition of ARM64Instr case
   3830             CAS. */
   3831          /* Generate:
   3832               -- one of:
   3833               mov     x8, x5                 // AA0503E8
   3834               and     x8, x5, #0xFFFFFFFF    // 92407CA8
   3835               and     x8, x5, #0xFFFF        // 92403CA8
   3836               and     x8, x5, #0xFF          // 92401CA8
   3837 
   3838               -- one of:
   3839               ldxr    x1, [x3]               // C85F7C61
   3840               ldxr    w1, [x3]               // 885F7C61
   3841               ldxrh   w1, [x3]               // 485F7C61
   3842               ldxrb   w1, [x3]               // 085F7C61
   3843 
   3844               -- always:
   3845               cmp     x1, x8                 // EB08003F
   3846               bne     out                    // 54000061
   3847 
   3848               -- one of:
   3849               stxr    w1, x7, [x3]           // C8017C67
   3850               stxr    w1, w7, [x3]           // 88017C67
   3851               stxrh   w1, w7, [x3]           // 48017C67
   3852               stxrb   w1, w7, [x3]           // 08017C67
   3853 
   3854               -- always:
   3855               eor     x1, x5, x1             // CA0100A1
   3856             out:
   3857          */
   3858          switch (i->ARM64in.CAS.szB) {
   3859             case 8:  *p++ = 0xAA0503E8; break;
   3860             case 4:  *p++ = 0x92407CA8; break;
   3861             case 2:  *p++ = 0x92403CA8; break;
   3862             case 1:  *p++ = 0x92401CA8; break;
   3863             default: vassert(0);
   3864          }
   3865          switch (i->ARM64in.CAS.szB) {
   3866             case 8:  *p++ = 0xC85F7C61; break;
   3867             case 4:  *p++ = 0x885F7C61; break;
   3868             case 2:  *p++ = 0x485F7C61; break;
   3869             case 1:  *p++ = 0x085F7C61; break;
   3870          }
   3871          *p++ = 0xEB08003F;
   3872          *p++ = 0x54000061;
   3873          switch (i->ARM64in.CAS.szB) {
   3874             case 8:  *p++ = 0xC8017C67; break;
   3875             case 4:  *p++ = 0x88017C67; break;
   3876             case 2:  *p++ = 0x48017C67; break;
   3877             case 1:  *p++ = 0x08017C67; break;
   3878          }
   3879          *p++ = 0xCA0100A1;
   3880          goto done;
   3881       }
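               /* For reference: 0x54000061 above decodes as "b.ne .+12",
                  i.e. on a compare mismatch it skips the store-exclusive
                  and the eor, landing at "out". */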
   3882       case ARM64in_MFence: {
   3883          *p++ = 0xD5033F9F; /* DSB sy */
   3884          *p++ = 0xD5033FBF; /* DMB sy */
   3885          *p++ = 0xD5033FDF; /* ISB */
   3886          goto done;
   3887       }
   3888       case ARM64in_ClrEX: {
   3889          *p++ = 0xD5033F5F; /* clrex #15 */
   3890          goto done;
   3891       }
   3892       case ARM64in_VLdStH: {
   3893          /* 01 111101 01 imm12 n t   LDR Ht, [Xn|SP, #imm12 * 2]
   3894             01 111101 00 imm12 n t   STR Ht, [Xn|SP, #imm12 * 2]
   3895          */
   3896          UInt hD     = dregEnc(i->ARM64in.VLdStH.hD);
   3897          UInt rN     = iregEnc(i->ARM64in.VLdStH.rN);
   3898          UInt uimm12 = i->ARM64in.VLdStH.uimm12;
   3899          Bool isLD   = i->ARM64in.VLdStH.isLoad;
   3900          vassert(uimm12 < 8192 && 0 == (uimm12 & 1));
   3901          uimm12 >>= 1;
   3902          vassert(uimm12 < (1<<12));
   3903          vassert(hD < 32);
   3904          vassert(rN < 31);
   3905          *p++ = X_2_6_2_12_5_5(X01, X111101, isLD ? X01 : X00,
   3906                                uimm12, rN, hD);
   3907          goto done;
   3908       }
   3909       case ARM64in_VLdStS: {
   3910          /* 10 111101 01 imm12 n t   LDR St, [Xn|SP, #imm12 * 4]
   3911             10 111101 00 imm12 n t   STR St, [Xn|SP, #imm12 * 4]
   3912          */
   3913          UInt sD     = dregEnc(i->ARM64in.VLdStS.sD);
   3914          UInt rN     = iregEnc(i->ARM64in.VLdStS.rN);
   3915          UInt uimm12 = i->ARM64in.VLdStS.uimm12;
   3916          Bool isLD   = i->ARM64in.VLdStS.isLoad;
   3917          vassert(uimm12 < 16384 && 0 == (uimm12 & 3));
   3918          uimm12 >>= 2;
   3919          vassert(uimm12 < (1<<12));
   3920          vassert(sD < 32);
   3921          vassert(rN < 31);
   3922          *p++ = X_2_6_2_12_5_5(X10, X111101, isLD ? X01 : X00,
   3923                                uimm12, rN, sD);
   3924          goto done;
   3925       }
   3926       case ARM64in_VLdStD: {
   3927          /* 11 111101 01 imm12 n t   LDR Dt, [Xn|SP, #imm12 * 8]
   3928             11 111101 00 imm12 n t   STR Dt, [Xn|SP, #imm12 * 8]
   3929          */
   3930          UInt dD     = dregEnc(i->ARM64in.VLdStD.dD);
   3931          UInt rN     = iregEnc(i->ARM64in.VLdStD.rN);
   3932          UInt uimm12 = i->ARM64in.VLdStD.uimm12;
   3933          Bool isLD   = i->ARM64in.VLdStD.isLoad;
   3934          vassert(uimm12 < 32768 && 0 == (uimm12 & 7));
   3935          uimm12 >>= 3;
   3936          vassert(uimm12 < (1<<12));
   3937          vassert(dD < 32);
   3938          vassert(rN < 31);
   3939          *p++ = X_2_6_2_12_5_5(X11, X111101, isLD ? X01 : X00,
   3940                                uimm12, rN, dD);
   3941          goto done;
   3942       }
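               /* Worked example (illustrative only): loading D5 from
                  [X2, #16] has uimm12 == 16, scaled by 8 down to 2, giving
                     X_2_6_2_12_5_5(X11, X111101, X01, 2, 2, 5)
                        == 0xFD400845,
                  i.e. "ldr d5, [x2, #16]". */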
   3943       case ARM64in_VLdStQ: {
   3944          /* 0100 1100 0000 0000 0111 11 rN rQ   st1 {vQ.2d}, [<rN|SP>]
   3945             0100 1100 0100 0000 0111 11 rN rQ   ld1 {vQ.2d}, [<rN|SP>]
   3946          */
   3947          UInt rQ = qregEnc(i->ARM64in.VLdStQ.rQ);
   3948          UInt rN = iregEnc(i->ARM64in.VLdStQ.rN);
   3949          vassert(rQ < 32);
   3950          vassert(rN < 31);
   3951          if (i->ARM64in.VLdStQ.isLoad) {
   3952             *p++ = 0x4C407C00 | (rN << 5) | rQ;
   3953          } else {
   3954             *p++ = 0x4C007C00 | (rN << 5) | rQ;
   3955          }
   3956          goto done;
   3957       }
   3958       case ARM64in_VCvtI2F: {
   3959          /* 31  28    23 21 20 18  15     9 4
   3960             000 11110 00 1  00 010 000000 n d  SCVTF Sd, Wn
   3961             000 11110 01 1  00 010 000000 n d  SCVTF Dd, Wn
   3962             100 11110 00 1  00 010 000000 n d  SCVTF Sd, Xn
   3963             100 11110 01 1  00 010 000000 n d  SCVTF Dd, Xn
   3964             000 11110 00 1  00 011 000000 n d  UCVTF Sd, Wn
   3965             000 11110 01 1  00 011 000000 n d  UCVTF Dd, Wn
   3966             100 11110 00 1  00 011 000000 n d  UCVTF Sd, Xn
   3967             100 11110 01 1  00 011 000000 n d  UCVTF Dd, Xn
   3968          */
   3969          UInt       rN = iregEnc(i->ARM64in.VCvtI2F.rS);
   3970          UInt       rD = dregEnc(i->ARM64in.VCvtI2F.rD);
   3971          ARM64CvtOp how = i->ARM64in.VCvtI2F.how;
   3972          /* Just handle cases as they show up. */
   3973          switch (how) {
   3974             case ARM64cvt_F32_I32S: /* SCVTF Sd, Wn */
   3975                *p++ = X_3_5_8_6_5_5(X000, X11110, X00100010, X000000, rN, rD);
   3976                break;
   3977             case ARM64cvt_F64_I32S: /* SCVTF Dd, Wn */
   3978                *p++ = X_3_5_8_6_5_5(X000, X11110, X01100010, X000000, rN, rD);
   3979                break;
   3980             case ARM64cvt_F32_I64S: /* SCVTF Sd, Xn */
   3981                *p++ = X_3_5_8_6_5_5(X100, X11110, X00100010, X000000, rN, rD);
   3982                break;
   3983             case ARM64cvt_F64_I64S: /* SCVTF Dd, Xn */
   3984                *p++ = X_3_5_8_6_5_5(X100, X11110, X01100010, X000000, rN, rD);
   3985                break;
   3986             case ARM64cvt_F32_I32U: /* UCVTF Sd, Wn */
   3987                *p++ = X_3_5_8_6_5_5(X000, X11110, X00100011, X000000, rN, rD);
   3988                break;
   3989             case ARM64cvt_F64_I32U: /* UCVTF Dd, Wn */
   3990                *p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X000000, rN, rD);
   3991                break;
   3992             case ARM64cvt_F32_I64U: /* UCVTF Sd, Xn */
   3993                *p++ = X_3_5_8_6_5_5(X100, X11110, X00100011, X000000, rN, rD);
   3994                break;
   3995             case ARM64cvt_F64_I64U: /* UCVTF Dd, Xn  */
   3996                *p++ = X_3_5_8_6_5_5(X100, X11110, X01100011, X000000, rN, rD);
   3997                break;
   3998             default:
   3999                goto bad; //ATC
   4000          }
   4001          goto done;
   4002       }
   4003       case ARM64in_VCvtF2I: {
   4004          /*    30       23   20 18  15     9 4
   4005             sf 00,11110,0x 1 00 000,000000 n d  FCVTNS Rd, Fn (round to
   4006             sf 00,11110,0x 1 00 001,000000 n d  FCVTNU Rd, Fn  nearest)
   4007             ---------------- 01 --------------  FCVTP-------- (round to +inf)
   4008             ---------------- 10 --------------  FCVTM-------- (round to -inf)
   4009             ---------------- 11 --------------  FCVTZ-------- (round to zero)
   4010 
   4011             Rd is Xd when sf==1, Wd when sf==0
   4012             Fn is Dn when x==1, Sn when x==0
   4013             20:19 carry the rounding mode, using the same encoding as FPCR
   4014          */
   4015          UInt       rD    = iregEnc(i->ARM64in.VCvtF2I.rD);
   4016          UInt       rN    = dregEnc(i->ARM64in.VCvtF2I.rS);
   4017          ARM64CvtOp how   = i->ARM64in.VCvtF2I.how;
   4018          UChar      armRM = i->ARM64in.VCvtF2I.armRM;
   4019          /* Just handle cases as they show up. */
   4020          switch (how) {
   4021             case ARM64cvt_F64_I32S: /* FCVTxS Wd, Dn */
   4022                *p++ = X_3_5_8_6_5_5(X000, X11110, X01100000 | (armRM << 3),
   4023                                     X000000, rN, rD);
   4024                break;
   4025             case ARM64cvt_F64_I32U: /* FCVTxU Wd, Dn */
   4026                *p++ = X_3_5_8_6_5_5(X000, X11110, X01100001 | (armRM << 3),
   4027                                     X000000, rN, rD);
   4028                break;
   4029             case ARM64cvt_F64_I64S: /* FCVTxS Xd, Dn */
   4030                *p++ = X_3_5_8_6_5_5(X100, X11110, X01100000 | (armRM << 3),
   4031                                     X000000, rN, rD);
   4032                break;
   4033             case ARM64cvt_F64_I64U: /* FCVTxU Xd, Dn */
   4034                *p++ = X_3_5_8_6_5_5(X100, X11110, X01100001 | (armRM << 3),
   4035                                     X000000, rN, rD);
   4036                break;
   4037             case ARM64cvt_F32_I32S: /* FCVTxS Wd, Sn */
   4038                *p++ = X_3_5_8_6_5_5(X000, X11110, X00100000 | (armRM << 3),
   4039                                     X000000, rN, rD);
   4040                break;
   4041             case ARM64cvt_F32_I32U: /* FCVTxU Wd, Sn */
   4042                *p++ = X_3_5_8_6_5_5(X000, X11110, X00100001 | (armRM << 3),
   4043                                     X000000, rN, rD);
   4044                break;
   4045             case ARM64cvt_F32_I64S: /* FCVTxS Xd, Sn */
   4046                *p++ = X_3_5_8_6_5_5(X100, X11110, X00100000 | (armRM << 3),
   4047                                     X000000, rN, rD);
   4048                break;
   4049             case ARM64cvt_F32_I64U: /* FCVTxU Xd, Sn */
   4050                *p++ = X_3_5_8_6_5_5(X100, X11110, X00100001 | (armRM << 3),
   4051                                     X000000, rN, rD);
   4052                break;
   4053             default:
   4054                goto bad; //ATC
   4055          }
   4056          goto done;
   4057       }
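               /* Illustration (not emitted here): FCVTZS is selected with
                  armRM == 3 (round towards zero, per the FPCR encoding
                  noted above), so e.g. FCVTZS W0, D1 would be
                     X_3_5_8_6_5_5(X000, X11110, X01100000 | (3 << 3),
                                   X000000, 1, 0)
                        == 0x1E780020. */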
   4058       case ARM64in_VCvtSD: {
   4059          /* 31         23 21    16  14    9 4
   4060             000,11110, 00 10001 0,1 10000 n d   FCVT Dd, Sn (S->D)
   4061             ---------- 01 ----- 0,0 ---------   FCVT Sd, Dn (D->S)
   4062             Rounding, when dst is smaller than src, is per the FPCR.
   4063          */
   4064          UInt dd = dregEnc(i->ARM64in.VCvtSD.dst);
   4065          UInt nn = dregEnc(i->ARM64in.VCvtSD.src);
   4066          if (i->ARM64in.VCvtSD.sToD) {
   4067             *p++ = X_3_5_8_6_5_5(X000, X11110, X00100010, X110000, nn, dd);
   4068          } else {
   4069             *p++ = X_3_5_8_6_5_5(X000, X11110, X01100010, X010000, nn, dd);
   4070          }
   4071          goto done;
   4072       }
   4073       case ARM64in_VCvtHS: {
   4074          /* 31         23 21    16  14    9 4
   4075             000,11110, 11 10001 0,0 10000 n d   FCVT Sd, Hn (H->S)
   4076             ---------- 00 ----- 1,1 ---------   FCVT Hd, Sn (S->H)
   4077             Rounding, when dst is smaller than src, is per the FPCR.
   4078          */
   4079          UInt dd = dregEnc(i->ARM64in.VCvtHS.dst);
   4080          UInt nn = dregEnc(i->ARM64in.VCvtHS.src);
   4081          if (i->ARM64in.VCvtHS.hToS) {
   4082             *p++ = X_3_5_8_6_5_5(X000, X11110, X11100010, X010000, nn, dd);
   4083          } else {
   4084             *p++ = X_3_5_8_6_5_5(X000, X11110, X00100011, X110000, nn, dd);
   4085          }
   4086          goto done;
   4087       }
   4088       case ARM64in_VCvtHD: {
   4089          /* 31         23 21    16  14    9 4
   4090             000,11110, 11 10001 0,1 10000 n d   FCVT Dd, Hn (H->D)
   4091             ---------- 01 ----- 1,1 ---------   FCVT Hd, Dn (D->H)
   4092             Rounding, when dst is smaller than src, is per the FPCR.
   4093          */
   4094          UInt dd = dregEnc(i->ARM64in.VCvtHD.dst);
   4095          UInt nn = dregEnc(i->ARM64in.VCvtHD.src);
   4096          if (i->ARM64in.VCvtHD.hToD) {
   4097             *p++ = X_3_5_8_6_5_5(X000, X11110, X11100010, X110000, nn, dd);
   4098          } else {
   4099             *p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X110000, nn, dd);
   4100          }
   4101          goto done;
   4102       }
   4103       case ARM64in_VUnaryD: {
   4104          /* 31        23 21     16 14    9 4
   4105             000,11110 01 1,0000 0,0 10000 n d  FMOV Dd, Dn (not handled)
   4106             ------------------- 0,1 ---------  FABS ------
   4107             ------------------- 1,0 ---------  FNEG ------
   4108             ------------------- 1,1 ---------  FSQRT -----
   4109          */
   4110          UInt dD  = dregEnc(i->ARM64in.VUnaryD.dst);
   4111          UInt dN  = dregEnc(i->ARM64in.VUnaryD.src);
   4112          UInt b16 = 2; /* impossible */
   4113          UInt b15 = 2; /* impossible */
   4114          switch (i->ARM64in.VUnaryD.op) {
   4115             case ARM64fpu_NEG:  b16 = 1; b15 = 0; break;
   4116             case ARM64fpu_SQRT: b16 = 1; b15 = 1; break;
   4117             case ARM64fpu_ABS:  b16 = 0; b15 = 1; break;
   4118             default: break;
   4119          }
   4120          if (b16 < 2 && b15 < 2) {
   4121             *p++ = X_3_8_5_6_5_5(X000, X11110011, (X0000 << 1) | b16,
   4122                                  (b15 << 5) | X10000, dN, dD);
   4123             goto done;
   4124          }
   4125          /*
    4126             000, 11110 01 1,001 11,1 10000 n d  FRINTI Dd, Dn (round per FPCR)
   4127          */
   4128          if (i->ARM64in.VUnaryD.op == ARM64fpu_RINT) {
   4129            *p++ = X_3_8_5_6_5_5(X000, X11110011, X00111, X110000, dN, dD);
   4130            goto done;
   4131          }
   4132          /*
    4133             010, 11110 11 1,0000 1,1111 10 n d  FRECPX Dd, Dn
   4134          */
   4135          if (i->ARM64in.VUnaryD.op == ARM64fpu_RECPX) {
   4136            *p++ = X_3_8_5_6_5_5(X010, X11110111, X00001, X111110, dN, dD);
   4137            goto done;
   4138          }
   4139          goto bad;
   4140       }
   4141       case ARM64in_VUnaryS: {
   4142          /* 31        23 21     16 14    9 4
   4143             000,11110 00 1,0000 0,0 10000 n d  FMOV Sd, Sn (not handled)
   4144             ------------------- 0,1 ---------  FABS ------
   4145             ------------------- 1,0 ---------  FNEG ------
   4146             ------------------- 1,1 ---------  FSQRT -----
   4147          */
   4148          UInt sD  = dregEnc(i->ARM64in.VUnaryS.dst);
   4149          UInt sN  = dregEnc(i->ARM64in.VUnaryS.src);
   4150          UInt b16 = 2; /* impossible */
   4151          UInt b15 = 2; /* impossible */
   4152          switch (i->ARM64in.VUnaryS.op) {
   4153             case ARM64fpu_NEG:  b16 = 1; b15 = 0; break;
   4154             case ARM64fpu_SQRT: b16 = 1; b15 = 1; break;
   4155             case ARM64fpu_ABS:  b16 = 0; b15 = 1; break;
   4156             default: break;
   4157          }
   4158          if (b16 < 2 && b15 < 2) {
   4159             *p++ = X_3_8_5_6_5_5(X000, X11110001, (X0000 << 1) | b16,
   4160                                  (b15 << 5) | X10000, sN, sD);
   4161             goto done;
   4162          }
   4163          /*
    4164             000, 11110 00 1,001 11,1 10000 n d  FRINTI Sd, Sn (round per FPCR)
   4165          */
   4166          if (i->ARM64in.VUnaryS.op == ARM64fpu_RINT) {
   4167            *p++ = X_3_8_5_6_5_5(X000, X11110001, X00111, X110000, sN, sD);
   4168            goto done;
   4169          }
   4170          /*
    4171             010, 11110 10 1,0000 1,1111 10 n d  FRECPX Sd, Sn
   4172          */
   4173          if (i->ARM64in.VUnaryS.op == ARM64fpu_RECPX) {
   4174            *p++ = X_3_8_5_6_5_5(X010, X11110101, X00001, X111110, sN, sD);
   4175            goto done;
   4176          }
   4177          goto bad;
   4178       }
   4179       case ARM64in_VBinD: {
   4180          /* 31        23  20 15   11 9 4
   4181             ---------------- 0000 ------   FMUL  --------
   4182             000 11110 011 m  0001 10 n d   FDIV  Dd,Dn,Dm
   4183             ---------------- 0010 ------   FADD  --------
   4184             ---------------- 0011 ------   FSUB  --------
   4185          */
   4186          UInt dD = dregEnc(i->ARM64in.VBinD.dst);
   4187          UInt dN = dregEnc(i->ARM64in.VBinD.argL);
   4188          UInt dM = dregEnc(i->ARM64in.VBinD.argR);
   4189          UInt b1512 = 16; /* impossible */
   4190          switch (i->ARM64in.VBinD.op) {
   4191             case ARM64fpb_DIV: b1512 = X0001; break;
   4192             case ARM64fpb_MUL: b1512 = X0000; break;
   4193             case ARM64fpb_SUB: b1512 = X0011; break;
   4194             case ARM64fpb_ADD: b1512 = X0010; break;
   4195             default: goto bad;
   4196          }
   4197          vassert(b1512 < 16);
   4198          *p++
   4199             = X_3_8_5_6_5_5(X000, X11110011, dM, (b1512 << 2) | X10, dN, dD);
   4200          goto done;
   4201       }
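               /* E.g. (illustrative only): FADD D0, D1, D2 selects
                  b1512 == X0010 above, giving
                     X_3_8_5_6_5_5(X000, X11110011, 2, (X0010 << 2) | X10,
                                   1, 0)
                        == 0x1E622820,
                  i.e. "fadd d0, d1, d2". */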
   4202       case ARM64in_VBinS: {
   4203          /* 31        23  20 15   11 9 4
   4204             ---------------- 0000 ------   FMUL  --------
    4205             000 11110 001 m  0001 10 n d   FDIV  Sd,Sn,Sm
   4206             ---------------- 0010 ------   FADD  --------
   4207             ---------------- 0011 ------   FSUB  --------
   4208          */
   4209          UInt sD = dregEnc(i->ARM64in.VBinS.dst);
   4210          UInt sN = dregEnc(i->ARM64in.VBinS.argL);
   4211          UInt sM = dregEnc(i->ARM64in.VBinS.argR);
   4212          UInt b1512 = 16; /* impossible */
   4213          switch (i->ARM64in.VBinS.op) {
   4214             case ARM64fpb_DIV: b1512 = X0001; break;
   4215             case ARM64fpb_MUL: b1512 = X0000; break;
   4216             case ARM64fpb_SUB: b1512 = X0011; break;
   4217             case ARM64fpb_ADD: b1512 = X0010; break;
   4218             default: goto bad;
   4219          }
   4220          vassert(b1512 < 16);
   4221          *p++
   4222             = X_3_8_5_6_5_5(X000, X11110001, sM, (b1512 << 2) | X10, sN, sD);
   4223          goto done;
   4224       }
   4225       case ARM64in_VCmpD: {
   4226          /* 000 11110 01 1 m 00 1000 n 00 000  FCMP Dn, Dm */
   4227          UInt dN = dregEnc(i->ARM64in.VCmpD.argL);
   4228          UInt dM = dregEnc(i->ARM64in.VCmpD.argR);
   4229          *p++ = X_3_8_5_6_5_5(X000, X11110011, dM, X001000, dN, X00000);
   4230          goto done;
   4231       }
   4232       case ARM64in_VCmpS: {
   4233          /* 000 11110 00 1 m 00 1000 n 00 000  FCMP Sn, Sm */
   4234          UInt sN = dregEnc(i->ARM64in.VCmpS.argL);
   4235          UInt sM = dregEnc(i->ARM64in.VCmpS.argR);
   4236          *p++ = X_3_8_5_6_5_5(X000, X11110001, sM, X001000, sN, X00000);
   4237          goto done;
   4238       }
   4239       case ARM64in_VFCSel: {
   4240          /* 31        23 21 20 15   11 9 5
   4241             000 11110 00 1  m  cond 11 n d  FCSEL Sd,Sn,Sm,cond
   4242             000 11110 01 1  m  cond 11 n d  FCSEL Dd,Dn,Dm,cond
   4243          */
   4244          Bool isD  = i->ARM64in.VFCSel.isD;
   4245          UInt dd   = dregEnc(i->ARM64in.VFCSel.dst);
   4246          UInt nn   = dregEnc(i->ARM64in.VFCSel.argL);
   4247          UInt mm   = dregEnc(i->ARM64in.VFCSel.argR);
   4248          UInt cond = (UInt)i->ARM64in.VFCSel.cond;
   4249          vassert(cond < 16);
   4250          *p++ = X_3_8_5_6_5_5(X000, isD ? X11110011 : X11110001,
   4251                               mm, (cond << 2) | X000011, nn, dd);
   4252          goto done;
   4253       }
   4254       case ARM64in_FPCR: {
   4255          Bool toFPCR = i->ARM64in.FPCR.toFPCR;
   4256          UInt iReg   = iregEnc(i->ARM64in.FPCR.iReg);
   4257          if (toFPCR) {
   4258             /* 0xD51B44 000 Rt  MSR fpcr, rT */
   4259             *p++ = 0xD51B4400 | (iReg & 0x1F);
   4260             goto done;
   4261          }
   4262          goto bad; // FPCR -> iReg case currently ATC
   4263       }
   4264       case ARM64in_FPSR: {
   4265          Bool toFPSR = i->ARM64in.FPSR.toFPSR;
   4266          UInt iReg   = iregEnc(i->ARM64in.FPSR.iReg);
   4267          if (toFPSR) {
   4268             /* 0xD51B44 001 Rt  MSR fpsr, rT */
   4269             *p++ = 0xD51B4420 | (iReg & 0x1F);
   4270          } else {
   4271             /* 0xD53B44 001 Rt  MRS rT, fpsr */
   4272             *p++ = 0xD53B4420 | (iReg & 0x1F);
   4273          }
   4274          goto done;
   4275       }
   4276       case ARM64in_VBinV: {
   4277          /* 31        23   20 15     9 4
   4278             010 01110 11 1 m  100001 n d   ADD Vd.2d,  Vn.2d,  Vm.2d
   4279             010 01110 10 1 m  100001 n d   ADD Vd.4s,  Vn.4s,  Vm.4s
   4280             010 01110 01 1 m  100001 n d   ADD Vd.8h,  Vn.8h,  Vm.8h
   4281             010 01110 00 1 m  100001 n d   ADD Vd.16b, Vn.16b, Vm.16b
   4282 
   4283             011 01110 11 1 m  100001 n d   SUB Vd.2d,  Vn.2d,  Vm.2d
   4284             011 01110 10 1 m  100001 n d   SUB Vd.4s,  Vn.4s,  Vm.4s
   4285             011 01110 01 1 m  100001 n d   SUB Vd.8h,  Vn.8h,  Vm.8h
   4286             011 01110 00 1 m  100001 n d   SUB Vd.16b, Vn.16b, Vm.16b
   4287 
   4288             010 01110 10 1 m  100111 n d   MUL Vd.4s,  Vn.4s,  Vm.4s
   4289             010 01110 01 1 m  100111 n d   MUL Vd.8h,  Vn.8h,  Vm.8h
   4290             010 01110 00 1 m  100111 n d   MUL Vd.16b, Vn.16b, Vm.16b
   4291 
   4292             010 01110 01 1 m  110101 n d   FADD Vd.2d, Vn.2d, Vm.2d
   4293             010 01110 00 1 m  110101 n d   FADD Vd.4s, Vn.4s, Vm.4s
   4294             010 01110 11 1 m  110101 n d   FSUB Vd.2d, Vn.2d, Vm.2d
   4295             010 01110 10 1 m  110101 n d   FSUB Vd.4s, Vn.4s, Vm.4s
   4296 
   4297             011 01110 01 1 m  110111 n d   FMUL Vd.2d, Vn.2d, Vm.2d
   4298             011 01110 00 1 m  110111 n d   FMUL Vd.4s, Vn.4s, Vm.4s
   4299             011 01110 01 1 m  111111 n d   FDIV Vd.2d, Vn.2d, Vm.2d
   4300             011 01110 00 1 m  111111 n d   FDIV Vd.4s, Vn.4s, Vm.4s
   4301 
   4302             010 01110 01 1 m  111101 n d   FMAX Vd.2d, Vn.2d, Vm.2d
   4303             010 01110 00 1 m  111101 n d   FMAX Vd.4s, Vn.4s, Vm.4s
   4304             010 01110 11 1 m  111101 n d   FMIN Vd.2d, Vn.2d, Vm.2d
   4305             010 01110 10 1 m  111101 n d   FMIN Vd.4s, Vn.4s, Vm.4s
   4306 
   4307             011 01110 10 1 m  011001 n d   UMAX Vd.4s,  Vn.4s,  Vm.4s
   4308             011 01110 01 1 m  011001 n d   UMAX Vd.8h,  Vn.8h,  Vm.8h
   4309             011 01110 00 1 m  011001 n d   UMAX Vd.16b, Vn.16b, Vm.16b
   4310 
   4311             011 01110 10 1 m  011011 n d   UMIN Vd.4s,  Vn.4s,  Vm.4s
   4312             011 01110 01 1 m  011011 n d   UMIN Vd.8h,  Vn.8h,  Vm.8h
   4313             011 01110 00 1 m  011011 n d   UMIN Vd.16b, Vn.16b, Vm.16b
   4314 
   4315             010 01110 10 1 m  011001 n d   SMAX Vd.4s,  Vn.4s,  Vm.4s
   4316             010 01110 01 1 m  011001 n d   SMAX Vd.8h,  Vn.8h,  Vm.8h
   4317             010 01110 00 1 m  011001 n d   SMAX Vd.16b, Vn.16b, Vm.16b
   4318 
   4319             010 01110 10 1 m  011011 n d   SMIN Vd.4s,  Vn.4s,  Vm.4s
   4320             010 01110 01 1 m  011011 n d   SMIN Vd.8h,  Vn.8h,  Vm.8h
   4321             010 01110 00 1 m  011011 n d   SMIN Vd.16b, Vn.16b, Vm.16b
   4322 
   4323             010 01110 00 1 m  000111 n d   AND Vd, Vn, Vm
   4324             010 01110 10 1 m  000111 n d   ORR Vd, Vn, Vm
   4325             011 01110 00 1 m  000111 n d   EOR Vd, Vn, Vm
   4326 
   4327             011 01110 11 1 m  100011 n d   CMEQ Vd.2d,  Vn.2d,  Vm.2d
   4328             011 01110 10 1 m  100011 n d   CMEQ Vd.4s,  Vn.4s,  Vm.4s
   4329             011 01110 01 1 m  100011 n d   CMEQ Vd.8h,  Vn.8h,  Vm.8h
   4330             011 01110 00 1 m  100011 n d   CMEQ Vd.16b, Vn.16b, Vm.16b
   4331 
   4332             011 01110 11 1 m  001101 n d   CMHI Vd.2d,  Vn.2d,  Vm.2d
   4333             011 01110 10 1 m  001101 n d   CMHI Vd.4s,  Vn.4s,  Vm.4s
   4334             011 01110 01 1 m  001101 n d   CMHI Vd.8h,  Vn.8h,  Vm.8h
   4335             011 01110 00 1 m  001101 n d   CMHI Vd.16b, Vn.16b, Vm.16b
   4336 
   4337             010 01110 11 1 m  001101 n d   CMGT Vd.2d,  Vn.2d,  Vm.2d
   4338             010 01110 10 1 m  001101 n d   CMGT Vd.4s,  Vn.4s,  Vm.4s
   4339             010 01110 01 1 m  001101 n d   CMGT Vd.8h,  Vn.8h,  Vm.8h
   4340             010 01110 00 1 m  001101 n d   CMGT Vd.16b, Vn.16b, Vm.16b
   4341 
   4342             010 01110 01 1 m  111001 n d   FCMEQ Vd.2d, Vn.2d, Vm.2d
   4343             010 01110 00 1 m  111001 n d   FCMEQ Vd.4s, Vn.4s, Vm.4s
   4344 
   4345             011 01110 01 1 m  111001 n d   FCMGE Vd.2d, Vn.2d, Vm.2d
   4346             011 01110 00 1 m  111001 n d   FCMGE Vd.4s, Vn.4s, Vm.4s
   4347 
   4348             011 01110 11 1 m  111001 n d   FCMGT Vd.2d, Vn.2d, Vm.2d
   4349             011 01110 10 1 m  111001 n d   FCMGT Vd.4s, Vn.4s, Vm.4s
   4350 
   4351             010 01110 00 0 m  000000 n d   TBL Vd.16b, {Vn.16b}, Vm.16b
   4352 
   4353             010 01110 11 0 m  000110 n d   UZP1 Vd.2d,  Vn.2d,  Vm.2d
   4354             010 01110 10 0 m  000110 n d   UZP1 Vd.4s,  Vn.4s,  Vm.4s
   4355             010 01110 01 0 m  000110 n d   UZP1 Vd.8h,  Vn.8h,  Vm.8h
   4356             010 01110 00 0 m  000110 n d   UZP1 Vd.16b, Vn.16b, Vm.16b
   4357 
   4358             010 01110 11 0 m  010110 n d   UZP2 Vd.2d,  Vn.2d,  Vm.2d
   4359             010 01110 10 0 m  010110 n d   UZP2 Vd.4s,  Vn.4s,  Vm.4s
   4360             010 01110 01 0 m  010110 n d   UZP2 Vd.8h,  Vn.8h,  Vm.8h
   4361             010 01110 00 0 m  010110 n d   UZP2 Vd.16b, Vn.16b, Vm.16b
   4362 
   4363             010 01110 10 0 m  001110 n d   ZIP1 Vd.4s,  Vn.4s,  Vm.4s
   4364             010 01110 01 0 m  001110 n d   ZIP1 Vd.8h,  Vn.8h,  Vm.8h
    4365             010 01110 00 0 m  001110 n d   ZIP1 Vd.16b, Vn.16b, Vm.16b
   4366 
   4367             010 01110 10 0 m  011110 n d   ZIP2 Vd.4s,  Vn.4s,  Vm.4s
   4368             010 01110 01 0 m  011110 n d   ZIP2 Vd.8h,  Vn.8h,  Vm.8h
    4369             010 01110 00 0 m  011110 n d   ZIP2 Vd.16b, Vn.16b, Vm.16b
   4370 
   4371             011 01110 00 1 m  100111 n d   PMUL Vd.16b, Vn.16b, Vm.16b
   4372 
   4373             000 01110 00 1 m  111000 n d   PMULL Vd.8h, Vn.8b, Vm.8b
   4374 
   4375             001 01110 10 1 m  110000 n d   UMULL Vd.2d, Vn.2s, Vm.2s
   4376             001 01110 01 1 m  110000 n d   UMULL Vd.4s, Vn.4h, Vm.4h
   4377             001 01110 00 1 m  110000 n d   UMULL Vd.8h, Vn.8b, Vm.8b
   4378 
   4379             000 01110 10 1 m  110000 n d   SMULL Vd.2d, Vn.2s, Vm.2s
   4380             000 01110 01 1 m  110000 n d   SMULL Vd.4s, Vn.4h, Vm.4h
   4381             000 01110 00 1 m  110000 n d   SMULL Vd.8h, Vn.8b, Vm.8b
   4382 
   4383             010 01110 11 1 m  000011 n d   SQADD Vd.2d,  Vn.2d,  Vm.2d
   4384             010 01110 10 1 m  000011 n d   SQADD Vd.4s,  Vn.4s,  Vm.4s
   4385             010 01110 01 1 m  000011 n d   SQADD Vd.8h,  Vn.8h,  Vm.8h
   4386             010 01110 00 1 m  000011 n d   SQADD Vd.16b, Vn.16b, Vm.16b
   4387 
   4388             011 01110 11 1 m  000011 n d   UQADD Vd.2d,  Vn.2d,  Vm.2d
   4389             011 01110 10 1 m  000011 n d   UQADD Vd.4s,  Vn.4s,  Vm.4s
   4390             011 01110 01 1 m  000011 n d   UQADD Vd.8h,  Vn.8h,  Vm.8h
   4391             011 01110 00 1 m  000011 n d   UQADD Vd.16b, Vn.16b, Vm.16b
   4392 
   4393             010 01110 11 1 m  001011 n d   SQSUB Vd.2d,  Vn.2d,  Vm.2d
   4394             010 01110 10 1 m  001011 n d   SQSUB Vd.4s,  Vn.4s,  Vm.4s
   4395             010 01110 01 1 m  001011 n d   SQSUB Vd.8h,  Vn.8h,  Vm.8h
   4396             010 01110 00 1 m  001011 n d   SQSUB Vd.16b, Vn.16b, Vm.16b
   4397 
   4398             011 01110 11 1 m  001011 n d   UQSUB Vd.2d,  Vn.2d,  Vm.2d
   4399             011 01110 10 1 m  001011 n d   UQSUB Vd.4s,  Vn.4s,  Vm.4s
   4400             011 01110 01 1 m  001011 n d   UQSUB Vd.8h,  Vn.8h,  Vm.8h
   4401             011 01110 00 1 m  001011 n d   UQSUB Vd.16b, Vn.16b, Vm.16b
   4402 
   4403             000 01110 10 1 m  110100 n d   SQDMULL Vd.2d, Vn.2s, Vm.2s
   4404             000 01110 01 1 m  110100 n d   SQDMULL Vd.4s, Vn.4h, Vm.4h
   4405 
   4406             010 01110 10 1 m  101101 n d   SQDMULH   Vd.4s,  Vn.4s,  Vm.4s
   4407             010 01110 01 1 m  101101 n d   SQDMULH   Vd.8h,  Vn.8h,  Vm.8h
   4408             011 01110 10 1 m  101101 n d   SQRDMULH  Vd.4s,  Vn.4s,  Vm.4s
    4409             011 01110 01 1 m  101101 n d   SQRDMULH  Vd.8h,  Vn.8h,  Vm.8h
   4410 
   4411             010 01110 sz 1 m  010011 n d   SQSHL@sz   Vd, Vn, Vm
   4412             010 01110 sz 1 m  010111 n d   SQRSHL@sz  Vd, Vn, Vm
   4413             011 01110 sz 1 m  010011 n d   UQSHL@sz   Vd, Vn, Vm
    4414             011 01110 sz 1 m  010111 n d   UQRSHL@sz  Vd, Vn, Vm
   4415 
   4416             010 01110 sz 1 m  010001 n d   SSHL@sz   Vd, Vn, Vm
   4417             010 01110 sz 1 m  010101 n d   SRSHL@sz  Vd, Vn, Vm
   4418             011 01110 sz 1 m  010001 n d   USHL@sz   Vd, Vn, Vm
   4419             011 01110 sz 1 m  010101 n d   URSHL@sz  Vd, Vn, Vm
   4420 
   4421             010 01110 01 1 m  111111 n d   FRECPS  Vd.2d, Vn.2d, Vm.2d
   4422             010 01110 00 1 m  111111 n d   FRECPS  Vd.4s, Vn.4s, Vm.4s
   4423             010 01110 11 1 m  111111 n d   FRSQRTS Vd.2d, Vn.2d, Vm.2d
   4424             010 01110 10 1 m  111111 n d   FRSQRTS Vd.4s, Vn.4s, Vm.4s
   4425          */
   4426          UInt vD = qregEnc(i->ARM64in.VBinV.dst);
   4427          UInt vN = qregEnc(i->ARM64in.VBinV.argL);
   4428          UInt vM = qregEnc(i->ARM64in.VBinV.argR);
   4429          switch (i->ARM64in.VBinV.op) {
   4430             case ARM64vecb_ADD64x2:
   4431                *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X100001, vN, vD);
   4432                break;
   4433             case ARM64vecb_ADD32x4:
   4434                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X100001, vN, vD);
   4435                break;
   4436             case ARM64vecb_ADD16x8:
   4437                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X100001, vN, vD);
   4438                break;
   4439             case ARM64vecb_ADD8x16:
   4440                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X100001, vN, vD);
   4441                break;
   4442             case ARM64vecb_SUB64x2:
   4443                *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X100001, vN, vD);
   4444                break;
   4445             case ARM64vecb_SUB32x4:
   4446                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X100001, vN, vD);
   4447                break;
   4448             case ARM64vecb_SUB16x8:
   4449                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X100001, vN, vD);
   4450                break;
   4451             case ARM64vecb_SUB8x16:
   4452                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100001, vN, vD);
   4453                break;
   4454             case ARM64vecb_MUL32x4:
   4455                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X100111, vN, vD);
   4456                break;
   4457             case ARM64vecb_MUL16x8:
   4458                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X100111, vN, vD);
   4459                break;
   4460             case ARM64vecb_MUL8x16:
   4461                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X100111, vN, vD);
   4462                break;
   4463             case ARM64vecb_FADD64x2:
   4464                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X110101, vN, vD);
   4465                break;
   4466             case ARM64vecb_FADD32x4:
   4467                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X110101, vN, vD);
   4468                break;
   4469             case ARM64vecb_FSUB64x2:
   4470                *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X110101, vN, vD);
   4471                break;
   4472             case ARM64vecb_FSUB32x4:
   4473                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X110101, vN, vD);
   4474                break;
   4475             case ARM64vecb_FMUL64x2:
   4476                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X110111, vN, vD);
   4477                break;
   4478             case ARM64vecb_FMUL32x4:
   4479                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X110111, vN, vD);
   4480                break;
   4481             case ARM64vecb_FDIV64x2:
   4482                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X111111, vN, vD);
   4483                break;
   4484             case ARM64vecb_FDIV32x4:
   4485                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X111111, vN, vD);
   4486                break;
   4487 
   4488             case ARM64vecb_FMAX64x2:
   4489                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111101, vN, vD);
   4490                break;
   4491             case ARM64vecb_FMAX32x4:
   4492                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111101, vN, vD);
   4493                break;
   4494             case ARM64vecb_FMIN64x2:
   4495                *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X111101, vN, vD);
   4496                break;
   4497             case ARM64vecb_FMIN32x4:
   4498                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X111101, vN, vD);
   4499                break;
   4500 
   4501             case ARM64vecb_UMAX32x4:
   4502                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X011001, vN, vD);
   4503                break;
   4504             case ARM64vecb_UMAX16x8:
   4505                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X011001, vN, vD);
   4506                break;
   4507             case ARM64vecb_UMAX8x16:
   4508                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X011001, vN, vD);
   4509                break;
   4510 
   4511             case ARM64vecb_UMIN32x4:
   4512                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X011011, vN, vD);
   4513                break;
   4514             case ARM64vecb_UMIN16x8:
   4515                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X011011, vN, vD);
   4516                break;
   4517             case ARM64vecb_UMIN8x16:
   4518                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X011011, vN, vD);
   4519                break;
   4520 
   4521             case ARM64vecb_SMAX32x4:
   4522                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011001, vN, vD);
   4523                break;
   4524             case ARM64vecb_SMAX16x8:
   4525                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011001, vN, vD);
   4526                break;
   4527             case ARM64vecb_SMAX8x16:
   4528                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X011001, vN, vD);
   4529                break;
   4530 
   4531             case ARM64vecb_SMIN32x4:
   4532                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011011, vN, vD);
   4533                break;
   4534             case ARM64vecb_SMIN16x8:
   4535                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011011, vN, vD);
   4536                break;
   4537             case ARM64vecb_SMIN8x16:
   4538                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X011011, vN, vD);
   4539                break;
   4540 
   4541             case ARM64vecb_AND:
   4542                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X000111, vN, vD);
   4543                break;
   4544             case ARM64vecb_ORR:
   4545                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X000111, vN, vD);
   4546                break;
   4547             case ARM64vecb_XOR:
   4548                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X000111, vN, vD);
   4549                break;
   4550 
   4551             case ARM64vecb_CMEQ64x2:
   4552                *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X100011, vN, vD);
   4553                break;
   4554             case ARM64vecb_CMEQ32x4:
   4555                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X100011, vN, vD);
   4556                break;
   4557             case ARM64vecb_CMEQ16x8:
   4558                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X100011, vN, vD);
   4559                break;
   4560             case ARM64vecb_CMEQ8x16:
   4561                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100011, vN, vD);
   4562                break;
   4563 
   4564             case ARM64vecb_CMHI64x2:
   4565                *p++ = X_3_8_5_6_5_5(X011, X01110111, vM,  X001101, vN, vD);
   4566                break;
   4567             case ARM64vecb_CMHI32x4:
   4568                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM,  X001101, vN, vD);
   4569                break;
   4570             case ARM64vecb_CMHI16x8:
   4571                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM,  X001101, vN, vD);
   4572                break;
   4573             case ARM64vecb_CMHI8x16:
   4574                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM,  X001101, vN, vD);
   4575                break;
   4576 
   4577             case ARM64vecb_CMGT64x2:
   4578                *p++ = X_3_8_5_6_5_5(X010, X01110111, vM,  X001101, vN, vD);
   4579                break;
   4580             case ARM64vecb_CMGT32x4:
   4581                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM,  X001101, vN, vD);
   4582                break;
   4583             case ARM64vecb_CMGT16x8:
   4584                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM,  X001101, vN, vD);
   4585                break;
   4586             case ARM64vecb_CMGT8x16:
   4587                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM,  X001101, vN, vD);
   4588                break;
   4589 
   4590             case ARM64vecb_FCMEQ64x2:
   4591                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111001, vN, vD);
   4592                break;
   4593             case ARM64vecb_FCMEQ32x4:
   4594                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111001, vN, vD);
   4595                break;
   4596 
   4597             case ARM64vecb_FCMGE64x2:
   4598                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X111001, vN, vD);
   4599                break;
   4600             case ARM64vecb_FCMGE32x4:
   4601                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X111001, vN, vD);
   4602                break;
   4603 
   4604             case ARM64vecb_FCMGT64x2:
   4605                *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X111001, vN, vD);
   4606                break;
   4607             case ARM64vecb_FCMGT32x4:
   4608                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X111001, vN, vD);
   4609                break;
   4610 
   4611             case ARM64vecb_TBL1:
   4612                *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X000000, vN, vD);
   4613                break;
   4614 
   4615             case ARM64vecb_UZP164x2:
   4616                *p++ = X_3_8_5_6_5_5(X010, X01110110, vM, X000110, vN, vD);
   4617                break;
   4618             case ARM64vecb_UZP132x4:
   4619                *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X000110, vN, vD);
   4620                break;
   4621             case ARM64vecb_UZP116x8:
   4622                *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X000110, vN, vD);
   4623                break;
   4624             case ARM64vecb_UZP18x16:
   4625                *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X000110, vN, vD);
   4626                break;
   4627 
   4628             case ARM64vecb_UZP264x2:
   4629                *p++ = X_3_8_5_6_5_5(X010, X01110110, vM, X010110, vN, vD);
   4630                break;
   4631             case ARM64vecb_UZP232x4:
   4632                *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X010110, vN, vD);
   4633                break;
   4634             case ARM64vecb_UZP216x8:
   4635                *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X010110, vN, vD);
   4636                break;
   4637             case ARM64vecb_UZP28x16:
   4638                *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X010110, vN, vD);
   4639                break;
   4640 
   4641             case ARM64vecb_ZIP132x4:
   4642                *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X001110, vN, vD);
   4643                break;
   4644             case ARM64vecb_ZIP116x8:
   4645                *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X001110, vN, vD);
   4646                break;
   4647             case ARM64vecb_ZIP18x16:
   4648                *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X001110, vN, vD);
   4649                break;
   4650 
   4651             case ARM64vecb_ZIP232x4:
   4652                *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X011110, vN, vD);
   4653                break;
   4654             case ARM64vecb_ZIP216x8:
   4655                *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X011110, vN, vD);
   4656                break;
   4657             case ARM64vecb_ZIP28x16:
   4658                *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X011110, vN, vD);
   4659                break;
   4660 
   4661             case ARM64vecb_PMUL8x16:
   4662                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100111, vN, vD);
   4663                break;
   4664 
   4665             case ARM64vecb_PMULL8x8:
   4666                *p++ = X_3_8_5_6_5_5(X000, X01110001, vM, X111000, vN, vD);
   4667                break;
   4668 
   4669             case ARM64vecb_UMULL2DSS:
   4670                *p++ = X_3_8_5_6_5_5(X001, X01110101, vM, X110000, vN, vD);
   4671                break;
   4672             case ARM64vecb_UMULL4SHH:
   4673                *p++ = X_3_8_5_6_5_5(X001, X01110011, vM, X110000, vN, vD);
   4674                break;
   4675             case ARM64vecb_UMULL8HBB:
   4676                *p++ = X_3_8_5_6_5_5(X001, X01110001, vM, X110000, vN, vD);
   4677                break;
   4678 
   4679             case ARM64vecb_SMULL2DSS:
   4680                *p++ = X_3_8_5_6_5_5(X000, X01110101, vM, X110000, vN, vD);
   4681                break;
   4682             case ARM64vecb_SMULL4SHH:
   4683                *p++ = X_3_8_5_6_5_5(X000, X01110011, vM, X110000, vN, vD);
   4684                break;
   4685             case ARM64vecb_SMULL8HBB:
   4686                *p++ = X_3_8_5_6_5_5(X000, X01110001, vM, X110000, vN, vD);
   4687                break;
   4688 
   4689             case ARM64vecb_SQADD64x2:
   4690                *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X000011, vN, vD);
   4691                break;
   4692             case ARM64vecb_SQADD32x4:
   4693                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X000011, vN, vD);
   4694                break;
   4695             case ARM64vecb_SQADD16x8:
   4696                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X000011, vN, vD);
   4697                break;
   4698             case ARM64vecb_SQADD8x16:
   4699                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X000011, vN, vD);
   4700                break;
   4701 
   4702             case ARM64vecb_UQADD64x2:
   4703                *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X000011, vN, vD);
   4704                break;
   4705             case ARM64vecb_UQADD32x4:
   4706                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X000011, vN, vD);
   4707                break;
   4708             case ARM64vecb_UQADD16x8:
   4709                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X000011, vN, vD);
   4710                break;
   4711             case ARM64vecb_UQADD8x16:
   4712                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X000011, vN, vD);
   4713                break;
   4714 
   4715             case ARM64vecb_SQSUB64x2:
   4716                *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X001011, vN, vD);
   4717                break;
   4718             case ARM64vecb_SQSUB32x4:
   4719                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X001011, vN, vD);
   4720                break;
   4721             case ARM64vecb_SQSUB16x8:
   4722                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X001011, vN, vD);
   4723                break;
   4724             case ARM64vecb_SQSUB8x16:
   4725                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X001011, vN, vD);
   4726                break;
   4727 
   4728             case ARM64vecb_UQSUB64x2:
   4729                *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X001011, vN, vD);
   4730                break;
   4731             case ARM64vecb_UQSUB32x4:
   4732                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X001011, vN, vD);
   4733                break;
   4734             case ARM64vecb_UQSUB16x8:
   4735                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X001011, vN, vD);
   4736                break;
   4737             case ARM64vecb_UQSUB8x16:
   4738                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X001011, vN, vD);
   4739                break;
   4740 
   4741             case ARM64vecb_SQDMULL2DSS:
   4742                *p++ = X_3_8_5_6_5_5(X000, X01110101, vM, X110100, vN, vD);
   4743                break;
   4744             case ARM64vecb_SQDMULL4SHH:
   4745                *p++ = X_3_8_5_6_5_5(X000, X01110011, vM, X110100, vN, vD);
   4746                break;
   4747 
   4748             case ARM64vecb_SQDMULH32x4:
   4749                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X101101, vN, vD);
   4750                break;
   4751             case ARM64vecb_SQDMULH16x8:
   4752                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X101101, vN, vD);
   4753                break;
   4754             case ARM64vecb_SQRDMULH32x4:
   4755                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X101101, vN, vD);
   4756                break;
   4757             case ARM64vecb_SQRDMULH16x8:
   4758                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X101101, vN, vD);
   4759                break;
   4760 
   4761             case ARM64vecb_SQSHL64x2:
   4762                *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010011, vN, vD);
   4763                break;
   4764             case ARM64vecb_SQSHL32x4:
   4765                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010011, vN, vD);
   4766                break;
   4767             case ARM64vecb_SQSHL16x8:
   4768                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010011, vN, vD);
   4769                break;
   4770             case ARM64vecb_SQSHL8x16:
   4771                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010011, vN, vD);
   4772                break;
   4773 
   4774             case ARM64vecb_SQRSHL64x2:
   4775                *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010111, vN, vD);
   4776                break;
   4777             case ARM64vecb_SQRSHL32x4:
   4778                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010111, vN, vD);
   4779                break;
   4780             case ARM64vecb_SQRSHL16x8:
   4781                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010111, vN, vD);
   4782                break;
   4783             case ARM64vecb_SQRSHL8x16:
   4784                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010111, vN, vD);
   4785                break;
   4786 
   4787             case ARM64vecb_UQSHL64x2:
   4788                *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010011, vN, vD);
   4789                break;
   4790             case ARM64vecb_UQSHL32x4:
   4791                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010011, vN, vD);
   4792                break;
   4793             case ARM64vecb_UQSHL16x8:
   4794                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010011, vN, vD);
   4795                break;
   4796             case ARM64vecb_UQSHL8x16:
   4797                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010011, vN, vD);
   4798                break;
   4799 
   4800             case ARM64vecb_UQRSHL64x2:
   4801                *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010111, vN, vD);
   4802                break;
   4803             case ARM64vecb_UQRSHL32x4:
   4804                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010111, vN, vD);
   4805                break;
   4806             case ARM64vecb_UQRSHL16x8:
   4807                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010111, vN, vD);
   4808                break;
   4809             case ARM64vecb_UQRSHL8x16:
   4810                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010111, vN, vD);
   4811                break;
   4812 
   4813             case ARM64vecb_SSHL64x2:
   4814                *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010001, vN, vD);
   4815                break;
   4816             case ARM64vecb_SSHL32x4:
   4817                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010001, vN, vD);
   4818                break;
   4819             case ARM64vecb_SSHL16x8:
   4820                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010001, vN, vD);
   4821                break;
   4822             case ARM64vecb_SSHL8x16:
   4823                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010001, vN, vD);
   4824                break;
   4825 
   4826             case ARM64vecb_SRSHL64x2:
   4827                *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010101, vN, vD);
   4828                break;
   4829             case ARM64vecb_SRSHL32x4:
   4830                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010101, vN, vD);
   4831                break;
   4832             case ARM64vecb_SRSHL16x8:
   4833                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010101, vN, vD);
   4834                break;
   4835             case ARM64vecb_SRSHL8x16:
   4836                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010101, vN, vD);
   4837                break;
   4838 
   4839             case ARM64vecb_USHL64x2:
   4840                *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010001, vN, vD);
   4841                break;
   4842             case ARM64vecb_USHL32x4:
   4843                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010001, vN, vD);
   4844                break;
   4845             case ARM64vecb_USHL16x8:
   4846                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010001, vN, vD);
   4847                break;
   4848             case ARM64vecb_USHL8x16:
   4849                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010001, vN, vD);
   4850                break;
   4851 
   4852             case ARM64vecb_URSHL64x2:
   4853                *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010101, vN, vD);
   4854                break;
   4855             case ARM64vecb_URSHL32x4:
   4856                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010101, vN, vD);
   4857                break;
   4858             case ARM64vecb_URSHL16x8:
   4859                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010101, vN, vD);
   4860                break;
   4861             case ARM64vecb_URSHL8x16:
   4862                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010101, vN, vD);
   4863                break;
   4864 
   4865             case ARM64vecb_FRECPS64x2:
   4866                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111111, vN, vD);
   4867                break;
   4868             case ARM64vecb_FRECPS32x4:
   4869                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111111, vN, vD);
   4870                break;
   4871             case ARM64vecb_FRSQRTS64x2:
   4872                *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X111111, vN, vD);
   4873                break;
   4874             case ARM64vecb_FRSQRTS32x4:
   4875                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X111111, vN, vD);
   4876                break;
   4877 
   4878             default:
   4879                goto bad;
   4880          }
   4881          goto done;
   4882       }
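               /* In the two-register vector encodings above, the 8-bit field
                  passed to X_3_8_5_6_5_5 is 01110:size:1, where size
                  11/10/01/00 selects the 64x2/32x4/16x8/8x16 arrangement, and
                  the middle bit of the leading 3-bit field is the AdvSIMD U
                  bit (010 vs 011, e.g. SQSHL vs UQSHL).  For instance, with
                  illustrative registers V1/V2/V3, USHL V1.8h, V2.8h, V3.8h is
                  X_3_8_5_6_5_5(X011, X01110011, 3, X010001, 2, 1)
                  == 0x6E634441. */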
   4883       case ARM64in_VModifyV: {
   4884          /* 31        23   20    15     9 4
   4885             010 01110 sz 1 00000 001110 n d   SUQADD@sz  Vd, Vn
   4886             011 01110 sz 1 00000 001110 n d   USQADD@sz  Vd, Vn
   4887          */
   4888          UInt vD = qregEnc(i->ARM64in.VModifyV.mod);
   4889          UInt vN = qregEnc(i->ARM64in.VModifyV.arg);
   4890          switch (i->ARM64in.VModifyV.op) {
   4891             case ARM64vecmo_SUQADD64x2:
   4892                *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X001110, vN, vD);
   4893                break;
   4894             case ARM64vecmo_SUQADD32x4:
   4895                *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X001110, vN, vD);
   4896                break;
   4897             case ARM64vecmo_SUQADD16x8:
   4898                *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X001110, vN, vD);
   4899                break;
   4900             case ARM64vecmo_SUQADD8x16:
   4901                *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X001110, vN, vD);
   4902                break;
   4903             case ARM64vecmo_USQADD64x2:
   4904                *p++ = X_3_8_5_6_5_5(X011, X01110111, X00000, X001110, vN, vD);
   4905                break;
   4906             case ARM64vecmo_USQADD32x4:
   4907                *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X001110, vN, vD);
   4908                break;
   4909             case ARM64vecmo_USQADD16x8:
   4910                *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X001110, vN, vD);
   4911                break;
   4912             case ARM64vecmo_USQADD8x16:
   4913                *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X001110, vN, vD);
   4914                break;
   4915             default:
   4916                goto bad;
   4917          }
   4918          goto done;
   4919       }
   4920       case ARM64in_VUnaryV: {
   4921          /* 31        23   20    15     9 4
   4922             010 01110 11 1 00000 111110 n d  FABS Vd.2d,  Vn.2d
   4923             010 01110 10 1 00000 111110 n d  FABS Vd.4s,  Vn.4s
   4924             011 01110 11 1 00000 111110 n d  FNEG Vd.2d,  Vn.2d
   4925             011 01110 10 1 00000 111110 n d  FNEG Vd.4s,  Vn.4s
   4926             011 01110 00 1 00000 010110 n d  NOT  Vd.16b, Vn.16b
   4927 
   4928             010 01110 11 1 00000 101110 n d  ABS  Vd.2d,  Vn.2d
   4929             010 01110 10 1 00000 101110 n d  ABS  Vd.4s,  Vn.4s
   4930             010 01110 01 1 00000 101110 n d  ABS  Vd.8h,  Vn.8h
   4931             010 01110 00 1 00000 101110 n d  ABS  Vd.16b, Vn.16b
   4932 
   4933             010 01110 10 1 00000 010010 n d  CLS  Vd.4s,  Vn.4s
   4934             010 01110 01 1 00000 010010 n d  CLS  Vd.8h,  Vn.8h
   4935             010 01110 00 1 00000 010010 n d  CLS  Vd.16b, Vn.16b
   4936 
   4937             011 01110 10 1 00000 010010 n d  CLZ  Vd.4s,  Vn.4s
   4938             011 01110 01 1 00000 010010 n d  CLZ  Vd.8h,  Vn.8h
   4939             011 01110 00 1 00000 010010 n d  CLZ  Vd.16b, Vn.16b
   4940 
   4941             010 01110 00 1 00000 010110 n d  CNT  Vd.16b, Vn.16b
   4942 
   4943             011 01110 01 1 00000 010110 n d  RBIT  Vd.16b, Vn.16b
   4944             010 01110 00 1 00000 000110 n d  REV16 Vd.16b, Vn.16b
   4945             011 01110 00 1 00000 000010 n d  REV32 Vd.16b, Vn.16b
   4946             011 01110 01 1 00000 000010 n d  REV32 Vd.8h, Vn.8h
   4947 
   4948             010 01110 00 1 00000 000010 n d  REV64 Vd.16b, Vn.16b
   4949             010 01110 01 1 00000 000010 n d  REV64 Vd.8h, Vn.8h
   4950             010 01110 10 1 00000 000010 n d  REV64 Vd.4s, Vn.4s
   4951 
   4952             010 01110 10 1 00001 110010 n d  URECPE Vd.4s, Vn.4s
   4953             011 01110 10 1 00001 110010 n d  URSQRTE Vd.4s, Vn.4s
   4954 
   4955             010 01110 11 1 00001 110110 n d  FRECPE Vd.2d, Vn.2d
   4956             010 01110 10 1 00001 110110 n d  FRECPE Vd.4s, Vn.4s
   4957 
    4958             011 01110 11 1 00001 110110 n d  FRSQRTE Vd.2d, Vn.2d
    4959             011 01110 10 1 00001 110110 n d  FRSQRTE Vd.4s, Vn.4s
   4960 
   4961             011 01110 11 1 00001 111110 n d  FSQRT Vd.2d, Vn.2d
   4962             011 01110 10 1 00001 111110 n d  FSQRT Vd.4s, Vn.4s
   4963          */
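                  /* Throughout this case, the middle bit of the leading 3-bit
                     field is again the U bit: 010 vs 011 is what separates
                     e.g. FABS/FNEG, CLS/CLZ, URECPE/URSQRTE and
                     FRECPE/FRSQRTE in the encodings below. */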
   4964          UInt vD = qregEnc(i->ARM64in.VUnaryV.dst);
   4965          UInt vN = qregEnc(i->ARM64in.VUnaryV.arg);
   4966          switch (i->ARM64in.VUnaryV.op) {
   4967             case ARM64vecu_FABS64x2:
   4968                *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X111110, vN, vD);
   4969                break;
   4970             case ARM64vecu_FABS32x4:
   4971                *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X111110, vN, vD);
   4972                break;
   4973             case ARM64vecu_FNEG64x2:
   4974                *p++ = X_3_8_5_6_5_5(X011, X01110111, X00000, X111110, vN, vD);
   4975                break;
   4976             case ARM64vecu_FNEG32x4:
   4977                *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X111110, vN, vD);
   4978                break;
   4979             case ARM64vecu_NOT:
   4980                *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X010110, vN, vD);
   4981                break;
   4982             case ARM64vecu_ABS64x2:
   4983                *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X101110, vN, vD);
   4984                break;
   4985             case ARM64vecu_ABS32x4:
   4986                *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X101110, vN, vD);
   4987                break;
   4988             case ARM64vecu_ABS16x8:
   4989                *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X101110, vN, vD);
   4990                break;
   4991             case ARM64vecu_ABS8x16:
   4992                *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X101110, vN, vD);
   4993                break;
   4994             case ARM64vecu_CLS32x4:
   4995                *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X010010, vN, vD);
   4996                break;
   4997             case ARM64vecu_CLS16x8:
   4998                *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X010010, vN, vD);
   4999                break;
   5000             case ARM64vecu_CLS8x16:
   5001                *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X010010, vN, vD);
   5002                break;
   5003             case ARM64vecu_CLZ32x4:
   5004                *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X010010, vN, vD);
   5005                break;
   5006             case ARM64vecu_CLZ16x8:
   5007                *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X010010, vN, vD);
   5008                break;
   5009             case ARM64vecu_CLZ8x16:
   5010                *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X010010, vN, vD);
   5011                break;
   5012             case ARM64vecu_CNT8x16:
   5013                *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X010110, vN, vD);
   5014                break;
   5015             case ARM64vecu_RBIT:
   5016                *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X010110, vN, vD);
   5017                break;
   5018             case ARM64vecu_REV1616B:
   5019                *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X000110, vN, vD);
   5020                break;
   5021             case ARM64vecu_REV3216B:
   5022                *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X000010, vN, vD);
   5023                break;
   5024             case ARM64vecu_REV328H:
   5025                *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X000010, vN, vD);
   5026                break;
   5027             case ARM64vecu_REV6416B:
   5028                *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X000010, vN, vD);
   5029                break;
   5030             case ARM64vecu_REV648H:
   5031                *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X000010, vN, vD);
   5032                break;
   5033             case ARM64vecu_REV644S:
   5034                *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X000010, vN, vD);
   5035                break;
   5036             case ARM64vecu_URECPE32x4:
   5037                *p++ = X_3_8_5_6_5_5(X010, X01110101, X00001, X110010, vN, vD);
   5038                break;
   5039             case ARM64vecu_URSQRTE32x4:
   5040                *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X110010, vN, vD);
   5041                break;
   5042             case ARM64vecu_FRECPE64x2:
   5043                *p++ = X_3_8_5_6_5_5(X010, X01110111, X00001, X110110, vN, vD);
   5044                break;
   5045             case ARM64vecu_FRECPE32x4:
   5046                *p++ = X_3_8_5_6_5_5(X010, X01110101, X00001, X110110, vN, vD);
   5047                break;
   5048             case ARM64vecu_FRSQRTE64x2:
   5049                *p++ = X_3_8_5_6_5_5(X011, X01110111, X00001, X110110, vN, vD);
   5050                break;
   5051             case ARM64vecu_FRSQRTE32x4:
   5052                *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X110110, vN, vD);
   5053                break;
   5054             case ARM64vecu_FSQRT64x2:
   5055                *p++ = X_3_8_5_6_5_5(X011, X01110111, X00001, X111110, vN, vD);
   5056                break;
   5057             case ARM64vecu_FSQRT32x4:
   5058                *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X111110, vN, vD);
   5059                break;
   5060             default:
   5061                goto bad;
   5062          }
   5063          goto done;
   5064       }
   5065       case ARM64in_VNarrowV: {
   5066          /* 31        23 21      15     9 4
   5067             000 01110 00 1,00001 001010 n d  XTN Vd.8b, Vn.8h
   5068             000 01110 01 1,00001 001010 n d  XTN Vd.4h, Vn.4s
   5069             000 01110 10 1,00001 001010 n d  XTN Vd.2s, Vn.2d
   5070 
   5071             001 01110 00 1,00001 001010 n d  SQXTUN Vd.8b, Vn.8h
   5072             001 01110 01 1,00001 001010 n d  SQXTUN Vd.4h, Vn.4s
   5073             001 01110 10 1,00001 001010 n d  SQXTUN Vd.2s, Vn.2d
   5074 
   5075             000 01110 00 1,00001 010010 n d  SQXTN Vd.8b, Vn.8h
   5076             000 01110 01 1,00001 010010 n d  SQXTN Vd.4h, Vn.4s
   5077             000 01110 10 1,00001 010010 n d  SQXTN Vd.2s, Vn.2d
   5078 
   5079             001 01110 00 1,00001 010010 n d  UQXTN Vd.8b, Vn.8h
   5080             001 01110 01 1,00001 010010 n d  UQXTN Vd.4h, Vn.4s
   5081             001 01110 10 1,00001 010010 n d  UQXTN Vd.2s, Vn.2d
   5082          */
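                  /* dszBlg2 is log2 of the destination lane size in bytes
                     (0 -> .8b, 1 -> .4h, 2 -> .2s); OR-ing (dszBlg2 << 1)
                     into the X01110001 template below sets the two size bits
                     shown in the diagram above. */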
   5083          UInt vD = qregEnc(i->ARM64in.VNarrowV.dst);
   5084          UInt vN = qregEnc(i->ARM64in.VNarrowV.src);
   5085          UInt dszBlg2 = i->ARM64in.VNarrowV.dszBlg2;
   5086          vassert(dszBlg2 >= 0 && dszBlg2 <= 2);
   5087          switch (i->ARM64in.VNarrowV.op) {
   5088             case ARM64vecna_XTN:
   5089                *p++ = X_3_8_5_6_5_5(X000, X01110001 | (dszBlg2 << 1),
   5090                                     X00001, X001010, vN, vD);
   5091                goto done;
   5092             case ARM64vecna_SQXTUN:
   5093                *p++ = X_3_8_5_6_5_5(X001, X01110001 | (dszBlg2 << 1),
   5094                                     X00001, X001010, vN, vD);
   5095                goto done;
   5096             case ARM64vecna_SQXTN:
   5097                *p++ = X_3_8_5_6_5_5(X000, X01110001 | (dszBlg2 << 1),
   5098                                     X00001, X010010, vN, vD);
   5099                goto done;
   5100             case ARM64vecna_UQXTN:
   5101                *p++ = X_3_8_5_6_5_5(X001, X01110001 | (dszBlg2 << 1),
   5102                                     X00001, X010010, vN, vD);
   5103                goto done;
   5104             default:
   5105                break;
   5106          }
   5107          goto bad;
   5108       }
   5109       case ARM64in_VShiftImmV: {
   5110          /*
   5111             011 011110 immh immb 000001 n d  USHR     Vd.T, Vn.T, #sh
   5112             010 011110 immh immb 000001 n d  SSHR     Vd.T, Vn.T, #sh
   5113 
   5114             001 011110 immh immb 100101 n d  UQSHRN   ,,#sh
   5115             000 011110 immh immb 100101 n d  SQSHRN   ,,#sh
   5116             001 011110 immh immb 100001 n d  SQSHRUN  ,,#sh
   5117 
   5118             001 011110 immh immb 100111 n d  UQRSHRN  ,,#sh
   5119             000 011110 immh immb 100111 n d  SQRSHRN  ,,#sh
   5120             001 011110 immh immb 100011 n d  SQRSHRUN ,,#sh
   5121 
   5122             where immh:immb
   5123                = case T of
   5124                     2d  | sh in 1..64 -> let xxxxxx = 64-sh in 1xxx:xxx
   5125                     4s  | sh in 1..32 -> let  xxxxx = 32-sh in 01xx:xxx
   5126                     8h  | sh in 1..16 -> let   xxxx = 16-sh in 001x:xxx
   5127                     16b | sh in 1..8  -> let    xxx =  8-sh in 0001:xxx
   5128 
   5129             010 011110 immh immb 010101 n d  SHL    Vd.T, Vn.T, #sh
   5130 
   5131             011 011110 immh immb 011101 n d  UQSHL  Vd.T, Vn.T, #sh
   5132             010 011110 immh immb 011101 n d  SQSHL  Vd.T, Vn.T, #sh
   5133             011 011110 immh immb 011001 n d  SQSHLU Vd.T, Vn.T, #sh
   5134 
   5135             where immh:immb
   5136                = case T of
   5137                     2d  | sh in 0..63 -> let xxxxxx = sh in 1xxx:xxx
   5138                     4s  | sh in 0..31 -> let  xxxxx = sh in 01xx:xxx
   5139                     8h  | sh in 0..15 -> let   xxxx = sh in 001x:xxx
   5140                     16b | sh in 0..7  -> let    xxx = sh in 0001:xxx
   5141          */
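                  /* Worked examples of the immh:immb scheme (shift amounts
                     chosen purely for illustration):
                        USHR Vd.4s, Vn.4s, #7 -> immh:immb = 0100000|(32-7) = 0111001
                        SHL  Vd.8h, Vn.8h, #3 -> immh:immb = 0010000|3      = 0010011
                     That 7-bit value is OR'd into the zero field of the
                     templates constructed below. */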
   5142          UInt vD   = qregEnc(i->ARM64in.VShiftImmV.dst);
   5143          UInt vN   = qregEnc(i->ARM64in.VShiftImmV.src);
   5144          UInt sh   = i->ARM64in.VShiftImmV.amt;
   5145          UInt tmpl = 0; /* invalid */
   5146 
   5147          const UInt tmpl_USHR
   5148             = X_3_6_7_6_5_5(X011, X011110, 0, X000001, vN, vD);
   5149          const UInt tmpl_SSHR
   5150             = X_3_6_7_6_5_5(X010, X011110, 0, X000001, vN, vD);
   5151 
   5152          const UInt tmpl_UQSHRN
   5153             = X_3_6_7_6_5_5(X001, X011110, 0, X100101, vN, vD);
   5154          const UInt tmpl_SQSHRN
   5155             = X_3_6_7_6_5_5(X000, X011110, 0, X100101, vN, vD);
   5156          const UInt tmpl_SQSHRUN
   5157             = X_3_6_7_6_5_5(X001, X011110, 0, X100001, vN, vD);
   5158 
   5159          const UInt tmpl_UQRSHRN
   5160             = X_3_6_7_6_5_5(X001, X011110, 0, X100111, vN, vD);
   5161          const UInt tmpl_SQRSHRN
   5162             = X_3_6_7_6_5_5(X000, X011110, 0, X100111, vN, vD);
   5163          const UInt tmpl_SQRSHRUN
   5164             = X_3_6_7_6_5_5(X001, X011110, 0, X100011, vN, vD);
   5165 
   5166          const UInt tmpl_SHL
   5167             = X_3_6_7_6_5_5(X010, X011110, 0, X010101, vN, vD);
   5168 
   5169          const UInt tmpl_UQSHL
   5170             = X_3_6_7_6_5_5(X011, X011110, 0, X011101, vN, vD);
   5171          const UInt tmpl_SQSHL
   5172             = X_3_6_7_6_5_5(X010, X011110, 0, X011101, vN, vD);
   5173          const UInt tmpl_SQSHLU
   5174             = X_3_6_7_6_5_5(X011, X011110, 0, X011001, vN, vD);
   5175 
   5176          switch (i->ARM64in.VShiftImmV.op) {
   5177             case ARM64vecshi_SSHR64x2:    tmpl = tmpl_SSHR;     goto right64x2;
   5178             case ARM64vecshi_USHR64x2:    tmpl = tmpl_USHR;     goto right64x2;
   5179             case ARM64vecshi_SHL64x2:     tmpl = tmpl_SHL;      goto left64x2;
   5180             case ARM64vecshi_UQSHL64x2:   tmpl = tmpl_UQSHL;    goto left64x2;
   5181             case ARM64vecshi_SQSHL64x2:   tmpl = tmpl_SQSHL;    goto left64x2;
   5182             case ARM64vecshi_SQSHLU64x2:  tmpl = tmpl_SQSHLU;   goto left64x2;
   5183             case ARM64vecshi_SSHR32x4:    tmpl = tmpl_SSHR;     goto right32x4;
   5184             case ARM64vecshi_USHR32x4:    tmpl = tmpl_USHR;     goto right32x4;
   5185             case ARM64vecshi_UQSHRN2SD:   tmpl = tmpl_UQSHRN;   goto right32x4;
   5186             case ARM64vecshi_SQSHRN2SD:   tmpl = tmpl_SQSHRN;   goto right32x4;
   5187             case ARM64vecshi_SQSHRUN2SD:  tmpl = tmpl_SQSHRUN;  goto right32x4;
   5188             case ARM64vecshi_UQRSHRN2SD:  tmpl = tmpl_UQRSHRN;  goto right32x4;
   5189             case ARM64vecshi_SQRSHRN2SD:  tmpl = tmpl_SQRSHRN;  goto right32x4;
   5190             case ARM64vecshi_SQRSHRUN2SD: tmpl = tmpl_SQRSHRUN; goto right32x4;
   5191             case ARM64vecshi_SHL32x4:     tmpl = tmpl_SHL;      goto left32x4;
   5192             case ARM64vecshi_UQSHL32x4:   tmpl = tmpl_UQSHL;    goto left32x4;
   5193             case ARM64vecshi_SQSHL32x4:   tmpl = tmpl_SQSHL;    goto left32x4;
   5194             case ARM64vecshi_SQSHLU32x4:  tmpl = tmpl_SQSHLU;   goto left32x4;
   5195             case ARM64vecshi_SSHR16x8:    tmpl = tmpl_SSHR;     goto right16x8;
   5196             case ARM64vecshi_USHR16x8:    tmpl = tmpl_USHR;     goto right16x8;
   5197             case ARM64vecshi_UQSHRN4HS:   tmpl = tmpl_UQSHRN;   goto right16x8;
   5198             case ARM64vecshi_SQSHRN4HS:   tmpl = tmpl_SQSHRN;   goto right16x8;
   5199             case ARM64vecshi_SQSHRUN4HS:  tmpl = tmpl_SQSHRUN;  goto right16x8;
   5200             case ARM64vecshi_UQRSHRN4HS:  tmpl = tmpl_UQRSHRN;  goto right16x8;
   5201             case ARM64vecshi_SQRSHRN4HS:  tmpl = tmpl_SQRSHRN;  goto right16x8;
   5202             case ARM64vecshi_SQRSHRUN4HS: tmpl = tmpl_SQRSHRUN; goto right16x8;
   5203             case ARM64vecshi_SHL16x8:     tmpl = tmpl_SHL;      goto left16x8;
   5204             case ARM64vecshi_UQSHL16x8:   tmpl = tmpl_UQSHL;    goto left16x8;
   5205             case ARM64vecshi_SQSHL16x8:   tmpl = tmpl_SQSHL;    goto left16x8;
   5206             case ARM64vecshi_SQSHLU16x8:  tmpl = tmpl_SQSHLU;   goto left16x8;
   5207             case ARM64vecshi_SSHR8x16:    tmpl = tmpl_SSHR;     goto right8x16;
   5208             case ARM64vecshi_USHR8x16:    tmpl = tmpl_USHR;     goto right8x16;
   5209             case ARM64vecshi_UQSHRN8BH:   tmpl = tmpl_UQSHRN;   goto right8x16;
   5210             case ARM64vecshi_SQSHRN8BH:   tmpl = tmpl_SQSHRN;   goto right8x16;
   5211             case ARM64vecshi_SQSHRUN8BH:  tmpl = tmpl_SQSHRUN;  goto right8x16;
   5212             case ARM64vecshi_UQRSHRN8BH:  tmpl = tmpl_UQRSHRN;  goto right8x16;
   5213             case ARM64vecshi_SQRSHRN8BH:  tmpl = tmpl_SQRSHRN;  goto right8x16;
   5214             case ARM64vecshi_SQRSHRUN8BH: tmpl = tmpl_SQRSHRUN; goto right8x16;
   5215             case ARM64vecshi_SHL8x16:     tmpl = tmpl_SHL;      goto left8x16;
   5216             case ARM64vecshi_UQSHL8x16:   tmpl = tmpl_UQSHL;    goto left8x16;
   5217             case ARM64vecshi_SQSHL8x16:   tmpl = tmpl_SQSHL;    goto left8x16;
   5218             case ARM64vecshi_SQSHLU8x16:  tmpl = tmpl_SQSHLU;   goto left8x16;
   5219 
   5220             default: break;
   5221 
   5222             right64x2:
   5223                if (sh >= 1 && sh <= 63) {
   5224                   *p++ = tmpl | X_3_6_7_6_5_5(0,0, X1000000 | (64-sh), 0,0,0);
   5225                   goto done;
   5226                }
   5227                break;
   5228             right32x4:
   5229                if (sh >= 1 && sh <= 32) {
   5230                   *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0100000 | (32-sh), 0,0,0);
   5231                   goto done;
   5232                }
   5233                break;
   5234             right16x8:
   5235                if (sh >= 1 && sh <= 16) {
   5236                   *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0010000 | (16-sh), 0,0,0);
   5237                   goto done;
   5238                }
   5239                break;
   5240             right8x16:
   5241                if (sh >= 1 && sh <= 8) {
   5242                   *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0001000 | (8-sh), 0,0,0);
   5243                   goto done;
   5244                }
   5245                break;
   5246 
   5247             left64x2:
   5248                if (sh >= 0 && sh <= 63) {
   5249                   *p++ = tmpl | X_3_6_7_6_5_5(0,0, X1000000 | sh, 0,0,0);
   5250                   goto done;
   5251                }
   5252                break;
   5253             left32x4:
   5254                if (sh >= 0 && sh <= 31) {
   5255                   *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0100000 | sh, 0,0,0);
   5256                   goto done;
   5257                }
   5258                break;
   5259             left16x8:
   5260                if (sh >= 0 && sh <= 15) {
   5261                   *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0010000 | sh, 0,0,0);
   5262                   goto done;
   5263                }
   5264                break;
   5265             left8x16:
   5266                if (sh >= 0 && sh <= 7) {
   5267                   *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0001000 | sh, 0,0,0);
   5268                   goto done;
   5269                }
   5270                break;
   5271          }
   5272          goto bad;
   5273       }
   5274       case ARM64in_VExtV: {
   5275          /*
   5276             011 01110 000 m 0 imm4 0 n d  EXT Vd.16b, Vn.16b, Vm.16b, #imm4
   5277             where imm4 = the shift amount, in bytes,
   5278                   Vn is low operand, Vm is high operand
   5279          */
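                  /* Note that imm4 is shifted left by one below, so it lands
                     in bits 14:11 of the instruction, leaving bits 15 and 10
                     zero as the diagram above requires. */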
   5280          UInt vD   = qregEnc(i->ARM64in.VExtV.dst);
   5281          UInt vN   = qregEnc(i->ARM64in.VExtV.srcLo);
   5282          UInt vM   = qregEnc(i->ARM64in.VExtV.srcHi);
   5283          UInt imm4 = i->ARM64in.VExtV.amtB;
   5284          vassert(imm4 >= 1 && imm4 <= 15);
   5285          *p++ = X_3_8_5_6_5_5(X011, X01110000, vM,
   5286                               X000000 | (imm4 << 1), vN, vD);
   5287          goto done;
   5288       }
   5289       case ARM64in_VImmQ: {
   5290          UInt   rQ  = qregEnc(i->ARM64in.VImmQ.rQ);
   5291          UShort imm = i->ARM64in.VImmQ.imm;
   5292          vassert(rQ < 32);
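                  /* Judging from the cases below, imm is a 16-bit mask with
                     one bit per byte of the 128-bit result: bit i set means
                     byte i of rQ becomes 0xFF.  Only the patterns listed here
                     are handled; everything else falls through to 'bad'. */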
   5293          switch (imm) {
   5294             case 0x0000:
   5295                // movi rQ.4s, #0x0 == 0x4F 0x00 0x04 000 rQ
   5296                *p++ = 0x4F000400 | rQ;
   5297                goto done;
   5298             case 0x0001:
   5299                // movi rQ, #0xFF == 0x2F 0x00 0xE4 001 rQ
   5300                *p++ = 0x2F00E420 | rQ;
   5301                goto done;
   5302             case 0x0003:
   5303                // movi rQ, #0xFFFF == 0x2F 0x00 0xE4 011 rQ
   5304                *p++ = 0x2F00E460 | rQ;
   5305                goto done;
   5306             case 0x000F:
   5307                // movi rQ, #0xFFFFFFFF == 0x2F 0x00 0xE5 111 rQ
   5308                *p++ = 0x2F00E5E0 | rQ;
   5309                goto done;
   5310             case 0x003F:
   5311                // movi rQ, #0xFFFFFFFFFFFF == 0x2F 0x01 0xE7 111 rQ
   5312                *p++ = 0x2F01E7E0 | rQ;
   5313                goto done;
   5314             case 0x00FF:
   5315                // movi rQ, #0xFFFFFFFFFFFFFFFF == 0x2F 0x07 0xE7 111 rQ
   5316                *p++ = 0x2F07E7E0 | rQ;
   5317                goto done;
   5318             case 0xFFFF:
   5319                // mvni rQ.4s, #0x0 == 0x6F 0x00 0x04 000 rQ
   5320                *p++ = 0x6F000400 | rQ;
   5321                goto done;
   5322             default:
   5323                break;
   5324          }
   5325          goto bad; /* no other handled cases right now */
   5326       }
   5327 
   5328       case ARM64in_VDfromX: {
   5329          /* INS Vd.D[0], rX
   5330             0100 1110 0000 1000 0001 11 nn dd   INS Vd.D[0], Xn
   5331             This isn't wonderful, in the sense that the upper half of
   5332             the vector register stays unchanged and thus the insn is
   5333             data dependent on its output register. */
   5334          UInt dd = dregEnc(i->ARM64in.VDfromX.rD);
   5335          UInt xx = iregEnc(i->ARM64in.VDfromX.rX);
   5336          vassert(xx < 31);
   5337          *p++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xx,dd);
   5338          goto done;
   5339       }
   5340 
   5341       case ARM64in_VQfromX: {
   5342          /* FMOV D, X
    5343             1001 1110 0110 0111 0000 00 nn dd   FMOV Dd, Xn
    5344             This zeroes the upper 64 bits of the destination, which is
    5345             what we need.  TODO: can we do VDfromX and VQfromXX better? */
   5346          UInt dd = qregEnc(i->ARM64in.VQfromX.rQ);
   5347          UInt xx = iregEnc(i->ARM64in.VQfromX.rXlo);
   5348          vassert(xx < 31);
   5349          *p++ = 0x9E670000 | X_2_6_2_12_5_5(0,0,0,0,xx,dd);
   5350          goto done;
   5351       }
   5352 
   5353       case ARM64in_VQfromXX: {
   5354          /* What we really generate is a two insn sequence:
   5355                INS Vd.D[0], Xlo; INS Vd.D[1], Xhi
   5356             0100 1110 0000 1000 0001 11 nn dd   INS Vd.D[0], Xn
   5357             0100 1110 0001 1000 0001 11 nn dd   INS Vd.D[1], Xn
   5358          */
   5359          UInt qq  = qregEnc(i->ARM64in.VQfromXX.rQ);
   5360          UInt xhi = iregEnc(i->ARM64in.VQfromXX.rXhi);
   5361          UInt xlo = iregEnc(i->ARM64in.VQfromXX.rXlo);
   5362          vassert(xhi < 31 && xlo < 31);
   5363          *p++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xlo,qq);
   5364          *p++ = 0x4E181C00 | X_2_6_2_12_5_5(0,0,0,0,xhi,qq);
   5365          goto done;
   5366       }
   5367 
   5368       case ARM64in_VXfromQ: {
   5369          /* 010 0111 0000 01000 001111 nn dd  UMOV Xd, Vn.D[0]
   5370             010 0111 0000 11000 001111 nn dd  UMOV Xd, Vn.D[1]
   5371          */
   5372          UInt dd     = iregEnc(i->ARM64in.VXfromQ.rX);
   5373          UInt nn     = qregEnc(i->ARM64in.VXfromQ.rQ);
   5374          UInt laneNo = i->ARM64in.VXfromQ.laneNo;
   5375          vassert(dd < 31);
   5376          vassert(laneNo < 2);
   5377          *p++ = X_3_8_5_6_5_5(X010, X01110000,
   5378                               laneNo == 1 ? X11000 : X01000, X001111, nn, dd);
   5379          goto done;
   5380       }
   5381 
   5382       case ARM64in_VXfromDorS: {
   5383          /* 000 11110001 00110 000000 n d     FMOV Wd, Sn
   5384             100 11110011 00110 000000 n d     FMOV Xd, Dn
   5385          */
   5386          UInt dd    = iregEnc(i->ARM64in.VXfromDorS.rX);
   5387          UInt nn    = dregEnc(i->ARM64in.VXfromDorS.rDorS);
   5388          Bool fromD = i->ARM64in.VXfromDorS.fromD;
   5389          vassert(dd < 31);
   5390          *p++ = X_3_8_5_6_5_5(fromD ? X100 : X000,
   5391                               fromD ? X11110011 : X11110001,
   5392                               X00110, X000000, nn, dd);
   5393          goto done;
   5394       }
   5395 
   5396       case ARM64in_VMov: {
   5397          /* 000 11110 00 10000 00 10000 n d   FMOV Sd, Sn
   5398             000 11110 01 10000 00 10000 n d   FMOV Dd, Dn
   5399             010 01110 10 1 n    0 00111 n d   MOV Vd.16b, Vn.16b
   5400          */
   5401         HReg rD = i->ARM64in.VMov.dst;
   5402         HReg rN = i->ARM64in.VMov.src;
   5403         switch (i->ARM64in.VMov.szB) {
   5404            case 16: {
   5405               UInt dd = qregEnc(rD);
   5406               UInt nn = qregEnc(rN);
   5407               *p++ = X_3_8_5_6_5_5(X010, X01110101, nn, X000111, nn, dd);
   5408               goto done;
   5409            }
   5410            case 8: {
   5411               UInt dd = dregEnc(rD);
   5412               UInt nn = dregEnc(rN);
   5413               *p++ = X_3_8_5_6_5_5(X000, X11110011, X00000, X010000, nn, dd);
   5414               goto done;
   5415            }
   5416            default:
   5417               break;
   5418         }
   5419         goto bad;
   5420       }
   5421 
   5422       case ARM64in_EvCheck: {
   5423          /* The sequence is fixed (canned) except for the two amodes
   5424             supplied by the insn.  These don't change the length, though.
   5425             We generate:
   5426                ldr  w9, [x21 + #8]   8 == offsetof(host_EvC_COUNTER)
   5427                subs w9, w9, #1
   5428                str  w9, [x21 + #8]   8 == offsetof(host_EvC_COUNTER)
   5429                bpl  nofail
   5430                ldr  x9, [x21 + #0]   0 == offsetof(host_EvC_FAILADDR)
   5431                br   x9
   5432               nofail:
   5433          */
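                  /* That is 6 insns == 24 bytes; evCheckSzB_ARM64 below must
                     return the same number, and the vassert at the end of
                     this case checks that it does. */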
   5434          UInt* p0 = p;
   5435          p = do_load_or_store32(p, True/*isLoad*/, /*w*/9,
   5436                                 i->ARM64in.EvCheck.amCounter);
   5437          *p++ = 0x71000529; /* subs w9, w9, #1 */
   5438          p = do_load_or_store32(p, False/*!isLoad*/, /*w*/9,
   5439                                 i->ARM64in.EvCheck.amCounter);
   5440          *p++ = 0x54000065; /* bpl nofail */
   5441          p = do_load_or_store64(p, True/*isLoad*/, /*x*/9,
   5442                                 i->ARM64in.EvCheck.amFailAddr);
   5443          *p++ = 0xD61F0120; /* br x9 */
   5444          /* nofail: */
   5445 
   5446          /* Crosscheck */
   5447          vassert(evCheckSzB_ARM64() == (UChar*)p - (UChar*)p0);
   5448          goto done;
   5449       }
   5450 
   5451       case ARM64in_ProfInc: {
   5452          /* We generate:
   5453               (ctrP is unknown now, so use 0x6555'7555'8555'9566 in the
   5454               expectation that a later call to LibVEX_patchProfCtr
   5455               will be used to fill in the immediate fields once the
   5456               right value is known.)
   5457             imm64-exactly4 x9, 0x6555'7555'8555'9566
   5458             ldr  x8, [x9]
   5459             add  x8, x8, #1
   5460             str  x8, [x9]
   5461          */
   5462          p = imm64_to_ireg_EXACTLY4(p, /*x*/9, 0x6555755585559566ULL);
   5463          *p++ = 0xF9400128;
   5464          *p++ = 0x91000508;
   5465          *p++ = 0xF9000128;
   5466          /* Tell the caller .. */
   5467          vassert(!(*is_profInc));
   5468          *is_profInc = True;
   5469          goto done;
   5470       }
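               /* The magic constant 0x6555'7555'8555'9566 used above must
                  match the value that patchProfInc_ARM64, at the end of this
                  file, checks for before overwriting it with the real counter
                  address. */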
   5471 
   5472       /* ... */
   5473       default:
   5474          goto bad;
   5475     }
   5476 
   5477   bad:
   5478    ppARM64Instr(i);
   5479    vpanic("emit_ARM64Instr");
   5480    /*NOTREACHED*/
   5481 
   5482   done:
   5483    vassert(((UChar*)p) - &buf[0] <= 36);
   5484    return ((UChar*)p) - &buf[0];
   5485 }
   5486 
   5487 
   5488 /* How big is an event check?  See case for ARM64in_EvCheck in
   5489    emit_ARM64Instr just above.  That crosschecks what this returns, so
   5490    we can tell if we're inconsistent. */
   5491 Int evCheckSzB_ARM64 (void)
   5492 {
   5493    return 24;
   5494 }
   5495 
   5496 
   5497 /* NB: what goes on here has to be very closely coordinated with the
   5498    emitInstr case for XDirect, above. */
   5499 VexInvalRange chainXDirect_ARM64 ( VexEndness endness_host,
   5500                                    void* place_to_chain,
   5501                                    const void* disp_cp_chain_me_EXPECTED,
   5502                                    const void* place_to_jump_to )
   5503 {
   5504    vassert(endness_host == VexEndnessLE);
   5505 
   5506    /* What we're expecting to see is:
   5507         movw x9, disp_cp_chain_me_to_EXPECTED[15:0]
    5508         movk x9, disp_cp_chain_me_to_EXPECTED[31:16], lsl 16
   5509         movk x9, disp_cp_chain_me_to_EXPECTED[47:32], lsl 32
   5510         movk x9, disp_cp_chain_me_to_EXPECTED[63:48], lsl 48
   5511         blr  x9
   5512       viz
   5513         <16 bytes generated by imm64_to_ireg_EXACTLY4>
   5514         D6 3F 01 20
   5515    */
   5516    UInt* p = (UInt*)place_to_chain;
   5517    vassert(0 == (3 & (HWord)p));
   5518    vassert(is_imm64_to_ireg_EXACTLY4(
   5519               p, /*x*/9, (Addr)disp_cp_chain_me_EXPECTED));
   5520    vassert(p[4] == 0xD63F0120);
   5521 
   5522    /* And what we want to change it to is:
   5523         movw x9, place_to_jump_to[15:0]
    5524         movk x9, place_to_jump_to[31:16], lsl 16
   5525         movk x9, place_to_jump_to[47:32], lsl 32
   5526         movk x9, place_to_jump_to[63:48], lsl 48
   5527         br   x9
   5528       viz
   5529         <16 bytes generated by imm64_to_ireg_EXACTLY4>
   5530         D6 1F 01 20
   5531 
   5532       The replacement has the same length as the original.
   5533    */
   5534    (void)imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)place_to_jump_to);
   5535    p[4] = 0xD61F0120;
   5536 
   5537    VexInvalRange vir = {(HWord)p, 20};
   5538    return vir;
   5539 }
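
         /* A sketch of the intended use (names other than this function's
            parameters are illustrative): once the destination of an XDirect
            jump is known, the dispatcher-side code can redirect the site with

               VexInvalRange vir
                  = chainXDirect_ARM64(VexEndnessLE, place_to_chain,
                                       disp_cp_chain_me_EXPECTED, new_target);

            and must then invalidate the instruction cache over the returned
            range, since 20 bytes of already-emitted code were rewritten in
            place. */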
   5540 
   5541 
   5542 /* NB: what goes on here has to be very closely coordinated with the
   5543    emitInstr case for XDirect, above. */
   5544 VexInvalRange unchainXDirect_ARM64 ( VexEndness endness_host,
   5545                                      void* place_to_unchain,
   5546                                      const void* place_to_jump_to_EXPECTED,
   5547                                      const void* disp_cp_chain_me )
   5548 {
   5549    vassert(endness_host == VexEndnessLE);
   5550 
   5551    /* What we're expecting to see is:
   5552         movw x9, place_to_jump_to_EXPECTED[15:0]
    5553         movk x9, place_to_jump_to_EXPECTED[31:16], lsl 16
   5554         movk x9, place_to_jump_to_EXPECTED[47:32], lsl 32
   5555         movk x9, place_to_jump_to_EXPECTED[63:48], lsl 48
   5556         br   x9
   5557       viz
   5558         <16 bytes generated by imm64_to_ireg_EXACTLY4>
   5559         D6 1F 01 20
   5560    */
   5561    UInt* p = (UInt*)place_to_unchain;
   5562    vassert(0 == (3 & (HWord)p));
   5563    vassert(is_imm64_to_ireg_EXACTLY4(
   5564               p, /*x*/9, (Addr)place_to_jump_to_EXPECTED));
   5565    vassert(p[4] == 0xD61F0120);
   5566 
   5567    /* And what we want to change it to is:
   5568         movw x9, disp_cp_chain_me_to[15:0]
    5569         movk x9, disp_cp_chain_me_to[31:16], lsl 16
   5570         movk x9, disp_cp_chain_me_to[47:32], lsl 32
   5571         movk x9, disp_cp_chain_me_to[63:48], lsl 48
   5572         blr  x9
   5573       viz
   5574         <16 bytes generated by imm64_to_ireg_EXACTLY4>
   5575         D6 3F 01 20
   5576    */
   5577    (void)imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)disp_cp_chain_me);
   5578    p[4] = 0xD63F0120;
   5579 
   5580    VexInvalRange vir = {(HWord)p, 20};
   5581    return vir;
   5582 }
   5583 
   5584 
   5585 /* Patch the counter address into a profile inc point, as previously
   5586    created by the ARM64in_ProfInc case for emit_ARM64Instr. */
   5587 VexInvalRange patchProfInc_ARM64 ( VexEndness endness_host,
   5588                                    void*  place_to_patch,
   5589                                    const ULong* location_of_counter )
   5590 {
   5591    vassert(sizeof(ULong*) == 8);
   5592    vassert(endness_host == VexEndnessLE);
   5593    UInt* p = (UInt*)place_to_patch;
   5594    vassert(0 == (3 & (HWord)p));
   5595    vassert(is_imm64_to_ireg_EXACTLY4(p, /*x*/9, 0x6555755585559566ULL));
   5596    vassert(p[4] == 0xF9400128);
   5597    vassert(p[5] == 0x91000508);
   5598    vassert(p[6] == 0xF9000128);
   5599    imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)location_of_counter);
   5600    VexInvalRange vir = {(HWord)p, 4*4};
   5601    return vir;
   5602 }
   5603 
   5604 /*---------------------------------------------------------------*/
   5605 /*--- end                                   host_arm64_defs.c ---*/
   5606 /*---------------------------------------------------------------*/
   5607