      1 
      2 /*---------------------------------------------------------------*/
      3 /*--- begin                                 host_arm64_defs.c ---*/
      4 /*---------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2013-2013 OpenWorks
     11       info (at) open-works.net
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     26    02110-1301, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 #include "libvex_basictypes.h"
     32 #include "libvex.h"
     33 #include "libvex_trc_values.h"
     34 
     35 #include "main_util.h"
     36 #include "host_generic_regs.h"
     37 #include "host_arm64_defs.h"
     38 
     39 //ZZ UInt arm_hwcaps = 0;
     40 
     41 
     42 /* --------- Registers. --------- */
     43 
     44 /* The usual HReg abstraction.  We use the following classes only:
     45      X regs (64 bit int)
     46      D regs (64 bit float, also used for 32 bit float)
     47      Q regs (128 bit vector)
     48 */
     49 
     50 void ppHRegARM64 ( HReg reg )  {
     51    Int r;
     52    /* Be generic for all virtual regs. */
     53    if (hregIsVirtual(reg)) {
     54       ppHReg(reg);
     55       return;
     56    }
     57    /* But specific for real regs. */
     58    switch (hregClass(reg)) {
     59       case HRcInt64:
     60          r = hregNumber(reg);
     61          vassert(r >= 0 && r < 31);
     62          vex_printf("x%d", r);
     63          return;
     64       case HRcFlt64:
     65          r = hregNumber(reg);
     66          vassert(r >= 0 && r < 32);
     67          vex_printf("d%d", r);
     68          return;
     69       case HRcVec128:
     70          r = hregNumber(reg);
     71          vassert(r >= 0 && r < 32);
     72          vex_printf("q%d", r);
     73          return;
     74       default:
     75          vpanic("ppHRegARM64");
     76    }
     77 }
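/* Illustrative sketch (kept under #if 0, not compiled): how registers in
   the three classes described above are created and printed.  Only
   mkHReg, ppHRegARM64 and vex_printf are used, all of which already
   appear in this file; the register numbers chosen are arbitrary. */
#if 0
static void example_ppHRegARM64 ( void )
{
   HReg x23 = mkHReg(23, HRcInt64,  False);  /* real; prints "x23" */
   HReg d9  = mkHReg(9,  HRcFlt64,  False);  /* real; prints "d9"  */
   HReg q17 = mkHReg(17, HRcVec128, False);  /* real; prints "q17" */
   HReg v5  = mkHReg(5,  HRcInt64,  True);   /* virtual; printed via the
                                                generic ppHReg */
   ppHRegARM64(x23); vex_printf("\n");
   ppHRegARM64(d9);  vex_printf("\n");
   ppHRegARM64(q17); vex_printf("\n");
   ppHRegARM64(v5);  vex_printf("\n");
}
#endif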
     78 
     79 static void ppHRegARM64asSreg ( HReg reg ) {
     80    ppHRegARM64(reg);
     81    vex_printf("(S-reg)");
     82 }
     83 
     84 HReg hregARM64_X0  ( void ) { return mkHReg(0,  HRcInt64, False); }
     85 HReg hregARM64_X1  ( void ) { return mkHReg(1,  HRcInt64, False); }
     86 HReg hregARM64_X2  ( void ) { return mkHReg(2,  HRcInt64, False); }
     87 HReg hregARM64_X3  ( void ) { return mkHReg(3,  HRcInt64, False); }
     88 HReg hregARM64_X4  ( void ) { return mkHReg(4,  HRcInt64, False); }
     89 HReg hregARM64_X5  ( void ) { return mkHReg(5,  HRcInt64, False); }
     90 HReg hregARM64_X6  ( void ) { return mkHReg(6,  HRcInt64, False); }
     91 HReg hregARM64_X7  ( void ) { return mkHReg(7,  HRcInt64, False); }
     92 //ZZ HReg hregARM_R8  ( void ) { return mkHReg(8,  HRcInt32, False); }
     93 HReg hregARM64_X9  ( void ) { return mkHReg(9,  HRcInt64, False); }
     94 HReg hregARM64_X10 ( void ) { return mkHReg(10, HRcInt64, False); }
     95 HReg hregARM64_X11 ( void ) { return mkHReg(11, HRcInt64, False); }
     96 HReg hregARM64_X12 ( void ) { return mkHReg(12, HRcInt64, False); }
     97 HReg hregARM64_X13 ( void ) { return mkHReg(13, HRcInt64, False); }
     98 HReg hregARM64_X14 ( void ) { return mkHReg(14, HRcInt64, False); }
     99 HReg hregARM64_X15 ( void ) { return mkHReg(15, HRcInt64, False); }
    100 HReg hregARM64_X21 ( void ) { return mkHReg(21, HRcInt64, False); }
    101 HReg hregARM64_X22 ( void ) { return mkHReg(22, HRcInt64, False); }
    102 HReg hregARM64_X23 ( void ) { return mkHReg(23, HRcInt64, False); }
    103 HReg hregARM64_X24 ( void ) { return mkHReg(24, HRcInt64, False); }
    104 HReg hregARM64_X25 ( void ) { return mkHReg(25, HRcInt64, False); }
    105 HReg hregARM64_X26 ( void ) { return mkHReg(26, HRcInt64, False); }
    106 HReg hregARM64_X27 ( void ) { return mkHReg(27, HRcInt64, False); }
    107 HReg hregARM64_X28 ( void ) { return mkHReg(28, HRcInt64, False); }
    108 
    109 // Should really use D8 .. D15 for class F64, since they are
    110 // callee-saved.
    111 HReg hregARM64_D8  ( void ) { return mkHReg(8,  HRcFlt64, False); }
    112 HReg hregARM64_D9  ( void ) { return mkHReg(9,  HRcFlt64, False); }
    113 HReg hregARM64_D10 ( void ) { return mkHReg(10, HRcFlt64, False); }
    114 HReg hregARM64_D11 ( void ) { return mkHReg(11, HRcFlt64, False); }
    115 HReg hregARM64_D12 ( void ) { return mkHReg(12, HRcFlt64, False); }
    116 HReg hregARM64_D13 ( void ) { return mkHReg(13, HRcFlt64, False); }
    117 //ZZ HReg hregARM_S26 ( void ) { return mkHReg(26, HRcFlt32, False); }
    118 //ZZ HReg hregARM_S27 ( void ) { return mkHReg(27, HRcFlt32, False); }
    119 //ZZ HReg hregARM_S28 ( void ) { return mkHReg(28, HRcFlt32, False); }
    120 //ZZ HReg hregARM_S29 ( void ) { return mkHReg(29, HRcFlt32, False); }
    121 //ZZ HReg hregARM_S30 ( void ) { return mkHReg(30, HRcFlt32, False); }
    122 HReg hregARM64_Q16 ( void ) { return mkHReg(16, HRcVec128, False); }
    123 HReg hregARM64_Q17 ( void ) { return mkHReg(17, HRcVec128, False); }
    124 HReg hregARM64_Q18 ( void ) { return mkHReg(18, HRcVec128, False); }
    125 //ZZ HReg hregARM_Q11 ( void ) { return mkHReg(11, HRcVec128, False); }
    126 //ZZ HReg hregARM_Q12 ( void ) { return mkHReg(12, HRcVec128, False); }
    127 //ZZ HReg hregARM_Q13 ( void ) { return mkHReg(13, HRcVec128, False); }
    128 //ZZ HReg hregARM_Q14 ( void ) { return mkHReg(14, HRcVec128, False); }
    129 //ZZ HReg hregARM_Q15 ( void ) { return mkHReg(15, HRcVec128, False); }
    130 
    131 void getAllocableRegs_ARM64 ( Int* nregs, HReg** arr )
    132 {
    133    Int i = 0;
    134    *nregs = 24;
    135    *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
    136 
    137    // Callee-saved registers (x22 to x28) are listed first, since we
    138    // prefer them if they're available.
    139    (*arr)[i++] = hregARM64_X22();
    140    (*arr)[i++] = hregARM64_X23();
    141    (*arr)[i++] = hregARM64_X24();
    142    (*arr)[i++] = hregARM64_X25();
    143    (*arr)[i++] = hregARM64_X26();
    144    (*arr)[i++] = hregARM64_X27();
    145    (*arr)[i++] = hregARM64_X28();
    146 
    147    (*arr)[i++] = hregARM64_X0();
    148    (*arr)[i++] = hregARM64_X1();
    149    (*arr)[i++] = hregARM64_X2();
    150    (*arr)[i++] = hregARM64_X3();
    151    (*arr)[i++] = hregARM64_X4();
    152    (*arr)[i++] = hregARM64_X5();
    153    (*arr)[i++] = hregARM64_X6();
    154    (*arr)[i++] = hregARM64_X7();
    155    // X8 is unassigned; we don't hand it to regalloc.
    156    // X9 is a chaining/spill temporary, not available to regalloc.
    157 
    158    // Do we really need all these?
    159    //(*arr)[i++] = hregARM64_X10();
    160    //(*arr)[i++] = hregARM64_X11();
    161    //(*arr)[i++] = hregARM64_X12();
    162    //(*arr)[i++] = hregARM64_X13();
    163    //(*arr)[i++] = hregARM64_X14();
    164    //(*arr)[i++] = hregARM64_X15();
    165    // X21 is the guest state pointer, not available to regalloc.
    166 
    167    // vector regs.  Unfortunately not callee-saved.
    168    (*arr)[i++] = hregARM64_Q16();
    169    (*arr)[i++] = hregARM64_Q17();
    170    (*arr)[i++] = hregARM64_Q18();
    171 
    172    // F64 regs, all of which are callee-saved
    173    (*arr)[i++] = hregARM64_D8();
    174    (*arr)[i++] = hregARM64_D9();
    175    (*arr)[i++] = hregARM64_D10();
    176    (*arr)[i++] = hregARM64_D11();
    177    (*arr)[i++] = hregARM64_D12();
    178    (*arr)[i++] = hregARM64_D13();
    179 
    180    // unavail: x21 as GSP
    181    // x9 is used as a spill/reload/chaining/call temporary
    182    // x8 is unassigned
    183    // x30 as LR
    184    // x31, because dealing with the SP-vs-ZR overloading is too
    185    // confusing, and we don't need to, so we simply avoid the
    186    // problem
    187    //
    188    // Currently, we have 15 allocatable integer registers:
    189    // 0 1 2 3 4 5 6 7 22 23 24 25 26 27 28
    190    //
    191    // Hence for the allocatable integer registers we have:
    192    //
    193    // callee-saved: 22 23 24 25 26 27 28
    194    // caller-saved: 0 1 2 3 4 5 6 7
    195    //
    196    // If the set of available registers changes, or if their caller-/
    197    // callee-saved status changes, be sure to re-check/sync the
    198    // definition of getHRegUsage for ARM64Instr_Call too.
    199    vassert(i == *nregs);
    200 }
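/* Illustrative sketch (kept under #if 0, not compiled): how a caller
   might inspect the allocatable set returned by getAllocableRegs_ARM64.
   Everything used here is defined in this file or host_generic_regs.h. */
#if 0
static void example_print_allocable_regs ( void )
{
   Int   j, nregs = 0;
   HReg* regs = NULL;
   getAllocableRegs_ARM64(&nregs, &regs);
   vex_printf("%d allocatable registers:\n", nregs);
   for (j = 0; j < nregs; j++) {
      ppHRegARM64(regs[j]);
      vex_printf(" ");
   }
   vex_printf("\n");
}
#endif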
    201 
    202 
    203 /* --------- Condition codes, ARM64 encoding. --------- */
    204 
    205 static const HChar* showARM64CondCode ( ARM64CondCode cond ) {
    206    switch (cond) {
    207        case ARM64cc_EQ:  return "eq";
    208        case ARM64cc_NE:  return "ne";
    209        case ARM64cc_CS:  return "cs";
    210        case ARM64cc_CC:  return "cc";
    211        case ARM64cc_MI:  return "mi";
    212        case ARM64cc_PL:  return "pl";
    213        case ARM64cc_VS:  return "vs";
    214        case ARM64cc_VC:  return "vc";
    215        case ARM64cc_HI:  return "hi";
    216        case ARM64cc_LS:  return "ls";
    217        case ARM64cc_GE:  return "ge";
    218        case ARM64cc_LT:  return "lt";
    219        case ARM64cc_GT:  return "gt";
    220        case ARM64cc_LE:  return "le";
    221        case ARM64cc_AL:  return "al"; // default
    222        case ARM64cc_NV:  return "nv";
    223        default: vpanic("showARM64CondCode");
    224    }
    225 }
    226 
    227 
    228 /* --------- Memory address expressions (amodes). --------- */
    229 
    230 ARM64AMode* ARM64AMode_RI9  ( HReg reg, Int simm9 ) {
    231    ARM64AMode* am        = LibVEX_Alloc(sizeof(ARM64AMode));
    232    am->tag               = ARM64am_RI9;
    233    am->ARM64am.RI9.reg   = reg;
    234    am->ARM64am.RI9.simm9 = simm9;
    235    vassert(-256 <= simm9 && simm9 <= 255);
    236    return am;
    237 }
    238 
    239 ARM64AMode* ARM64AMode_RI12 ( HReg reg, Int uimm12, UChar szB ) {
    240    ARM64AMode* am          = LibVEX_Alloc(sizeof(ARM64AMode));
    241    am->tag                 = ARM64am_RI12;
    242    am->ARM64am.RI12.reg    = reg;
    243    am->ARM64am.RI12.uimm12 = uimm12;
    244    am->ARM64am.RI12.szB    = szB;
    245    vassert(uimm12 >= 0 && uimm12 <= 4095);
    246    switch (szB) {
    247       case 1: case 2: case 4: case 8: break;
    248       default: vassert(0);
    249    }
    250    return am;
    251 }
    252 
    253 ARM64AMode* ARM64AMode_RR ( HReg base, HReg index ) {
    254    ARM64AMode* am       = LibVEX_Alloc(sizeof(ARM64AMode));
    255    am->tag              = ARM64am_RR;
    256    am->ARM64am.RR.base  = base;
    257    am->ARM64am.RR.index = index;
    258    return am;
    259 }
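/* Illustrative sketch (kept under #if 0, not compiled): building the
   three amode forms.  The registers and offsets are arbitrary choices;
   the strings in the comments are what ppARM64AMode (below) would show
   for them. */
#if 0
static void example_amodes ( void )
{
   HReg base  = hregARM64_X21();
   HReg index = hregARM64_X22();
   /* reg + signed 9-bit byte offset, range -256 .. 255 */
   ARM64AMode* am1 = ARM64AMode_RI9(base, -16);     /* "-16(x21)" */
   /* reg + unsigned 12-bit offset scaled by the access size:
      24 * 8 == 192 bytes for an 8-byte access */
   ARM64AMode* am2 = ARM64AMode_RI12(base, 24, 8);  /* "192(x21)" */
   /* reg + reg */
   ARM64AMode* am3 = ARM64AMode_RR(base, index);    /* "(x21,x22)" */
   (void)am1; (void)am2; (void)am3;
}
#endif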
    260 
    261 static void ppARM64AMode ( ARM64AMode* am ) {
    262    switch (am->tag) {
    263       case ARM64am_RI9:
    264          vex_printf("%d(", am->ARM64am.RI9.simm9);
    265          ppHRegARM64(am->ARM64am.RI9.reg);
    266          vex_printf(")");
    267          break;
    268       case ARM64am_RI12:
    269          vex_printf("%u(", (UInt)am->ARM64am.RI12.szB
    270                            * (UInt)am->ARM64am.RI12.uimm12);
    271          ppHRegARM64(am->ARM64am.RI12.reg);
    272          vex_printf(")");
    273          break;
    274       case ARM64am_RR:
    275          vex_printf("(");
    276          ppHRegARM64(am->ARM64am.RR.base);
    277          vex_printf(",");
    278          ppHRegARM64(am->ARM64am.RR.index);
    279          vex_printf(")");
    280          break;
    281       default:
    282          vassert(0);
    283    }
    284 }
    285 
    286 static void addRegUsage_ARM64AMode ( HRegUsage* u, ARM64AMode* am ) {
    287    switch (am->tag) {
    288       case ARM64am_RI9:
    289          addHRegUse(u, HRmRead, am->ARM64am.RI9.reg);
    290          return;
    291       case ARM64am_RI12:
    292          addHRegUse(u, HRmRead, am->ARM64am.RI12.reg);
    293          return;
    294       case ARM64am_RR:
    295          addHRegUse(u, HRmRead, am->ARM64am.RR.base);
    296          addHRegUse(u, HRmRead, am->ARM64am.RR.index);
    297          return;
    298       default:
    299          vpanic("addRegUsage_ARM64Amode");
    300    }
    301 }
    302 
    303 static void mapRegs_ARM64AMode ( HRegRemap* m, ARM64AMode* am ) {
    304    switch (am->tag) {
    305       case ARM64am_RI9:
    306          am->ARM64am.RI9.reg = lookupHRegRemap(m, am->ARM64am.RI9.reg);
    307          return;
    308       case ARM64am_RI12:
    309          am->ARM64am.RI12.reg = lookupHRegRemap(m, am->ARM64am.RI12.reg);
    310          return;
    311       case ARM64am_RR:
    312          am->ARM64am.RR.base  = lookupHRegRemap(m, am->ARM64am.RR.base);
    313          am->ARM64am.RR.index = lookupHRegRemap(m, am->ARM64am.RR.index);
    314          return;
    315       default:
    316          vpanic("mapRegs_ARM64Amode");
    317    }
    318 }
    319 
    320 
    321 //ZZ /* --------- Mem AModes: Addressing Mode 2 --------- */
    322 //ZZ
    323 //ZZ ARMAMode2* ARMAMode2_RI ( HReg reg, Int simm9 ) {
    324 //ZZ    ARMAMode2* am       = LibVEX_Alloc(sizeof(ARMAMode2));
    325 //ZZ    am->tag             = ARMam2_RI;
    326 //ZZ    am->ARMam2.RI.reg   = reg;
    327 //ZZ    am->ARMam2.RI.simm9 = simm9;
    328 //ZZ    vassert(-255 <= simm9 && simm9 <= 255);
    329 //ZZ    return am;
    330 //ZZ }
    331 //ZZ ARMAMode2* ARMAMode2_RR ( HReg base, HReg index ) {
    332 //ZZ    ARMAMode2* am       = LibVEX_Alloc(sizeof(ARMAMode2));
    333 //ZZ    am->tag             = ARMam2_RR;
    334 //ZZ    am->ARMam2.RR.base  = base;
    335 //ZZ    am->ARMam2.RR.index = index;
    336 //ZZ    return am;
    337 //ZZ }
    338 //ZZ
    339 //ZZ void ppARMAMode2 ( ARMAMode2* am ) {
    340 //ZZ    switch (am->tag) {
    341 //ZZ       case ARMam2_RI:
    342 //ZZ          vex_printf("%d(", am->ARMam2.RI.simm9);
    343 //ZZ          ppHRegARM(am->ARMam2.RI.reg);
    344 //ZZ          vex_printf(")");
    345 //ZZ          break;
    346 //ZZ       case ARMam2_RR:
    347 //ZZ          vex_printf("(");
    348 //ZZ          ppHRegARM(am->ARMam2.RR.base);
    349 //ZZ          vex_printf(",");
    350 //ZZ          ppHRegARM(am->ARMam2.RR.index);
    351 //ZZ          vex_printf(")");
    352 //ZZ          break;
    353 //ZZ       default:
    354 //ZZ          vassert(0);
    355 //ZZ    }
    356 //ZZ }
    357 //ZZ
    358 //ZZ static void addRegUsage_ARMAMode2 ( HRegUsage* u, ARMAMode2* am ) {
    359 //ZZ    switch (am->tag) {
    360 //ZZ       case ARMam2_RI:
    361 //ZZ          addHRegUse(u, HRmRead, am->ARMam2.RI.reg);
    362 //ZZ          return;
    363 //ZZ       case ARMam2_RR:
    364 //ZZ          //    addHRegUse(u, HRmRead, am->ARMam2.RR.base);
    365 //ZZ          //    addHRegUse(u, HRmRead, am->ARMam2.RR.index);
    366 //ZZ          //   return;
    367 //ZZ       default:
    368 //ZZ          vpanic("addRegUsage_ARMAmode2");
    369 //ZZ    }
    370 //ZZ }
    371 //ZZ
    372 //ZZ static void mapRegs_ARMAMode2 ( HRegRemap* m, ARMAMode2* am ) {
    373 //ZZ    switch (am->tag) {
    374 //ZZ       case ARMam2_RI:
    375 //ZZ          am->ARMam2.RI.reg = lookupHRegRemap(m, am->ARMam2.RI.reg);
    376 //ZZ          return;
    377 //ZZ       case ARMam2_RR:
    378 //ZZ          //am->ARMam2.RR.base =lookupHRegRemap(m, am->ARMam2.RR.base);
    379 //ZZ          //am->ARMam2.RR.index = lookupHRegRemap(m, am->ARMam2.RR.index);
    380 //ZZ          //return;
    381 //ZZ       default:
    382 //ZZ          vpanic("mapRegs_ARMAmode2");
    383 //ZZ    }
    384 //ZZ }
    385 //ZZ
    386 //ZZ
    387 //ZZ /* --------- Mem AModes: Addressing Mode VFP --------- */
    388 //ZZ
    389 //ZZ ARMAModeV* mkARMAModeV ( HReg reg, Int simm11 ) {
    390 //ZZ    ARMAModeV* am = LibVEX_Alloc(sizeof(ARMAModeV));
    391 //ZZ    vassert(simm11 >= -1020 && simm11 <= 1020);
    392 //ZZ    vassert(0 == (simm11 & 3));
    393 //ZZ    am->reg    = reg;
    394 //ZZ    am->simm11 = simm11;
    395 //ZZ    return am;
    396 //ZZ }
    397 //ZZ
    398 //ZZ void ppARMAModeV ( ARMAModeV* am ) {
    399 //ZZ    vex_printf("%d(", am->simm11);
    400 //ZZ    ppHRegARM(am->reg);
    401 //ZZ    vex_printf(")");
    402 //ZZ }
    403 //ZZ
    404 //ZZ static void addRegUsage_ARMAModeV ( HRegUsage* u, ARMAModeV* am ) {
    405 //ZZ    addHRegUse(u, HRmRead, am->reg);
    406 //ZZ }
    407 //ZZ
    408 //ZZ static void mapRegs_ARMAModeV ( HRegRemap* m, ARMAModeV* am ) {
    409 //ZZ    am->reg = lookupHRegRemap(m, am->reg);
    410 //ZZ }
    411 //ZZ
    412 //ZZ
    413 //ZZ /* --------- Mem AModes: Addressing Mode Neon ------- */
    414 //ZZ
    415 //ZZ ARMAModeN *mkARMAModeN_RR ( HReg rN, HReg rM ) {
    416 //ZZ    ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN));
    417 //ZZ    am->tag = ARMamN_RR;
    418 //ZZ    am->ARMamN.RR.rN = rN;
    419 //ZZ    am->ARMamN.RR.rM = rM;
    420 //ZZ    return am;
    421 //ZZ }
    422 //ZZ
    423 //ZZ ARMAModeN *mkARMAModeN_R ( HReg rN ) {
    424 //ZZ    ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN));
    425 //ZZ    am->tag = ARMamN_R;
    426 //ZZ    am->ARMamN.R.rN = rN;
    427 //ZZ    return am;
    428 //ZZ }
    429 //ZZ
    430 //ZZ static void addRegUsage_ARMAModeN ( HRegUsage* u, ARMAModeN* am ) {
    431 //ZZ    if (am->tag == ARMamN_R) {
    432 //ZZ       addHRegUse(u, HRmRead, am->ARMamN.R.rN);
    433 //ZZ    } else {
    434 //ZZ       addHRegUse(u, HRmRead, am->ARMamN.RR.rN);
    435 //ZZ       addHRegUse(u, HRmRead, am->ARMamN.RR.rM);
    436 //ZZ    }
    437 //ZZ }
    438 //ZZ
    439 //ZZ static void mapRegs_ARMAModeN ( HRegRemap* m, ARMAModeN* am ) {
    440 //ZZ    if (am->tag == ARMamN_R) {
    441 //ZZ       am->ARMamN.R.rN = lookupHRegRemap(m, am->ARMamN.R.rN);
    442 //ZZ    } else {
    443 //ZZ       am->ARMamN.RR.rN = lookupHRegRemap(m, am->ARMamN.RR.rN);
    444 //ZZ       am->ARMamN.RR.rM = lookupHRegRemap(m, am->ARMamN.RR.rM);
    445 //ZZ    }
    446 //ZZ }
    447 //ZZ
    448 //ZZ void ppARMAModeN ( ARMAModeN* am ) {
    449 //ZZ    vex_printf("[");
    450 //ZZ    if (am->tag == ARMamN_R) {
    451 //ZZ       ppHRegARM(am->ARMamN.R.rN);
    452 //ZZ    } else {
    453 //ZZ       ppHRegARM(am->ARMamN.RR.rN);
    454 //ZZ    }
    455 //ZZ    vex_printf("]");
    456 //ZZ    if (am->tag == ARMamN_RR) {
    457 //ZZ       vex_printf(", ");
    458 //ZZ       ppHRegARM(am->ARMamN.RR.rM);
    459 //ZZ    }
    460 //ZZ }
    461 
    462 
    463 /* --------- Reg or uimm12<<{0,12} operands --------- */
    464 
    465 ARM64RIA* ARM64RIA_I12 ( UShort imm12, UChar shift ) {
    466    ARM64RIA* riA           = LibVEX_Alloc(sizeof(ARM64RIA));
    467    riA->tag                = ARM64riA_I12;
    468    riA->ARM64riA.I12.imm12 = imm12;
    469    riA->ARM64riA.I12.shift = shift;
    470    vassert(imm12 < 4096);
    471    vassert(shift == 0 || shift == 12);
    472    return riA;
    473 }
    474 ARM64RIA* ARM64RIA_R ( HReg reg ) {
    475    ARM64RIA* riA       = LibVEX_Alloc(sizeof(ARM64RIA));
    476    riA->tag            = ARM64riA_R;
    477    riA->ARM64riA.R.reg = reg;
    478    return riA;
    479 }
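/* Illustrative sketch (kept under #if 0, not compiled): the two forms
   of a reg-or-immediate arithmetic operand.  The immediate form carries
   a 12-bit value optionally shifted left by 12, as in the A64 ADD/SUB
   immediate encodings; the values below are arbitrary. */
#if 0
static void example_riA ( void )
{
   ARM64RIA* opnd1 = ARM64RIA_I12(42, 0);          /* printed as "#42" */
   ARM64RIA* opnd2 = ARM64RIA_I12(1, 12);          /* printed as "#4096" */
   ARM64RIA* opnd3 = ARM64RIA_R(hregARM64_X3());   /* printed as "x3" */
   (void)opnd1; (void)opnd2; (void)opnd3;
}
#endif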
    480 
    481 static void ppARM64RIA ( ARM64RIA* riA ) {
    482    switch (riA->tag) {
    483       case ARM64riA_I12:
    484          vex_printf("#%u",(UInt)(riA->ARM64riA.I12.imm12
    485                                  << riA->ARM64riA.I12.shift));
    486          break;
    487       case ARM64riA_R:
    488          ppHRegARM64(riA->ARM64riA.R.reg);
    489          break;
    490       default:
    491          vassert(0);
    492    }
    493 }
    494 
    495 static void addRegUsage_ARM64RIA ( HRegUsage* u, ARM64RIA* riA ) {
    496    switch (riA->tag) {
    497       case ARM64riA_I12:
    498          return;
    499       case ARM64riA_R:
    500          addHRegUse(u, HRmRead, riA->ARM64riA.R.reg);
    501          return;
    502       default:
    503          vpanic("addRegUsage_ARM64RIA");
    504    }
    505 }
    506 
    507 static void mapRegs_ARM64RIA ( HRegRemap* m, ARM64RIA* riA ) {
    508    switch (riA->tag) {
    509       case ARM64riA_I12:
    510          return;
    511       case ARM64riA_R:
    512          riA->ARM64riA.R.reg = lookupHRegRemap(m, riA->ARM64riA.R.reg);
    513          return;
    514       default:
    515          vpanic("mapRegs_ARM64RIA");
    516    }
    517 }
    518 
    519 
    520 /* --------- Reg or "bitfield" (logic immediate) operands --------- */
    521 
    522 ARM64RIL* ARM64RIL_I13 ( UChar bitN, UChar immR, UChar immS ) {
    523    ARM64RIL* riL          = LibVEX_Alloc(sizeof(ARM64RIL));
    524    riL->tag               = ARM64riL_I13;
    525    riL->ARM64riL.I13.bitN = bitN;
    526    riL->ARM64riL.I13.immR = immR;
    527    riL->ARM64riL.I13.immS = immS;
    528    vassert(bitN < 2);
    529    vassert(immR < 64);
    530    vassert(immS < 64);
    531    return riL;
    532 }
    533 ARM64RIL* ARM64RIL_R ( HReg reg ) {
    534    ARM64RIL* riL       = LibVEX_Alloc(sizeof(ARM64RIL));
    535    riL->tag            = ARM64riL_R;
    536    riL->ARM64riL.R.reg = reg;
    537    return riL;
    538 }
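/* Illustrative sketch (kept under #if 0, not compiled): a worked
   example of the (bitN, immR, immS) "bitfield" form, assuming the
   standard AArch64 logical-immediate scheme: with N=1 the element size
   is 64 bits, immS+1 gives the number of consecutive ones, and immR
   rotates them right. */
#if 0
static void example_riL ( void )
{
   /* N=1, immR=0, immS=7: 8 consecutive ones, no rotation -> 0xFF */
   ARM64RIL* mask = ARM64RIL_I13(1, 0, 7);
   /* Register alternative, e.g. for AND with x4 */
   ARM64RIL* rreg = ARM64RIL_R(hregARM64_X4());
   (void)mask; (void)rreg;
}
#endif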
    539 
    540 static void ppARM64RIL ( ARM64RIL* riL ) {
    541    switch (riL->tag) {
    542       case ARM64riL_I13:
    543          vex_printf("#nrs(%u,%u,%u)",
    544                      (UInt)riL->ARM64riL.I13.bitN,
    545                      (UInt)riL->ARM64riL.I13.immR,
    546                      (UInt)riL->ARM64riL.I13.immS);
    547          break;
    548       case ARM64riL_R:
    549          ppHRegARM64(riL->ARM64riL.R.reg);
    550          break;
    551       default:
    552          vassert(0);
    553    }
    554 }
    555 
    556 static void addRegUsage_ARM64RIL ( HRegUsage* u, ARM64RIL* riL ) {
    557    switch (riL->tag) {
    558       case ARM64riL_I13:
    559          return;
    560       case ARM64riL_R:
    561          addHRegUse(u, HRmRead, riL->ARM64riL.R.reg);
    562          return;
    563       default:
    564          vpanic("addRegUsage_ARM64RIL");
    565    }
    566 }
    567 
    568 static void mapRegs_ARM64RIL ( HRegRemap* m, ARM64RIL* riL ) {
    569    switch (riL->tag) {
    570       case ARM64riL_I13:
    571          return;
    572       case ARM64riL_R:
    573          riL->ARM64riL.R.reg = lookupHRegRemap(m, riL->ARM64riL.R.reg);
    574          return;
    575       default:
    576          vpanic("mapRegs_ARM64RIL");
    577    }
    578 }
    579 
    580 
    581 /* --------------- Reg or uimm6 operands --------------- */
    582 
    583 ARM64RI6* ARM64RI6_I6 ( UInt imm6 ) {
    584    ARM64RI6* ri6         = LibVEX_Alloc(sizeof(ARM64RI6));
    585    ri6->tag              = ARM64ri6_I6;
    586    ri6->ARM64ri6.I6.imm6 = imm6;
    587    vassert(imm6 > 0 && imm6 < 64);
    588    return ri6;
    589 }
    590 ARM64RI6* ARM64RI6_R ( HReg reg ) {
    591    ARM64RI6* ri6       = LibVEX_Alloc(sizeof(ARM64RI6));
    592    ri6->tag            = ARM64ri6_R;
    593    ri6->ARM64ri6.R.reg = reg;
    594    return ri6;
    595 }
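/* Illustrative sketch (kept under #if 0, not compiled): shift-amount
   operands, either a constant in 1..63 or a register; values chosen
   arbitrarily. */
#if 0
static void example_ri6 ( void )
{
   ARM64RI6* sh_imm = ARM64RI6_I6(3);              /* printed as "#3" */
   ARM64RI6* sh_reg = ARM64RI6_R(hregARM64_X5());  /* printed as "x5" */
   (void)sh_imm; (void)sh_reg;
}
#endif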
    596 
    597 static void ppARM64RI6 ( ARM64RI6* ri6 ) {
    598    switch (ri6->tag) {
    599       case ARM64ri6_I6:
    600          vex_printf("#%u", ri6->ARM64ri6.I6.imm6);
    601          break;
    602       case ARM64ri6_R:
    603          ppHRegARM64(ri6->ARM64ri6.R.reg);
    604          break;
    605       default:
    606          vassert(0);
    607    }
    608 }
    609 
    610 static void addRegUsage_ARM64RI6 ( HRegUsage* u, ARM64RI6* ri6 ) {
    611    switch (ri6->tag) {
    612       case ARM64ri6_I6:
    613          return;
    614       case ARM64ri6_R:
    615          addHRegUse(u, HRmRead, ri6->ARM64ri6.R.reg);
    616          return;
    617       default:
    618          vpanic("addRegUsage_ARM64RI6");
    619    }
    620 }
    621 
    622 static void mapRegs_ARM64RI6 ( HRegRemap* m, ARM64RI6* ri6 ) {
    623    switch (ri6->tag) {
    624       case ARM64ri6_I6:
    625          return;
    626       case ARM64ri6_R:
    627          ri6->ARM64ri6.R.reg = lookupHRegRemap(m, ri6->ARM64ri6.R.reg);
    628          return;
    629       default:
    630          vpanic("mapRegs_ARM64RI6");
    631    }
    632 }
    633 
    634 
    635 //ZZ /* -------- Neon Immediate operand --------- */
    636 //ZZ
    637 //ZZ ARMNImm* ARMNImm_TI ( UInt type, UInt imm8 ) {
    638 //ZZ    ARMNImm* i = LibVEX_Alloc(sizeof(ARMNImm));
    639 //ZZ    i->type = type;
    640 //ZZ    i->imm8 = imm8;
    641 //ZZ    return i;
    642 //ZZ }
    643 //ZZ
    644 //ZZ ULong ARMNImm_to_Imm64 ( ARMNImm* imm ) {
    645 //ZZ    int i, j;
    646 //ZZ    ULong y, x = imm->imm8;
    647 //ZZ    switch (imm->type) {
    648 //ZZ       case 3:
    649 //ZZ          x = x << 8; /* fallthrough */
    650 //ZZ       case 2:
    651 //ZZ          x = x << 8; /* fallthrough */
    652 //ZZ       case 1:
    653 //ZZ          x = x << 8; /* fallthrough */
    654 //ZZ       case 0:
    655 //ZZ          return (x << 32) | x;
    656 //ZZ       case 5:
    657 //ZZ       case 6:
    658 //ZZ          if (imm->type == 5)
    659 //ZZ             x = x << 8;
    660 //ZZ          else
    661 //ZZ             x = (x << 8) | x;
    662 //ZZ          /* fallthrough */
    663 //ZZ       case 4:
    664 //ZZ          x = (x << 16) | x;
    665 //ZZ          return (x << 32) | x;
    666 //ZZ       case 8:
    667 //ZZ          x = (x << 8) | 0xFF;
    668 //ZZ          /* fallthrough */
    669 //ZZ       case 7:
    670 //ZZ          x = (x << 8) | 0xFF;
    671 //ZZ          return (x << 32) | x;
    672 //ZZ       case 9:
    673 //ZZ          x = 0;
    674 //ZZ          for (i = 7; i >= 0; i--) {
    675 //ZZ             y = ((ULong)imm->imm8 >> i) & 1;
    676 //ZZ             for (j = 0; j < 8; j++) {
    677 //ZZ                x = (x << 1) | y;
    678 //ZZ             }
    679 //ZZ          }
    680 //ZZ          return x;
    681 //ZZ       case 10:
    682 //ZZ          x |= (x & 0x80) << 5;
    683 //ZZ          x |= (~x & 0x40) << 5;
    684 //ZZ          x &= 0x187F; /* 0001 1000 0111 1111 */
    685 //ZZ          x |= (x & 0x40) << 4;
    686 //ZZ          x |= (x & 0x40) << 3;
    687 //ZZ          x |= (x & 0x40) << 2;
    688 //ZZ          x |= (x & 0x40) << 1;
    689 //ZZ          x = x << 19;
    690 //ZZ          x = (x << 32) | x;
    691 //ZZ          return x;
    692 //ZZ       default:
    693 //ZZ          vpanic("ARMNImm_to_Imm64");
    694 //ZZ    }
    695 //ZZ }
    696 //ZZ
    697 //ZZ ARMNImm* Imm64_to_ARMNImm ( ULong x ) {
    698 //ZZ    ARMNImm tmp;
    699 //ZZ    if ((x & 0xFFFFFFFF) == (x >> 32)) {
    700 //ZZ       if ((x & 0xFFFFFF00) == 0)
    701 //ZZ          return ARMNImm_TI(0, x & 0xFF);
    702 //ZZ       if ((x & 0xFFFF00FF) == 0)
    703 //ZZ          return ARMNImm_TI(1, (x >> 8) & 0xFF);
    704 //ZZ       if ((x & 0xFF00FFFF) == 0)
    705 //ZZ          return ARMNImm_TI(2, (x >> 16) & 0xFF);
    706 //ZZ       if ((x & 0x00FFFFFF) == 0)
    707 //ZZ          return ARMNImm_TI(3, (x >> 24) & 0xFF);
    708 //ZZ       if ((x & 0xFFFF00FF) == 0xFF)
    709 //ZZ          return ARMNImm_TI(7, (x >> 8) & 0xFF);
    710 //ZZ       if ((x & 0xFF00FFFF) == 0xFFFF)
    711 //ZZ          return ARMNImm_TI(8, (x >> 16) & 0xFF);
    712 //ZZ       if ((x & 0xFFFF) == ((x >> 16) & 0xFFFF)) {
    713 //ZZ          if ((x & 0xFF00) == 0)
    714 //ZZ             return ARMNImm_TI(4, x & 0xFF);
    715 //ZZ          if ((x & 0x00FF) == 0)
    716 //ZZ             return ARMNImm_TI(5, (x >> 8) & 0xFF);
    717 //ZZ          if ((x & 0xFF) == ((x >> 8) & 0xFF))
    718 //ZZ             return ARMNImm_TI(6, x & 0xFF);
    719 //ZZ       }
    720 //ZZ       if ((x & 0x7FFFF) == 0) {
    721 //ZZ          tmp.type = 10;
    722 //ZZ          tmp.imm8 = ((x >> 19) & 0x7F) | ((x >> 24) & 0x80);
    723 //ZZ          if (ARMNImm_to_Imm64(&tmp) == x)
    724 //ZZ             return ARMNImm_TI(tmp.type, tmp.imm8);
    725 //ZZ       }
    726 //ZZ    } else {
    727 //ZZ       /* This can only be type 9. */
    728 //ZZ       tmp.imm8 = (((x >> 56) & 1) << 7)
    729 //ZZ                | (((x >> 48) & 1) << 6)
    730 //ZZ                | (((x >> 40) & 1) << 5)
    731 //ZZ                | (((x >> 32) & 1) << 4)
    732 //ZZ                | (((x >> 24) & 1) << 3)
    733 //ZZ                | (((x >> 16) & 1) << 2)
    734 //ZZ                | (((x >>  8) & 1) << 1)
    735 //ZZ                | (((x >>  0) & 1) << 0);
    736 //ZZ       tmp.type = 9;
    737 //ZZ       if (ARMNImm_to_Imm64 (&tmp) == x)
    738 //ZZ          return ARMNImm_TI(tmp.type, tmp.imm8);
    739 //ZZ    }
    740 //ZZ    return NULL;
    741 //ZZ }
    742 //ZZ
    743 //ZZ void ppARMNImm (ARMNImm* i) {
    744 //ZZ    ULong x = ARMNImm_to_Imm64(i);
    745 //ZZ    vex_printf("0x%llX%llX", x, x);
    746 //ZZ }
    747 //ZZ
    748 //ZZ /* -- Register or scalar operand --- */
    749 //ZZ
    750 //ZZ ARMNRS* mkARMNRS(ARMNRS_tag tag, HReg reg, UInt index)
    751 //ZZ {
    752 //ZZ    ARMNRS *p = LibVEX_Alloc(sizeof(ARMNRS));
    753 //ZZ    p->tag = tag;
    754 //ZZ    p->reg = reg;
    755 //ZZ    p->index = index;
    756 //ZZ    return p;
    757 //ZZ }
    758 //ZZ
    759 //ZZ void ppARMNRS(ARMNRS *p)
    760 //ZZ {
    761 //ZZ    ppHRegARM(p->reg);
    762 //ZZ    if (p->tag == ARMNRS_Scalar) {
    763 //ZZ       vex_printf("[%d]", p->index);
    764 //ZZ    }
    765 //ZZ }
    766 
    767 /* --------- Instructions. --------- */
    768 
    769 static const HChar* showARM64LogicOp ( ARM64LogicOp op ) {
    770    switch (op) {
    771       case ARM64lo_AND: return "and";
    772       case ARM64lo_OR:  return "orr";
    773       case ARM64lo_XOR: return "eor";
    774       default: vpanic("showARM64LogicOp");
    775    }
    776 }
    777 
    778 static const HChar* showARM64ShiftOp ( ARM64ShiftOp op ) {
    779    switch (op) {
    780       case ARM64sh_SHL: return "lsl";
    781       case ARM64sh_SHR: return "lsr";
    782       case ARM64sh_SAR: return "asr";
    783       default: vpanic("showARM64ShiftOp");
    784    }
    785 }
    786 
    787 static const HChar* showARM64UnaryOp ( ARM64UnaryOp op ) {
    788    switch (op) {
    789       case ARM64un_NEG: return "neg";
    790       case ARM64un_NOT: return "not";
    791       case ARM64un_CLZ: return "clz";
    792       default: vpanic("showARM64UnaryOp");
    793    }
    794 }
    795 
    796 static const HChar* showARM64MulOp ( ARM64MulOp op ) {
    797    switch (op) {
    798       case ARM64mul_PLAIN: return "mul  ";
    799       case ARM64mul_ZX:    return "umulh";
    800       case ARM64mul_SX:    return "smulh";
    801       default: vpanic("showARM64MulOp");
    802    }
    803 }
    804 
    805 static void characteriseARM64CvtOp ( /*OUT*/HChar* syn,
    806                                      /*OUT*/UInt* fszB, /*OUT*/UInt* iszB,
    807                                      ARM64CvtOp op ) {
    808    switch (op) {
    809       case ARM64cvt_F32_I32S:
    810          *syn = 's'; *fszB = 4; *iszB = 4; break;
    811       case ARM64cvt_F64_I32S:
    812          *syn = 's'; *fszB = 8; *iszB = 4; break;
    813       case ARM64cvt_F32_I64S:
    814          *syn = 's'; *fszB = 4; *iszB = 8; break;
    815       case ARM64cvt_F64_I64S:
    816          *syn = 's'; *fszB = 8; *iszB = 8; break;
    817       case ARM64cvt_F32_I32U:
    818          *syn = 'u'; *fszB = 4; *iszB = 4; break;
    819       case ARM64cvt_F64_I32U:
    820          *syn = 'u'; *fszB = 8; *iszB = 4; break;
    821       case ARM64cvt_F32_I64U:
    822          *syn = 'u'; *fszB = 4; *iszB = 8; break;
    823       case ARM64cvt_F64_I64U:
    824          *syn = 'u'; *fszB = 8; *iszB = 8; break;
    825       default:
    826          vpanic("characteriseARM64CvtOp");
    827   }
    828 }
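/* Illustrative sketch (kept under #if 0, not compiled): what
   characteriseARM64CvtOp reports for one of the conversions, as used by
   the printing and emission code below. */
#if 0
static void example_cvt_op ( void )
{
   HChar syn  = '?';
   UInt  fszB = 0, iszB = 0;
   /* F64 <-> signed I32: 's', 8-byte float, 4-byte int */
   characteriseARM64CvtOp(&syn, &fszB, &iszB, ARM64cvt_F64_I32S);
   vex_printf("fszB=%u iszB=%u signed=%s\n", fszB, iszB,
              syn == 's' ? "yes" : "no");
}
#endif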
    829 
    830 static const HChar* showARM64FpBinOp ( ARM64FpBinOp op ) {
    831    switch (op) {
    832       case ARM64fpb_ADD: return "add";
    833       case ARM64fpb_SUB: return "sub";
    834       case ARM64fpb_MUL: return "mul";
    835       case ARM64fpb_DIV: return "div";
    836       default: vpanic("showARM64FpBinOp");
    837    }
    838 }
    839 
    840 static const HChar* showARM64FpUnaryOp ( ARM64FpUnaryOp op ) {
    841    switch (op) {
    842       case ARM64fpu_NEG:  return "neg  ";
    843       case ARM64fpu_ABS:  return "abs  ";
    844       case ARM64fpu_SQRT: return "sqrt ";
    845       case ARM64fpu_RINT: return "rinti";
    846       default: vpanic("showARM64FpUnaryOp");
    847    }
    848 }
    849 
    850 static void showARM64VecBinOp(/*OUT*/const HChar** nm,
    851                               /*OUT*/const HChar** ar, ARM64VecBinOp op ) {
    852    switch (op) {
    853       case ARM64vecb_ADD64x2:   *nm = "add ";  *ar = "2d";  return;
    854       case ARM64vecb_ADD32x4:   *nm = "add ";  *ar = "4s";  return;
    855       case ARM64vecb_ADD16x8:   *nm = "add ";  *ar = "8h";  return;
    856       case ARM64vecb_ADD8x16:   *nm = "add ";  *ar = "16b"; return;
    857       case ARM64vecb_SUB64x2:   *nm = "sub ";  *ar = "2d";  return;
    858       case ARM64vecb_SUB32x4:   *nm = "sub ";  *ar = "4s";  return;
    859       case ARM64vecb_SUB16x8:   *nm = "sub ";  *ar = "8h";  return;
    860       case ARM64vecb_SUB8x16:   *nm = "sub ";  *ar = "16b"; return;
    861       case ARM64vecb_MUL32x4:   *nm = "mul ";  *ar = "4s";  return;
    862       case ARM64vecb_MUL16x8:   *nm = "mul ";  *ar = "8h";  return;
    863       case ARM64vecb_MUL8x16:   *nm = "mul ";  *ar = "16b"; return;
    864       case ARM64vecb_FADD64x2:  *nm = "fadd";  *ar = "2d";  return;
    865       case ARM64vecb_FSUB64x2:  *nm = "fsub";  *ar = "2d";  return;
    866       case ARM64vecb_FMUL64x2:  *nm = "fmul";  *ar = "2d";  return;
    867       case ARM64vecb_FDIV64x2:  *nm = "fdiv";  *ar = "2d";  return;
    868       case ARM64vecb_FADD32x4:  *nm = "fadd";  *ar = "4s";  return;
    869       case ARM64vecb_FSUB32x4:  *nm = "fsub";  *ar = "4s";  return;
    870       case ARM64vecb_FMUL32x4:  *nm = "fmul";  *ar = "4s";  return;
    871       case ARM64vecb_FDIV32x4:  *nm = "fdiv";  *ar = "4s";  return;
    872       case ARM64vecb_UMAX32x4:  *nm = "umax";  *ar = "4s";  return;
    873       case ARM64vecb_UMAX16x8:  *nm = "umax";  *ar = "8h";  return;
    874       case ARM64vecb_UMAX8x16:  *nm = "umax";  *ar = "16b"; return;
    875       case ARM64vecb_UMIN32x4:  *nm = "umin";  *ar = "4s";  return;
    876       case ARM64vecb_UMIN16x8:  *nm = "umin";  *ar = "8h";  return;
    877       case ARM64vecb_UMIN8x16:  *nm = "umin";  *ar = "16b"; return;
    878       case ARM64vecb_UMULL32x2: *nm = "umull"; *ar = "2d"; return;
    879       case ARM64vecb_UMULL16x4: *nm = "umull"; *ar = "4s"; return;
    880       case ARM64vecb_UMULL8x8:  *nm = "umull"; *ar = "8b"; return;
    881       case ARM64vecb_SMAX32x4:  *nm = "smax";  *ar = "4s";  return;
    882       case ARM64vecb_SMAX16x8:  *nm = "smax";  *ar = "8h";  return;
    883       case ARM64vecb_SMAX8x16:  *nm = "smax";  *ar = "16b"; return;
    884       case ARM64vecb_SMIN32x4:  *nm = "smin";  *ar = "4s";  return;
    885       case ARM64vecb_SMIN16x8:  *nm = "smin";  *ar = "8h";  return;
    886       case ARM64vecb_SMIN8x16:  *nm = "smin";  *ar = "16b"; return;
    887       case ARM64vecb_AND:       *nm = "and ";  *ar = "all"; return;
    888       case ARM64vecb_ORR:       *nm = "orr ";  *ar = "all"; return;
    889       case ARM64vecb_XOR:       *nm = "eor ";  *ar = "all"; return;
    890       case ARM64vecb_CMEQ64x2:  *nm = "cmeq";  *ar = "2d";  return;
    891       case ARM64vecb_CMEQ32x4:  *nm = "cmeq";  *ar = "4s";  return;
    892       case ARM64vecb_CMEQ16x8:  *nm = "cmeq";  *ar = "8h";  return;
    893       case ARM64vecb_CMEQ8x16:  *nm = "cmeq";  *ar = "16b"; return;
    894       case ARM64vecb_CMHI64x2:  *nm = "cmhi";  *ar = "2d";  return;
    895       case ARM64vecb_CMHI32x4:  *nm = "cmhi";  *ar = "4s";  return;
    896       case ARM64vecb_CMHI16x8:  *nm = "cmhi";  *ar = "8h";  return;
    897       case ARM64vecb_CMHI8x16:  *nm = "cmhi";  *ar = "16b"; return;
    898       case ARM64vecb_CMGT64x2:  *nm = "cmgt";  *ar = "2d";  return;
    899       case ARM64vecb_CMGT32x4:  *nm = "cmgt";  *ar = "4s";  return;
    900       case ARM64vecb_CMGT16x8:  *nm = "cmgt";  *ar = "8h";  return;
    901       case ARM64vecb_CMGT8x16:  *nm = "cmgt";  *ar = "16b"; return;
    902       case ARM64vecb_FCMEQ64x2: *nm = "fcmeq"; *ar = "2d"; return;
    903       case ARM64vecb_FCMEQ32x4: *nm = "fcmeq"; *ar = "4s"; return;
    904       case ARM64vecb_FCMGE64x2: *nm = "fcmge"; *ar = "2d"; return;
    905       case ARM64vecb_FCMGE32x4: *nm = "fcmge"; *ar = "4s"; return;
    906       case ARM64vecb_FCMGT64x2: *nm = "fcmgt"; *ar = "2d"; return;
    907       case ARM64vecb_FCMGT32x4: *nm = "fcmgt"; *ar = "4s"; return;
    908       case ARM64vecb_TBL1:      *nm = "tbl ";  *ar = "16b"; return;
    909       default: vpanic("showARM64VecBinOp");
    910    }
    911 }
    912 
    913 static void showARM64VecUnaryOp(/*OUT*/const HChar** nm,
    914                                 /*OUT*/const HChar** ar, ARM64VecUnaryOp op )
    915 {
    916    switch (op) {
    917       case ARM64vecu_FNEG64x2:   *nm = "fneg   "; *ar = "2d";  return;
    918       case ARM64vecu_FNEG32x4:   *nm = "fneg   "; *ar = "4s";  return;
    919       case ARM64vecu_FABS64x2:   *nm = "fabs   "; *ar = "2d";  return;
    920       case ARM64vecu_FABS32x4:   *nm = "fabs   "; *ar = "4s";  return;
    921       case ARM64vecu_VMOVL8U:    *nm = "vmovl.u8"; *ar = "all"; return;
    922       case ARM64vecu_VMOVL16U:   *nm = "vmovl.u16"; *ar = "all"; return;
    923       case ARM64vecu_VMOVL32U:   *nm = "vmovl.u32"; *ar = "all"; return;
    924       case ARM64vecu_VMOVL8S:    *nm = "vmovl.s8"; *ar = "all"; return;
    925       case ARM64vecu_VMOVL16S:   *nm = "vmovl.s16"; *ar = "all"; return;
    926       case ARM64vecu_VMOVL32S:   *nm = "vmovl.s32"; *ar = "all"; return;
    927       case ARM64vecu_NOT:        *nm = "not    "; *ar = "all"; return;
    928       case ARM64vecu_CNT:        *nm = "cnt    "; *ar = "16b"; return;
    929       case ARM64vecu_UADDLV8x16: *nm = "uaddlv "; *ar = "16b"; return;
    930       case ARM64vecu_UADDLV16x8: *nm = "uaddlv "; *ar = "8h"; return;
    931       case ARM64vecu_UADDLV32x4: *nm = "uaddlv "; *ar = "4s"; return;
    932       case ARM64vecu_SADDLV8x16: *nm = "saddlv "; *ar = "16b"; return;
    933       case ARM64vecu_SADDLV16x8: *nm = "saddlv "; *ar = "8h"; return;
    934       case ARM64vecu_SADDLV32x4: *nm = "saddlv "; *ar = "4s"; return;
    935       default: vpanic("showARM64VecUnaryOp");
    936    }
    937 }
    938 
    939 static void showARM64VecShiftOp(/*OUT*/const HChar** nm,
    940                                 /*OUT*/const HChar** ar,
    941                                 ARM64VecShiftOp op )
    942 {
    943    switch (op) {
    944       case ARM64vecsh_USHR64x2: *nm = "ushr  "; *ar = "2d";  return;
    945       case ARM64vecsh_USHR32x4: *nm = "ushr  "; *ar = "4s";  return;
    946       case ARM64vecsh_USHR16x8: *nm = "ushr  "; *ar = "8h";  return;
    947       case ARM64vecsh_USHR8x16: *nm = "ushr  "; *ar = "16b"; return;
    948       case ARM64vecsh_SSHR64x2: *nm = "sshr  "; *ar = "2d";  return;
    949       case ARM64vecsh_SSHR32x4: *nm = "sshr  "; *ar = "4s";  return;
    950       case ARM64vecsh_SSHR16x8: *nm = "sshr  "; *ar = "8h";  return;
    951       case ARM64vecsh_SSHR8x16: *nm = "sshr  "; *ar = "16b"; return;
    952       case ARM64vecsh_SHL64x2:  *nm = "shl   "; *ar = "2d";  return;
    953       case ARM64vecsh_SHL32x4:  *nm = "shl   "; *ar = "4s";  return;
    954       case ARM64vecsh_SHL16x8:  *nm = "shl   "; *ar = "8h";  return;
    955       case ARM64vecsh_SHL8x16:  *nm = "shl   "; *ar = "16b"; return;
    956       default: vpanic("showARM64VecShiftOp");
    957    }
    958 }
    959 
    960 //ZZ const HChar* showARMNeonBinOp ( ARMNeonBinOp op ) {
    961 //ZZ    switch (op) {
    962 //ZZ       case ARMneon_VAND: return "vand";
    963 //ZZ       case ARMneon_VORR: return "vorr";
    964 //ZZ       case ARMneon_VXOR: return "veor";
    965 //ZZ       case ARMneon_VADD: return "vadd";
    966 //ZZ       case ARMneon_VRHADDS: return "vrhadd";
    967 //ZZ       case ARMneon_VRHADDU: return "vrhadd";
    968 //ZZ       case ARMneon_VADDFP: return "vadd";
    969 //ZZ       case ARMneon_VPADDFP: return "vpadd";
    970 //ZZ       case ARMneon_VABDFP: return "vabd";
    971 //ZZ       case ARMneon_VSUB: return "vsub";
    972 //ZZ       case ARMneon_VSUBFP: return "vsub";
    973 //ZZ       case ARMneon_VMINU: return "vmin";
    974 //ZZ       case ARMneon_VMINS: return "vmin";
    975 //ZZ       case ARMneon_VMINF: return "vmin";
    976 //ZZ       case ARMneon_VMAXU: return "vmax";
    977 //ZZ       case ARMneon_VMAXS: return "vmax";
    978 //ZZ       case ARMneon_VMAXF: return "vmax";
    979 //ZZ       case ARMneon_VQADDU: return "vqadd";
    980 //ZZ       case ARMneon_VQADDS: return "vqadd";
    981 //ZZ       case ARMneon_VQSUBU: return "vqsub";
    982 //ZZ       case ARMneon_VQSUBS: return "vqsub";
    983 //ZZ       case ARMneon_VCGTU:  return "vcgt";
    984 //ZZ       case ARMneon_VCGTS:  return "vcgt";
    985 //ZZ       case ARMneon_VCGTF:  return "vcgt";
    986 //ZZ       case ARMneon_VCGEF:  return "vcgt";
    987 //ZZ       case ARMneon_VCGEU:  return "vcge";
    988 //ZZ       case ARMneon_VCGES:  return "vcge";
    989 //ZZ       case ARMneon_VCEQ:  return "vceq";
    990 //ZZ       case ARMneon_VCEQF:  return "vceq";
    991 //ZZ       case ARMneon_VPADD:   return "vpadd";
    992 //ZZ       case ARMneon_VPMINU:   return "vpmin";
    993 //ZZ       case ARMneon_VPMINS:   return "vpmin";
    994 //ZZ       case ARMneon_VPMINF:   return "vpmin";
    995 //ZZ       case ARMneon_VPMAXU:   return "vpmax";
    996 //ZZ       case ARMneon_VPMAXS:   return "vpmax";
    997 //ZZ       case ARMneon_VPMAXF:   return "vpmax";
    998 //ZZ       case ARMneon_VEXT:   return "vext";
    999 //ZZ       case ARMneon_VMUL:   return "vmuli";
   1000 //ZZ       case ARMneon_VMULLU:   return "vmull";
   1001 //ZZ       case ARMneon_VMULLS:   return "vmull";
   1002 //ZZ       case ARMneon_VMULP:  return "vmul";
   1003 //ZZ       case ARMneon_VMULFP:  return "vmul";
   1004 //ZZ       case ARMneon_VMULLP:  return "vmul";
   1005 //ZZ       case ARMneon_VQDMULH: return "vqdmulh";
   1006 //ZZ       case ARMneon_VQRDMULH: return "vqrdmulh";
   1007 //ZZ       case ARMneon_VQDMULL: return "vqdmull";
   1008 //ZZ       case ARMneon_VTBL: return "vtbl";
   1009 //ZZ       case ARMneon_VRECPS: return "vrecps";
   1010 //ZZ       case ARMneon_VRSQRTS: return "vrecps";
   1011 //ZZ       /* ... */
   1012 //ZZ       default: vpanic("showARMNeonBinOp");
   1013 //ZZ    }
   1014 //ZZ }
   1015 //ZZ
   1016 //ZZ const HChar* showARMNeonBinOpDataType ( ARMNeonBinOp op ) {
   1017 //ZZ    switch (op) {
   1018 //ZZ       case ARMneon_VAND:
   1019 //ZZ       case ARMneon_VORR:
   1020 //ZZ       case ARMneon_VXOR:
   1021 //ZZ          return "";
   1022 //ZZ       case ARMneon_VADD:
   1023 //ZZ       case ARMneon_VSUB:
   1024 //ZZ       case ARMneon_VEXT:
   1025 //ZZ       case ARMneon_VMUL:
   1026 //ZZ       case ARMneon_VPADD:
   1027 //ZZ       case ARMneon_VTBL:
   1028 //ZZ       case ARMneon_VCEQ:
   1029 //ZZ          return ".i";
   1030 //ZZ       case ARMneon_VRHADDU:
   1031 //ZZ       case ARMneon_VMINU:
   1032 //ZZ       case ARMneon_VMAXU:
   1033 //ZZ       case ARMneon_VQADDU:
   1034 //ZZ       case ARMneon_VQSUBU:
   1035 //ZZ       case ARMneon_VCGTU:
   1036 //ZZ       case ARMneon_VCGEU:
   1037 //ZZ       case ARMneon_VMULLU:
   1038 //ZZ       case ARMneon_VPMINU:
   1039 //ZZ       case ARMneon_VPMAXU:
   1040 //ZZ          return ".u";
   1041 //ZZ       case ARMneon_VRHADDS:
   1042 //ZZ       case ARMneon_VMINS:
   1043 //ZZ       case ARMneon_VMAXS:
   1044 //ZZ       case ARMneon_VQADDS:
   1045 //ZZ       case ARMneon_VQSUBS:
   1046 //ZZ       case ARMneon_VCGTS:
   1047 //ZZ       case ARMneon_VCGES:
   1048 //ZZ       case ARMneon_VQDMULL:
   1049 //ZZ       case ARMneon_VMULLS:
   1050 //ZZ       case ARMneon_VPMINS:
   1051 //ZZ       case ARMneon_VPMAXS:
   1052 //ZZ       case ARMneon_VQDMULH:
   1053 //ZZ       case ARMneon_VQRDMULH:
   1054 //ZZ          return ".s";
   1055 //ZZ       case ARMneon_VMULP:
   1056 //ZZ       case ARMneon_VMULLP:
   1057 //ZZ          return ".p";
   1058 //ZZ       case ARMneon_VADDFP:
   1059 //ZZ       case ARMneon_VABDFP:
   1060 //ZZ       case ARMneon_VPADDFP:
   1061 //ZZ       case ARMneon_VSUBFP:
   1062 //ZZ       case ARMneon_VMULFP:
   1063 //ZZ       case ARMneon_VMINF:
   1064 //ZZ       case ARMneon_VMAXF:
   1065 //ZZ       case ARMneon_VPMINF:
   1066 //ZZ       case ARMneon_VPMAXF:
   1067 //ZZ       case ARMneon_VCGTF:
   1068 //ZZ       case ARMneon_VCGEF:
   1069 //ZZ       case ARMneon_VCEQF:
   1070 //ZZ       case ARMneon_VRECPS:
   1071 //ZZ       case ARMneon_VRSQRTS:
   1072 //ZZ          return ".f";
   1073 //ZZ       /* ... */
   1074 //ZZ       default: vpanic("showARMNeonBinOpDataType");
   1075 //ZZ    }
   1076 //ZZ }
   1077 //ZZ
   1078 //ZZ const HChar* showARMNeonUnOp ( ARMNeonUnOp op ) {
   1079 //ZZ    switch (op) {
   1080 //ZZ       case ARMneon_COPY: return "vmov";
   1081 //ZZ       case ARMneon_COPYLS: return "vmov";
   1082 //ZZ       case ARMneon_COPYLU: return "vmov";
   1083 //ZZ       case ARMneon_COPYN: return "vmov";
   1084 //ZZ       case ARMneon_COPYQNSS: return "vqmovn";
   1085 //ZZ       case ARMneon_COPYQNUS: return "vqmovun";
   1086 //ZZ       case ARMneon_COPYQNUU: return "vqmovn";
   1087 //ZZ       case ARMneon_NOT: return "vmvn";
   1088 //ZZ       case ARMneon_EQZ: return "vceq";
   1089 //ZZ       case ARMneon_CNT: return "vcnt";
   1090 //ZZ       case ARMneon_CLS: return "vcls";
   1091 //ZZ       case ARMneon_CLZ: return "vclz";
   1092 //ZZ       case ARMneon_DUP: return "vdup";
   1093 //ZZ       case ARMneon_PADDLS: return "vpaddl";
   1094 //ZZ       case ARMneon_PADDLU: return "vpaddl";
   1095 //ZZ       case ARMneon_VQSHLNSS: return "vqshl";
   1096 //ZZ       case ARMneon_VQSHLNUU: return "vqshl";
   1097 //ZZ       case ARMneon_VQSHLNUS: return "vqshlu";
   1098 //ZZ       case ARMneon_REV16: return "vrev16";
   1099 //ZZ       case ARMneon_REV32: return "vrev32";
   1100 //ZZ       case ARMneon_REV64: return "vrev64";
   1101 //ZZ       case ARMneon_VCVTFtoU: return "vcvt";
   1102 //ZZ       case ARMneon_VCVTFtoS: return "vcvt";
   1103 //ZZ       case ARMneon_VCVTUtoF: return "vcvt";
   1104 //ZZ       case ARMneon_VCVTStoF: return "vcvt";
   1105 //ZZ       case ARMneon_VCVTFtoFixedU: return "vcvt";
   1106 //ZZ       case ARMneon_VCVTFtoFixedS: return "vcvt";
   1107 //ZZ       case ARMneon_VCVTFixedUtoF: return "vcvt";
   1108 //ZZ       case ARMneon_VCVTFixedStoF: return "vcvt";
   1109 //ZZ       case ARMneon_VCVTF32toF16: return "vcvt";
   1110 //ZZ       case ARMneon_VCVTF16toF32: return "vcvt";
   1111 //ZZ       case ARMneon_VRECIP: return "vrecip";
   1112 //ZZ       case ARMneon_VRECIPF: return "vrecipf";
   1113 //ZZ       case ARMneon_VNEGF: return "vneg";
   1114 //ZZ       case ARMneon_ABS: return "vabs";
   1115 //ZZ       case ARMneon_VABSFP: return "vabsfp";
   1116 //ZZ       case ARMneon_VRSQRTEFP: return "vrsqrtefp";
   1117 //ZZ       case ARMneon_VRSQRTE: return "vrsqrte";
   1118 //ZZ       /* ... */
   1119 //ZZ       default: vpanic("showARMNeonUnOp");
   1120 //ZZ    }
   1121 //ZZ }
   1122 //ZZ
   1123 //ZZ const HChar* showARMNeonUnOpDataType ( ARMNeonUnOp op ) {
   1124 //ZZ    switch (op) {
   1125 //ZZ       case ARMneon_COPY:
   1126 //ZZ       case ARMneon_NOT:
   1127 //ZZ          return "";
   1128 //ZZ       case ARMneon_COPYN:
   1129 //ZZ       case ARMneon_EQZ:
   1130 //ZZ       case ARMneon_CNT:
   1131 //ZZ       case ARMneon_DUP:
   1132 //ZZ       case ARMneon_REV16:
   1133 //ZZ       case ARMneon_REV32:
   1134 //ZZ       case ARMneon_REV64:
   1135 //ZZ          return ".i";
   1136 //ZZ       case ARMneon_COPYLU:
   1137 //ZZ       case ARMneon_PADDLU:
   1138 //ZZ       case ARMneon_COPYQNUU:
   1139 //ZZ       case ARMneon_VQSHLNUU:
   1140 //ZZ       case ARMneon_VRECIP:
   1141 //ZZ       case ARMneon_VRSQRTE:
   1142 //ZZ          return ".u";
   1143 //ZZ       case ARMneon_CLS:
   1144 //ZZ       case ARMneon_CLZ:
   1145 //ZZ       case ARMneon_COPYLS:
   1146 //ZZ       case ARMneon_PADDLS:
   1147 //ZZ       case ARMneon_COPYQNSS:
   1148 //ZZ       case ARMneon_COPYQNUS:
   1149 //ZZ       case ARMneon_VQSHLNSS:
   1150 //ZZ       case ARMneon_VQSHLNUS:
   1151 //ZZ       case ARMneon_ABS:
   1152 //ZZ          return ".s";
   1153 //ZZ       case ARMneon_VRECIPF:
   1154 //ZZ       case ARMneon_VNEGF:
   1155 //ZZ       case ARMneon_VABSFP:
   1156 //ZZ       case ARMneon_VRSQRTEFP:
   1157 //ZZ          return ".f";
   1158 //ZZ       case ARMneon_VCVTFtoU: return ".u32.f32";
   1159 //ZZ       case ARMneon_VCVTFtoS: return ".s32.f32";
   1160 //ZZ       case ARMneon_VCVTUtoF: return ".f32.u32";
   1161 //ZZ       case ARMneon_VCVTStoF: return ".f32.s32";
   1162 //ZZ       case ARMneon_VCVTF16toF32: return ".f32.f16";
   1163 //ZZ       case ARMneon_VCVTF32toF16: return ".f16.f32";
   1164 //ZZ       case ARMneon_VCVTFtoFixedU: return ".u32.f32";
   1165 //ZZ       case ARMneon_VCVTFtoFixedS: return ".s32.f32";
   1166 //ZZ       case ARMneon_VCVTFixedUtoF: return ".f32.u32";
   1167 //ZZ       case ARMneon_VCVTFixedStoF: return ".f32.s32";
   1168 //ZZ       /* ... */
   1169 //ZZ       default: vpanic("showARMNeonUnOpDataType");
   1170 //ZZ    }
   1171 //ZZ }
   1172 //ZZ
   1173 //ZZ const HChar* showARMNeonUnOpS ( ARMNeonUnOpS op ) {
   1174 //ZZ    switch (op) {
   1175 //ZZ       case ARMneon_SETELEM: return "vmov";
   1176 //ZZ       case ARMneon_GETELEMU: return "vmov";
   1177 //ZZ       case ARMneon_GETELEMS: return "vmov";
   1178 //ZZ       case ARMneon_VDUP: return "vdup";
   1179 //ZZ       /* ... */
   1180 //ZZ       default: vpanic("showARMNeonUnarySOp");
   1181 //ZZ    }
   1182 //ZZ }
   1183 //ZZ
   1184 //ZZ const HChar* showARMNeonUnOpSDataType ( ARMNeonUnOpS op ) {
   1185 //ZZ    switch (op) {
   1186 //ZZ       case ARMneon_SETELEM:
   1187 //ZZ       case ARMneon_VDUP:
   1188 //ZZ          return ".i";
   1189 //ZZ       case ARMneon_GETELEMS:
   1190 //ZZ          return ".s";
   1191 //ZZ       case ARMneon_GETELEMU:
   1192 //ZZ          return ".u";
   1193 //ZZ       /* ... */
   1194 //ZZ       default: vpanic("showARMNeonUnarySOp");
   1195 //ZZ    }
   1196 //ZZ }
   1197 //ZZ
   1198 //ZZ const HChar* showARMNeonShiftOp ( ARMNeonShiftOp op ) {
   1199 //ZZ    switch (op) {
   1200 //ZZ       case ARMneon_VSHL: return "vshl";
   1201 //ZZ       case ARMneon_VSAL: return "vshl";
   1202 //ZZ       case ARMneon_VQSHL: return "vqshl";
   1203 //ZZ       case ARMneon_VQSAL: return "vqshl";
   1204 //ZZ       /* ... */
   1205 //ZZ       default: vpanic("showARMNeonShiftOp");
   1206 //ZZ    }
   1207 //ZZ }
   1208 //ZZ
   1209 //ZZ const HChar* showARMNeonShiftOpDataType ( ARMNeonShiftOp op ) {
   1210 //ZZ    switch (op) {
   1211 //ZZ       case ARMneon_VSHL:
   1212 //ZZ       case ARMneon_VQSHL:
   1213 //ZZ          return ".u";
   1214 //ZZ       case ARMneon_VSAL:
   1215 //ZZ       case ARMneon_VQSAL:
   1216 //ZZ          return ".s";
   1217 //ZZ       /* ... */
   1218 //ZZ       default: vpanic("showARMNeonShiftOpDataType");
   1219 //ZZ    }
   1220 //ZZ }
   1221 //ZZ
   1222 //ZZ const HChar* showARMNeonDualOp ( ARMNeonDualOp op ) {
   1223 //ZZ    switch (op) {
   1224 //ZZ       case ARMneon_TRN: return "vtrn";
   1225 //ZZ       case ARMneon_ZIP: return "vzip";
   1226 //ZZ       case ARMneon_UZP: return "vuzp";
   1227 //ZZ       /* ... */
   1228 //ZZ       default: vpanic("showARMNeonDualOp");
   1229 //ZZ    }
   1230 //ZZ }
   1231 //ZZ
   1232 //ZZ const HChar* showARMNeonDualOpDataType ( ARMNeonDualOp op ) {
   1233 //ZZ    switch (op) {
   1234 //ZZ       case ARMneon_TRN:
   1235 //ZZ       case ARMneon_ZIP:
   1236 //ZZ       case ARMneon_UZP:
   1237 //ZZ          return "i";
   1238 //ZZ       /* ... */
   1239 //ZZ       default: vpanic("showARMNeonDualOp");
   1240 //ZZ    }
   1241 //ZZ }
   1242 //ZZ
   1243 //ZZ static const HChar* showARMNeonDataSize_wrk ( UInt size )
   1244 //ZZ {
   1245 //ZZ    switch (size) {
   1246 //ZZ       case 0: return "8";
   1247 //ZZ       case 1: return "16";
   1248 //ZZ       case 2: return "32";
   1249 //ZZ       case 3: return "64";
   1250 //ZZ       default: vpanic("showARMNeonDataSize");
   1251 //ZZ    }
   1252 //ZZ }
   1253 //ZZ
   1254 //ZZ static const HChar* showARMNeonDataSize ( ARMInstr* i )
   1255 //ZZ {
   1256 //ZZ    switch (i->tag) {
   1257 //ZZ       case ARMin_NBinary:
   1258 //ZZ          if (i->ARMin.NBinary.op == ARMneon_VEXT)
   1259 //ZZ             return "8";
   1260 //ZZ          if (i->ARMin.NBinary.op == ARMneon_VAND ||
   1261 //ZZ              i->ARMin.NBinary.op == ARMneon_VORR ||
   1262 //ZZ              i->ARMin.NBinary.op == ARMneon_VXOR)
   1263 //ZZ             return "";
   1264 //ZZ          return showARMNeonDataSize_wrk(i->ARMin.NBinary.size);
   1265 //ZZ       case ARMin_NUnary:
   1266 //ZZ          if (i->ARMin.NUnary.op == ARMneon_COPY ||
   1267 //ZZ              i->ARMin.NUnary.op == ARMneon_NOT ||
   1268 //ZZ              i->ARMin.NUnary.op == ARMneon_VCVTF32toF16||
   1269 //ZZ              i->ARMin.NUnary.op == ARMneon_VCVTF16toF32||
   1270 //ZZ              i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
   1271 //ZZ              i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
   1272 //ZZ              i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
   1273 //ZZ              i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF ||
   1274 //ZZ              i->ARMin.NUnary.op == ARMneon_VCVTFtoS ||
   1275 //ZZ              i->ARMin.NUnary.op == ARMneon_VCVTFtoU ||
   1276 //ZZ              i->ARMin.NUnary.op == ARMneon_VCVTStoF ||
   1277 //ZZ              i->ARMin.NUnary.op == ARMneon_VCVTUtoF)
   1278 //ZZ             return "";
   1279 //ZZ          if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
   1280 //ZZ              i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
   1281 //ZZ              i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
   1282 //ZZ             UInt size;
   1283 //ZZ             size = i->ARMin.NUnary.size;
   1284 //ZZ             if (size & 0x40)
   1285 //ZZ                return "64";
   1286 //ZZ             if (size & 0x20)
   1287 //ZZ                return "32";
   1288 //ZZ             if (size & 0x10)
   1289 //ZZ                return "16";
   1290 //ZZ             if (size & 0x08)
   1291 //ZZ                return "8";
   1292 //ZZ             vpanic("showARMNeonDataSize");
   1293 //ZZ          }
   1294 //ZZ          return showARMNeonDataSize_wrk(i->ARMin.NUnary.size);
   1295 //ZZ       case ARMin_NUnaryS:
   1296 //ZZ          if (i->ARMin.NUnaryS.op == ARMneon_VDUP) {
   1297 //ZZ             int size;
   1298 //ZZ             size = i->ARMin.NUnaryS.size;
   1299 //ZZ             if ((size & 1) == 1)
   1300 //ZZ                return "8";
   1301 //ZZ             if ((size & 3) == 2)
   1302 //ZZ                return "16";
   1303 //ZZ             if ((size & 7) == 4)
   1304 //ZZ                return "32";
   1305 //ZZ             vpanic("showARMNeonDataSize");
   1306 //ZZ          }
   1307 //ZZ          return showARMNeonDataSize_wrk(i->ARMin.NUnaryS.size);
   1308 //ZZ       case ARMin_NShift:
   1309 //ZZ          return showARMNeonDataSize_wrk(i->ARMin.NShift.size);
   1310 //ZZ       case ARMin_NDual:
   1311 //ZZ          return showARMNeonDataSize_wrk(i->ARMin.NDual.size);
   1312 //ZZ       default:
   1313 //ZZ          vpanic("showARMNeonDataSize");
   1314 //ZZ    }
   1315 //ZZ }
   1316 
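        /* --------- Instruction constructors. --------- */

        /* Each constructor below allocates an ARM64Instr with LibVEX_Alloc,
           sets its tag, fills in the fields of the matching union member and,
           where it is cheap to do so, vasserts that the operands are sane.
           A sketch of how the instruction selector might build a 64-bit
           register-register add -- assuming ARM64RIA_R is the register form
           of the reg-or-imm12 operand declared in host_arm64_defs.h:

              ARM64Instr* add64
                 = ARM64Instr_Arith(dst, argL, ARM64RIA_R(argR),
                                    True);  // True selects add, False sub
        */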
   1317 ARM64Instr* ARM64Instr_Arith ( HReg dst,
   1318                                HReg argL, ARM64RIA* argR, Bool isAdd ) {
   1319    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1320    i->tag                 = ARM64in_Arith;
   1321    i->ARM64in.Arith.dst   = dst;
   1322    i->ARM64in.Arith.argL  = argL;
   1323    i->ARM64in.Arith.argR  = argR;
   1324    i->ARM64in.Arith.isAdd = isAdd;
   1325    return i;
   1326 }
   1327 ARM64Instr* ARM64Instr_Cmp ( HReg argL, ARM64RIA* argR, Bool is64 ) {
   1328    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1329    i->tag              = ARM64in_Cmp;
   1330    i->ARM64in.Cmp.argL = argL;
   1331    i->ARM64in.Cmp.argR = argR;
   1332    i->ARM64in.Cmp.is64 = is64;
   1333    return i;
   1334 }
   1335 ARM64Instr* ARM64Instr_Logic ( HReg dst,
   1336                                HReg argL, ARM64RIL* argR, ARM64LogicOp op ) {
   1337    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1338    i->tag                 = ARM64in_Logic;
   1339    i->ARM64in.Logic.dst   = dst;
   1340    i->ARM64in.Logic.argL  = argL;
   1341    i->ARM64in.Logic.argR  = argR;
   1342    i->ARM64in.Logic.op    = op;
   1343    return i;
   1344 }
   1345 ARM64Instr* ARM64Instr_Test ( HReg argL, ARM64RIL* argR ) {
   1346    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1347    i->tag               = ARM64in_Test;
   1348    i->ARM64in.Test.argL = argL;
   1349    i->ARM64in.Test.argR = argR;
   1350    return i;
   1351 }
   1352 ARM64Instr* ARM64Instr_Shift ( HReg dst,
   1353                                HReg argL, ARM64RI6* argR, ARM64ShiftOp op ) {
   1354    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1355    i->tag                = ARM64in_Shift;
   1356    i->ARM64in.Shift.dst  = dst;
   1357    i->ARM64in.Shift.argL = argL;
   1358    i->ARM64in.Shift.argR = argR;
   1359    i->ARM64in.Shift.op   = op;
   1360    return i;
   1361 }
   1362 ARM64Instr* ARM64Instr_Unary ( HReg dst, HReg src, ARM64UnaryOp op ) {
   1363    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1364    i->tag               = ARM64in_Unary;
   1365    i->ARM64in.Unary.dst = dst;
   1366    i->ARM64in.Unary.src = src;
   1367    i->ARM64in.Unary.op  = op;
   1368    return i;
   1369 }
   1370 ARM64Instr* ARM64Instr_MovI ( HReg dst, HReg src ) {
   1371    ARM64Instr* i      = LibVEX_Alloc(sizeof(ARM64Instr));
   1372    i->tag             = ARM64in_MovI;
   1373    i->ARM64in.MovI.dst = dst;
   1374    i->ARM64in.MovI.src = src;
   1375    vassert(hregClass(src) == HRcInt64);
   1376    vassert(hregClass(dst) == HRcInt64);
   1377    return i;
   1378 }
   1379 ARM64Instr* ARM64Instr_Imm64 ( HReg dst, ULong imm64 ) {
   1380    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1381    i->tag                 = ARM64in_Imm64;
   1382    i->ARM64in.Imm64.dst   = dst;
   1383    i->ARM64in.Imm64.imm64 = imm64;
   1384    return i;
   1385 }
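        /* Integer loads and stores of 64, 32, 16 and 8 bits; isLoad selects
           the direction.  Judging from the ldruw/ldruh/ldrub mnemonics used
           by the pretty-printer below, the sub-64-bit loads are unsigned
           (zero-extending). */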
   1386 ARM64Instr* ARM64Instr_LdSt64 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
   1387    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1388    i->tag                   = ARM64in_LdSt64;
   1389    i->ARM64in.LdSt64.isLoad = isLoad;
   1390    i->ARM64in.LdSt64.rD     = rD;
   1391    i->ARM64in.LdSt64.amode  = amode;
   1392    return i;
   1393 }
   1394 ARM64Instr* ARM64Instr_LdSt32 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
   1395    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1396    i->tag                   = ARM64in_LdSt32;
   1397    i->ARM64in.LdSt32.isLoad = isLoad;
   1398    i->ARM64in.LdSt32.rD     = rD;
   1399    i->ARM64in.LdSt32.amode  = amode;
   1400    return i;
   1401 }
   1402 ARM64Instr* ARM64Instr_LdSt16 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
   1403    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1404    i->tag                   = ARM64in_LdSt16;
   1405    i->ARM64in.LdSt16.isLoad = isLoad;
   1406    i->ARM64in.LdSt16.rD     = rD;
   1407    i->ARM64in.LdSt16.amode  = amode;
   1408    return i;
   1409 }
   1410 ARM64Instr* ARM64Instr_LdSt8 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
   1411    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1412    i->tag                  = ARM64in_LdSt8;
   1413    i->ARM64in.LdSt8.isLoad = isLoad;
   1414    i->ARM64in.LdSt8.rD     = rD;
   1415    i->ARM64in.LdSt8.amode  = amode;
   1416    return i;
   1417 }
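        /* The three ways of leaving a translation, all guarded by 'cond':
           XDirect for exits whose guest address is known at JIT time (these
           can later be chained directly to their target, via the fast or slow
           entry point as selected by toFastEP), XIndir for computed gotos,
           and XAssisted for exits that must return to the dispatcher carrying
           a specific IRJumpKind. */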
   1418 ARM64Instr* ARM64Instr_XDirect ( Addr64 dstGA, ARM64AMode* amPC,
   1419                                  ARM64CondCode cond, Bool toFastEP ) {
   1420    ARM64Instr* i               = LibVEX_Alloc(sizeof(ARM64Instr));
   1421    i->tag                      = ARM64in_XDirect;
   1422    i->ARM64in.XDirect.dstGA    = dstGA;
   1423    i->ARM64in.XDirect.amPC     = amPC;
   1424    i->ARM64in.XDirect.cond     = cond;
   1425    i->ARM64in.XDirect.toFastEP = toFastEP;
   1426    return i;
   1427 }
   1428 ARM64Instr* ARM64Instr_XIndir ( HReg dstGA, ARM64AMode* amPC,
   1429                                 ARM64CondCode cond ) {
   1430    ARM64Instr* i           = LibVEX_Alloc(sizeof(ARM64Instr));
   1431    i->tag                  = ARM64in_XIndir;
   1432    i->ARM64in.XIndir.dstGA = dstGA;
   1433    i->ARM64in.XIndir.amPC  = amPC;
   1434    i->ARM64in.XIndir.cond  = cond;
   1435    return i;
   1436 }
   1437 ARM64Instr* ARM64Instr_XAssisted ( HReg dstGA, ARM64AMode* amPC,
   1438                                    ARM64CondCode cond, IRJumpKind jk ) {
   1439    ARM64Instr* i              = LibVEX_Alloc(sizeof(ARM64Instr));
   1440    i->tag                     = ARM64in_XAssisted;
   1441    i->ARM64in.XAssisted.dstGA = dstGA;
   1442    i->ARM64in.XAssisted.amPC  = amPC;
   1443    i->ARM64in.XAssisted.cond  = cond;
   1444    i->ARM64in.XAssisted.jk    = jk;
   1445    return i;
   1446 }
   1447 ARM64Instr* ARM64Instr_CSel ( HReg dst, HReg argL, HReg argR,
   1448                               ARM64CondCode cond ) {
   1449    ARM64Instr* i        = LibVEX_Alloc(sizeof(ARM64Instr));
   1450    i->tag               = ARM64in_CSel;
   1451    i->ARM64in.CSel.dst  = dst;
   1452    i->ARM64in.CSel.argL = argL;
   1453    i->ARM64in.CSel.argR = argR;
   1454    i->ARM64in.CSel.cond = cond;
   1455    return i;
   1456 }
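        /* Call to a fixed target address, guarded by 'cond' (ARM64cc_AL for
           an unconditional call).  nArgRegs says how many integer argument
           registers are in use, and rloc describes where any result will be
           found; rloc is sanity-checked here, and the caller-saved registers
           the call may trash are declared in getRegUsage_ARM64Instr below. */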
   1457 ARM64Instr* ARM64Instr_Call ( ARM64CondCode cond, HWord target, Int nArgRegs,
   1458                               RetLoc rloc ) {
   1459    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1460    i->tag                   = ARM64in_Call;
   1461    i->ARM64in.Call.cond     = cond;
   1462    i->ARM64in.Call.target   = target;
   1463    i->ARM64in.Call.nArgRegs = nArgRegs;
   1464    i->ARM64in.Call.rloc     = rloc;
   1465    vassert(is_sane_RetLoc(rloc));
   1466    return i;
   1467 }
   1468 ARM64Instr* ARM64Instr_AddToSP ( Int simm ) {
   1469    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1470    i->tag                  = ARM64in_AddToSP;
   1471    i->ARM64in.AddToSP.simm = simm;
   1472    vassert(-4096 < simm && simm < 4096);
   1473    vassert(0 == (simm & 0xF));
   1474    return i;
   1475 }
   1476 ARM64Instr* ARM64Instr_FromSP  ( HReg dst ) {
   1477    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1478    i->tag                = ARM64in_FromSP;
   1479    i->ARM64in.FromSP.dst = dst;
   1480    return i;
   1481 }
   1482 ARM64Instr* ARM64Instr_Mul ( HReg dst, HReg argL, HReg argR,
   1483                              ARM64MulOp op ) {
   1484    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1485    i->tag              = ARM64in_Mul;
   1486    i->ARM64in.Mul.dst  = dst;
   1487    i->ARM64in.Mul.argL = argL;
   1488    i->ARM64in.Mul.argR = argR;
   1489    i->ARM64in.Mul.op   = op;
   1490    return i;
   1491 }
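        /* Load-exclusive / store-exclusive of szB bytes.  These use a fixed
           register convention rather than allocatable operands: as the
           pretty-printer below shows, the address lives in x4, the data in
           x2/w2, and the store-exclusive status result comes back in w0. */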
   1492 ARM64Instr* ARM64Instr_LdrEX ( Int szB ) {
   1493    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1494    i->tag               = ARM64in_LdrEX;
   1495    i->ARM64in.LdrEX.szB = szB;
   1496    vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
   1497    return i;
   1498 }
   1499 ARM64Instr* ARM64Instr_StrEX ( Int szB ) {
   1500    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1501    i->tag               = ARM64in_StrEX;
   1502    i->ARM64in.StrEX.szB = szB;
   1503    vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
   1504    return i;
   1505 }
   1506 ARM64Instr* ARM64Instr_MFence ( void ) {
   1507    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1508    i->tag        = ARM64in_MFence;
   1509    return i;
   1510 }
   1511 //ZZ ARM64Instr* ARM64Instr_CLREX( void ) {
   1512 //ZZ    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1513 //ZZ    i->tag        = ARM64in_CLREX;
   1514 //ZZ    return i;
   1515 //ZZ }
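        /* Scalar FP loads and stores.  For the S and D forms, uimm12 is a
           byte offset from rN and must be a multiple of 4 or 8 respectively,
           as the vasserts below require; the Q form takes just a plain base
           register. */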
   1516 ARM64Instr* ARM64Instr_VLdStS ( Bool isLoad, HReg sD, HReg rN, UInt uimm12 ) {
   1517    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1518    i->tag                  = ARM64in_VLdStS;
   1519    i->ARM64in.VLdStS.isLoad = isLoad;
   1520    i->ARM64in.VLdStS.sD     = sD;
   1521    i->ARM64in.VLdStS.rN     = rN;
   1522    i->ARM64in.VLdStS.uimm12 = uimm12;
   1523    vassert(uimm12 < 16384 && 0 == (uimm12 & 3));
   1524    return i;
   1525 }
   1526 ARM64Instr* ARM64Instr_VLdStD ( Bool isLoad, HReg dD, HReg rN, UInt uimm12 ) {
   1527    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1528    i->tag                  = ARM64in_VLdStD;
   1529    i->ARM64in.VLdStD.isLoad = isLoad;
   1530    i->ARM64in.VLdStD.dD     = dD;
   1531    i->ARM64in.VLdStD.rN     = rN;
   1532    i->ARM64in.VLdStD.uimm12 = uimm12;
   1533    vassert(uimm12 < 32768 && 0 == (uimm12 & 7));
   1534    return i;
   1535 }
   1536 ARM64Instr* ARM64Instr_VLdStQ ( Bool isLoad, HReg rQ, HReg rN ) {
   1537    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1538    i->tag                   = ARM64in_VLdStQ;
   1539    i->ARM64in.VLdStQ.isLoad = isLoad;
   1540    i->ARM64in.VLdStQ.rQ     = rQ;
   1541    i->ARM64in.VLdStQ.rN     = rN;
   1542    return i;
   1543 }
   1544 ARM64Instr* ARM64Instr_VCvtI2F ( ARM64CvtOp how, HReg rD, HReg rS ) {
   1545    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1546    i->tag                 = ARM64in_VCvtI2F;
   1547    i->ARM64in.VCvtI2F.how = how;
   1548    i->ARM64in.VCvtI2F.rD  = rD;
   1549    i->ARM64in.VCvtI2F.rS  = rS;
   1550    return i;
   1551 }
   1552 ARM64Instr* ARM64Instr_VCvtF2I ( ARM64CvtOp how, HReg rD, HReg rS,
   1553                                  UChar armRM ) {
   1554    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1555    i->tag                   = ARM64in_VCvtF2I;
   1556    i->ARM64in.VCvtF2I.how   = how;
   1557    i->ARM64in.VCvtF2I.rD    = rD;
   1558    i->ARM64in.VCvtF2I.rS    = rS;
   1559    i->ARM64in.VCvtF2I.armRM = armRM;
   1560    vassert(armRM <= 3);
   1561    return i;
   1562 }
   1563 ARM64Instr* ARM64Instr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
   1564    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1565    i->tag                 = ARM64in_VCvtSD;
   1566    i->ARM64in.VCvtSD.sToD = sToD;
   1567    i->ARM64in.VCvtSD.dst  = dst;
   1568    i->ARM64in.VCvtSD.src  = src;
   1569    return i;
   1570 }
   1571 ARM64Instr* ARM64Instr_VUnaryD ( ARM64FpUnaryOp op, HReg dst, HReg src ) {
   1572    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1573    i->tag                 = ARM64in_VUnaryD;
   1574    i->ARM64in.VUnaryD.op  = op;
   1575    i->ARM64in.VUnaryD.dst = dst;
   1576    i->ARM64in.VUnaryD.src = src;
   1577    return i;
   1578 }
   1579 ARM64Instr* ARM64Instr_VUnaryS ( ARM64FpUnaryOp op, HReg dst, HReg src ) {
   1580    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1581    i->tag                 = ARM64in_VUnaryS;
   1582    i->ARM64in.VUnaryS.op  = op;
   1583    i->ARM64in.VUnaryS.dst = dst;
   1584    i->ARM64in.VUnaryS.src = src;
   1585    return i;
   1586 }
   1587 ARM64Instr* ARM64Instr_VBinD ( ARM64FpBinOp op,
   1588                                HReg dst, HReg argL, HReg argR ) {
   1589    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1590    i->tag                = ARM64in_VBinD;
   1591    i->ARM64in.VBinD.op   = op;
   1592    i->ARM64in.VBinD.dst  = dst;
   1593    i->ARM64in.VBinD.argL = argL;
   1594    i->ARM64in.VBinD.argR = argR;
   1595    return i;
   1596 }
   1597 ARM64Instr* ARM64Instr_VBinS ( ARM64FpBinOp op,
   1598                                HReg dst, HReg argL, HReg argR ) {
   1599    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1600    i->tag                = ARM64in_VBinS;
   1601    i->ARM64in.VBinS.op   = op;
   1602    i->ARM64in.VBinS.dst  = dst;
   1603    i->ARM64in.VBinS.argL = argL;
   1604    i->ARM64in.VBinS.argR = argR;
   1605    return i;
   1606 }
   1607 ARM64Instr* ARM64Instr_VCmpD ( HReg argL, HReg argR ) {
   1608    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1609    i->tag                = ARM64in_VCmpD;
   1610    i->ARM64in.VCmpD.argL = argL;
   1611    i->ARM64in.VCmpD.argR = argR;
   1612    return i;
   1613 }
   1614 ARM64Instr* ARM64Instr_VCmpS ( HReg argL, HReg argR ) {
   1615    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1616    i->tag                = ARM64in_VCmpS;
   1617    i->ARM64in.VCmpS.argL = argL;
   1618    i->ARM64in.VCmpS.argR = argR;
   1619    return i;
   1620 }
   1621 ARM64Instr* ARM64Instr_FPCR ( Bool toFPCR, HReg iReg ) {
   1622    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1623    i->tag                 = ARM64in_FPCR;
   1624    i->ARM64in.FPCR.toFPCR = toFPCR;
   1625    i->ARM64in.FPCR.iReg   = iReg;
   1626    return i;
   1627 }
   1628 ARM64Instr* ARM64Instr_VBinV ( ARM64VecBinOp op,
   1629                                HReg dst, HReg argL, HReg argR ) {
   1630    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1631    i->tag                = ARM64in_VBinV;
   1632    i->ARM64in.VBinV.op   = op;
   1633    i->ARM64in.VBinV.dst  = dst;
   1634    i->ARM64in.VBinV.argL = argL;
   1635    i->ARM64in.VBinV.argR = argR;
   1636    return i;
   1637 }
   1638 ARM64Instr* ARM64Instr_VUnaryV ( ARM64VecUnaryOp op, HReg dst, HReg arg ) {
   1639    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1640    i->tag                 = ARM64in_VUnaryV;
   1641    i->ARM64in.VUnaryV.op  = op;
   1642    i->ARM64in.VUnaryV.dst = dst;
   1643    i->ARM64in.VUnaryV.arg = arg;
   1644    return i;
   1645 }
   1646 ARM64Instr* ARM64Instr_VNarrowV ( UInt dszBlg2, HReg dst, HReg src ) {
   1647    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1648    i->tag                      = ARM64in_VNarrowV;
   1649    i->ARM64in.VNarrowV.dszBlg2 = dszBlg2;
   1650    i->ARM64in.VNarrowV.dst     = dst;
   1651    i->ARM64in.VNarrowV.src     = src;
   1652    vassert(dszBlg2 == 0 || dszBlg2 == 1 || dszBlg2 == 2);
   1653    return i;
   1654 }
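        /* Vector shift by immediate.  The switch below derives the largest
           legal shift amount from the lane size, and the constructor insists
           on 1 <= amt <= lane_size - 1. */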
   1655 ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftOp op,
   1656                                     HReg dst, HReg src, UInt amt ) {
   1657    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1658    i->tag                    = ARM64in_VShiftImmV;
   1659    i->ARM64in.VShiftImmV.op  = op;
   1660    i->ARM64in.VShiftImmV.dst = dst;
   1661    i->ARM64in.VShiftImmV.src = src;
   1662    i->ARM64in.VShiftImmV.amt = amt;
   1663    UInt maxSh = 0;
   1664    switch (op) {
   1665       case ARM64vecsh_USHR64x2: case ARM64vecsh_SSHR64x2:
   1666       case ARM64vecsh_SHL64x2:
   1667          maxSh = 63; break;
   1668       case ARM64vecsh_USHR32x4: case ARM64vecsh_SSHR32x4:
   1669       case ARM64vecsh_SHL32x4:
   1670          maxSh = 31; break;
   1671       case ARM64vecsh_USHR16x8: case ARM64vecsh_SSHR16x8:
   1672       case ARM64vecsh_SHL16x8:
   1673          maxSh = 15; break;
   1674       case ARM64vecsh_USHR8x16: case ARM64vecsh_SSHR8x16:
   1675       case ARM64vecsh_SHL8x16:
   1676          maxSh = 7; break;
   1677       default:
   1678          vassert(0);
   1679    }
   1680    vassert(maxSh > 0);
   1681    vassert(amt > 0 && amt <= maxSh);
   1682    return i;
   1683 }
   1684 //ZZ ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
   1685 //ZZ    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1686 //ZZ    i->tag              = ARMin_VAluS;
   1687 //ZZ    i->ARMin.VAluS.op   = op;
   1688 //ZZ    i->ARMin.VAluS.dst  = dst;
   1689 //ZZ    i->ARMin.VAluS.argL = argL;
   1690 //ZZ    i->ARMin.VAluS.argR = argR;
   1691 //ZZ    return i;
   1692 //ZZ }
   1693 //ZZ ARMInstr* ARMInstr_VCMovD ( ARMCondCode cond, HReg dst, HReg src ) {
   1694 //ZZ    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1695 //ZZ    i->tag               = ARMin_VCMovD;
   1696 //ZZ    i->ARMin.VCMovD.cond = cond;
   1697 //ZZ    i->ARMin.VCMovD.dst  = dst;
   1698 //ZZ    i->ARMin.VCMovD.src  = src;
   1699 //ZZ    vassert(cond != ARMcc_AL);
   1700 //ZZ    return i;
   1701 //ZZ }
   1702 //ZZ ARMInstr* ARMInstr_VCMovS ( ARMCondCode cond, HReg dst, HReg src ) {
   1703 //ZZ    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1704 //ZZ    i->tag               = ARMin_VCMovS;
   1705 //ZZ    i->ARMin.VCMovS.cond = cond;
   1706 //ZZ    i->ARMin.VCMovS.dst  = dst;
   1707 //ZZ    i->ARMin.VCMovS.src  = src;
   1708 //ZZ    vassert(cond != ARMcc_AL);
   1709 //ZZ    return i;
   1710 //ZZ }
   1711 //ZZ ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo ) {
   1712 //ZZ    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1713 //ZZ    i->tag              = ARMin_VXferD;
   1714 //ZZ    i->ARMin.VXferD.toD = toD;
   1715 //ZZ    i->ARMin.VXferD.dD  = dD;
   1716 //ZZ    i->ARMin.VXferD.rHi = rHi;
   1717 //ZZ    i->ARMin.VXferD.rLo = rLo;
   1718 //ZZ    return i;
   1719 //ZZ }
   1720 //ZZ ARMInstr* ARMInstr_VXferS ( Bool toS, HReg fD, HReg rLo ) {
   1721 //ZZ    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1722 //ZZ    i->tag              = ARMin_VXferS;
   1723 //ZZ    i->ARMin.VXferS.toS = toS;
   1724 //ZZ    i->ARMin.VXferS.fD  = fD;
   1725 //ZZ    i->ARMin.VXferS.rLo = rLo;
   1726 //ZZ    return i;
   1727 //ZZ }
   1728 //ZZ ARMInstr* ARMInstr_VCvtID ( Bool iToD, Bool syned,
   1729 //ZZ                             HReg dst, HReg src ) {
   1730 //ZZ    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1731 //ZZ    i->tag                = ARMin_VCvtID;
   1732 //ZZ    i->ARMin.VCvtID.iToD  = iToD;
   1733 //ZZ    i->ARMin.VCvtID.syned = syned;
   1734 //ZZ    i->ARMin.VCvtID.dst   = dst;
   1735 //ZZ    i->ARMin.VCvtID.src   = src;
   1736 //ZZ    return i;
   1737 //ZZ }
   1738 //ZZ ARMInstr* ARMInstr_NLdStD ( Bool isLoad, HReg dD, ARMAModeN *amode ) {
   1739 //ZZ    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1740 //ZZ    i->tag                  = ARMin_NLdStD;
   1741 //ZZ    i->ARMin.NLdStD.isLoad  = isLoad;
   1742 //ZZ    i->ARMin.NLdStD.dD      = dD;
   1743 //ZZ    i->ARMin.NLdStD.amode   = amode;
   1744 //ZZ    return i;
   1745 //ZZ }
   1746 //ZZ
   1747 //ZZ ARMInstr* ARMInstr_NUnary ( ARMNeonUnOp op, HReg dQ, HReg nQ,
   1748 //ZZ                             UInt size, Bool Q ) {
   1749 //ZZ    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1750 //ZZ    i->tag                = ARMin_NUnary;
   1751 //ZZ    i->ARMin.NUnary.op   = op;
   1752 //ZZ    i->ARMin.NUnary.src  = nQ;
   1753 //ZZ    i->ARMin.NUnary.dst  = dQ;
   1754 //ZZ    i->ARMin.NUnary.size = size;
   1755 //ZZ    i->ARMin.NUnary.Q    = Q;
   1756 //ZZ    return i;
   1757 //ZZ }
   1758 //ZZ
   1759 //ZZ ARMInstr* ARMInstr_NUnaryS ( ARMNeonUnOpS op, ARMNRS* dst, ARMNRS* src,
   1760 //ZZ                              UInt size, Bool Q ) {
   1761 //ZZ    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1762 //ZZ    i->tag                = ARMin_NUnaryS;
   1763 //ZZ    i->ARMin.NUnaryS.op   = op;
   1764 //ZZ    i->ARMin.NUnaryS.src  = src;
   1765 //ZZ    i->ARMin.NUnaryS.dst  = dst;
   1766 //ZZ    i->ARMin.NUnaryS.size = size;
   1767 //ZZ    i->ARMin.NUnaryS.Q    = Q;
   1768 //ZZ    return i;
   1769 //ZZ }
   1770 //ZZ
   1771 //ZZ ARMInstr* ARMInstr_NDual ( ARMNeonDualOp op, HReg nQ, HReg mQ,
   1772 //ZZ                            UInt size, Bool Q ) {
   1773 //ZZ    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1774 //ZZ    i->tag                = ARMin_NDual;
   1775 //ZZ    i->ARMin.NDual.op   = op;
   1776 //ZZ    i->ARMin.NDual.arg1 = nQ;
   1777 //ZZ    i->ARMin.NDual.arg2 = mQ;
   1778 //ZZ    i->ARMin.NDual.size = size;
   1779 //ZZ    i->ARMin.NDual.Q    = Q;
   1780 //ZZ    return i;
   1781 //ZZ }
   1782 //ZZ
   1783 //ZZ ARMInstr* ARMInstr_NBinary ( ARMNeonBinOp op,
   1784 //ZZ                              HReg dst, HReg argL, HReg argR,
   1785 //ZZ                              UInt size, Bool Q ) {
   1786 //ZZ    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1787 //ZZ    i->tag                = ARMin_NBinary;
   1788 //ZZ    i->ARMin.NBinary.op   = op;
   1789 //ZZ    i->ARMin.NBinary.argL = argL;
   1790 //ZZ    i->ARMin.NBinary.argR = argR;
   1791 //ZZ    i->ARMin.NBinary.dst  = dst;
   1792 //ZZ    i->ARMin.NBinary.size = size;
   1793 //ZZ    i->ARMin.NBinary.Q    = Q;
   1794 //ZZ    return i;
   1795 //ZZ }
   1796 
   1797 ARM64Instr* ARM64Instr_VImmQ (HReg rQ, UShort imm) {
   1798    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1799    i->tag               = ARM64in_VImmQ;
   1800    i->ARM64in.VImmQ.rQ  = rQ;
   1801    i->ARM64in.VImmQ.imm = imm;
   1802    return i;
   1803 }
   1804 ARM64Instr* ARM64Instr_VDfromX ( HReg rD, HReg rX ) {
   1805    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1806    i->tag                = ARM64in_VDfromX;
   1807    i->ARM64in.VDfromX.rD = rD;
   1808    i->ARM64in.VDfromX.rX = rX;
   1809    return i;
   1810 }
   1811 ARM64Instr* ARM64Instr_VQfromXX ( HReg rQ, HReg rXhi, HReg rXlo ) {
   1812    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1813    i->tag                   = ARM64in_VQfromXX;
   1814    i->ARM64in.VQfromXX.rQ   = rQ;
   1815    i->ARM64in.VQfromXX.rXhi = rXhi;
   1816    i->ARM64in.VQfromXX.rXlo = rXlo;
   1817    return i;
   1818 }
   1819 ARM64Instr* ARM64Instr_VXfromQ ( HReg rX, HReg rQ, UInt laneNo ) {
   1820    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   1821    i->tag                    = ARM64in_VXfromQ;
   1822    i->ARM64in.VXfromQ.rX     = rX;
   1823    i->ARM64in.VXfromQ.rQ     = rQ;
   1824    i->ARM64in.VXfromQ.laneNo = laneNo;
   1825    vassert(laneNo <= 1);
   1826    return i;
   1827 }
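        /* Move within the FP/vector register file.  szB must be 16 (a whole
           Q register) or 8 (a D register); the register classes of src and
           dst are checked to match. */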
   1828 ARM64Instr* ARM64Instr_VMov ( UInt szB, HReg dst, HReg src ) {
   1829    ARM64Instr* i       = LibVEX_Alloc(sizeof(ARM64Instr));
   1830    i->tag              = ARM64in_VMov;
   1831    i->ARM64in.VMov.szB = szB;
   1832    i->ARM64in.VMov.dst = dst;
   1833    i->ARM64in.VMov.src = src;
   1834    switch (szB) {
   1835       case 16:
   1836         vassert(hregClass(src) == HRcVec128);
   1837         vassert(hregClass(dst) == HRcVec128);
   1838         break;
   1839       case 8:
   1840         vassert(hregClass(src) == HRcFlt64);
   1841         vassert(hregClass(dst) == HRcFlt64);
   1842         break;
   1843       default:
   1844         vpanic("ARM64Instr_VMov");
   1845    }
   1846    return i;
   1847 }
   1848 
   1849 //ZZ ARMInstr* ARMInstr_NCMovQ ( ARMCondCode cond, HReg dst, HReg src ) {
   1850 //ZZ    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1851 //ZZ    i->tag               = ARMin_NCMovQ;
   1852 //ZZ    i->ARMin.NCMovQ.cond = cond;
   1853 //ZZ    i->ARMin.NCMovQ.dst  = dst;
   1854 //ZZ    i->ARMin.NCMovQ.src  = src;
   1855 //ZZ    vassert(cond != ARMcc_AL);
   1856 //ZZ    return i;
   1857 //ZZ }
   1858 //ZZ
   1859 //ZZ ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp op,
   1860 //ZZ                             HReg dst, HReg argL, HReg argR,
   1861 //ZZ                             UInt size, Bool Q ) {
   1862 //ZZ    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1863 //ZZ    i->tag                = ARMin_NShift;
   1864 //ZZ    i->ARMin.NShift.op   = op;
   1865 //ZZ    i->ARMin.NShift.argL = argL;
   1866 //ZZ    i->ARMin.NShift.argR = argR;
   1867 //ZZ    i->ARMin.NShift.dst  = dst;
   1868 //ZZ    i->ARMin.NShift.size = size;
   1869 //ZZ    i->ARMin.NShift.Q    = Q;
   1870 //ZZ    return i;
   1871 //ZZ }
   1872 //ZZ
   1873 //ZZ ARMInstr* ARMInstr_NShl64 ( HReg dst, HReg src, UInt amt )
   1874 //ZZ {
   1875 //ZZ    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1876 //ZZ    i->tag              = ARMin_NShl64;
   1877 //ZZ    i->ARMin.NShl64.dst = dst;
   1878 //ZZ    i->ARMin.NShl64.src = src;
   1879 //ZZ    i->ARMin.NShl64.amt = amt;
   1880 //ZZ    vassert(amt >= 1 && amt <= 63);
   1881 //ZZ    return i;
   1882 //ZZ }
   1883 //ZZ
   1884 //ZZ /* Helper copy-pasted from isel.c */
   1885 //ZZ static Bool fitsIn8x4 ( UInt* u8, UInt* u4, UInt u )
   1886 //ZZ {
   1887 //ZZ    UInt i;
   1888 //ZZ    for (i = 0; i < 16; i++) {
   1889 //ZZ       if (0 == (u & 0xFFFFFF00)) {
   1890 //ZZ          *u8 = u;
   1891 //ZZ          *u4 = i;
   1892 //ZZ          return True;
   1893 //ZZ       }
   1894 //ZZ       u = ROR32(u, 30);
   1895 //ZZ    }
   1896 //ZZ    vassert(i == 16);
   1897 //ZZ    return False;
   1898 //ZZ }
   1899 //ZZ
   1900 //ZZ ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 ) {
   1901 //ZZ    UInt u8, u4;
   1902 //ZZ    ARMInstr *i = LibVEX_Alloc(sizeof(ARMInstr));
   1903 //ZZ    /* Try to generate single ADD if possible */
   1904 //ZZ    if (fitsIn8x4(&u8, &u4, imm32)) {
   1905 //ZZ       i->tag            = ARMin_Alu;
   1906 //ZZ       i->ARMin.Alu.op   = ARMalu_ADD;
   1907 //ZZ       i->ARMin.Alu.dst  = rD;
   1908 //ZZ       i->ARMin.Alu.argL = rN;
   1909 //ZZ       i->ARMin.Alu.argR = ARMRI84_I84(u8, u4);
   1910 //ZZ    } else {
   1911 //ZZ       i->tag               = ARMin_Add32;
   1912 //ZZ       i->ARMin.Add32.rD    = rD;
   1913 //ZZ       i->ARMin.Add32.rN    = rN;
   1914 //ZZ       i->ARMin.Add32.imm32 = imm32;
   1915 //ZZ    }
   1916 //ZZ    return i;
   1917 //ZZ }
   1918 
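        /* Event check: decrement the 32-bit counter at amCounter and, if it
           goes negative, load a continuation address from amFailAddr and
           branch to it -- see the ldr/subs/str/bpl/br sequence printed by
           ppARM64Instr below. */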
   1919 ARM64Instr* ARM64Instr_EvCheck ( ARM64AMode* amCounter,
   1920                                  ARM64AMode* amFailAddr ) {
   1921    ARM64Instr* i                 = LibVEX_Alloc(sizeof(ARM64Instr));
   1922    i->tag                        = ARM64in_EvCheck;
   1923    i->ARM64in.EvCheck.amCounter  = amCounter;
   1924    i->ARM64in.EvCheck.amFailAddr = amFailAddr;
   1925    return i;
   1926 }
   1927 
   1928 //ZZ ARMInstr* ARMInstr_ProfInc ( void ) {
   1929 //ZZ    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1930 //ZZ    i->tag      = ARMin_ProfInc;
   1931 //ZZ    return i;
   1932 //ZZ }
   1933 
   1934 /* ... */
   1935 
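        /* Print an ARM64Instr in a roughly assembler-like form.  This is for
           debug output via vex_printf only; the text is not consumed by any
           assembler. */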
   1936 void ppARM64Instr ( ARM64Instr* i ) {
   1937    switch (i->tag) {
   1938       case ARM64in_Arith:
   1939          vex_printf("%s    ", i->ARM64in.Arith.isAdd ? "add" : "sub");
   1940          ppHRegARM64(i->ARM64in.Arith.dst);
   1941          vex_printf(", ");
   1942          ppHRegARM64(i->ARM64in.Arith.argL);
   1943          vex_printf(", ");
   1944          ppARM64RIA(i->ARM64in.Arith.argR);
   1945          return;
   1946       case ARM64in_Cmp:
   1947          vex_printf("cmp%s ", i->ARM64in.Cmp.is64 ? "   " : "(w)" );
   1948          ppHRegARM64(i->ARM64in.Cmp.argL);
   1949          vex_printf(", ");
   1950          ppARM64RIA(i->ARM64in.Cmp.argR);
   1951          return;
   1952       case ARM64in_Logic:
   1953          vex_printf("%s    ", showARM64LogicOp(i->ARM64in.Logic.op));
   1954          ppHRegARM64(i->ARM64in.Logic.dst);
   1955          vex_printf(", ");
   1956          ppHRegARM64(i->ARM64in.Logic.argL);
   1957          vex_printf(", ");
   1958          ppARM64RIL(i->ARM64in.Logic.argR);
   1959          return;
   1960       case ARM64in_Test:
   1961          vex_printf("tst    ");
   1962          ppHRegARM64(i->ARM64in.Test.argL);
   1963          vex_printf(", ");
   1964          ppARM64RIL(i->ARM64in.Test.argR);
   1965          return;
   1966       case ARM64in_Shift:
   1967          vex_printf("%s    ", showARM64ShiftOp(i->ARM64in.Shift.op));
   1968          ppHRegARM64(i->ARM64in.Shift.dst);
   1969          vex_printf(", ");
   1970          ppHRegARM64(i->ARM64in.Shift.argL);
   1971          vex_printf(", ");
   1972          ppARM64RI6(i->ARM64in.Shift.argR);
   1973          return;
   1974       case ARM64in_Unary:
   1975          vex_printf("%s    ", showARM64UnaryOp(i->ARM64in.Unary.op));
   1976          ppHRegARM64(i->ARM64in.Unary.dst);
   1977          vex_printf(", ");
   1978          ppHRegARM64(i->ARM64in.Unary.src);
   1979          return;
   1980       case ARM64in_MovI:
   1981          vex_printf("mov    ");
   1982          ppHRegARM64(i->ARM64in.MovI.dst);
   1983          vex_printf(", ");
   1984          ppHRegARM64(i->ARM64in.MovI.src);
   1985          return;
   1986       case ARM64in_Imm64:
   1987          vex_printf("imm64  ");
   1988          ppHRegARM64(i->ARM64in.Imm64.dst);
   1989          vex_printf(", 0x%llx", i->ARM64in.Imm64.imm64);
   1990          return;
   1991       case ARM64in_LdSt64:
   1992          if (i->ARM64in.LdSt64.isLoad) {
   1993             vex_printf("ldr    ");
   1994             ppHRegARM64(i->ARM64in.LdSt64.rD);
   1995             vex_printf(", ");
   1996             ppARM64AMode(i->ARM64in.LdSt64.amode);
   1997          } else {
   1998             vex_printf("str    ");
   1999             ppARM64AMode(i->ARM64in.LdSt64.amode);
   2000             vex_printf(", ");
   2001             ppHRegARM64(i->ARM64in.LdSt64.rD);
   2002          }
   2003          return;
   2004       case ARM64in_LdSt32:
   2005          if (i->ARM64in.LdSt32.isLoad) {
   2006             vex_printf("ldruw  ");
   2007             ppHRegARM64(i->ARM64in.LdSt32.rD);
   2008             vex_printf(", ");
   2009             ppARM64AMode(i->ARM64in.LdSt32.amode);
   2010          } else {
   2011             vex_printf("strw   ");
   2012             ppARM64AMode(i->ARM64in.LdSt32.amode);
   2013             vex_printf(", ");
   2014             ppHRegARM64(i->ARM64in.LdSt32.rD);
   2015          }
   2016          return;
   2017       case ARM64in_LdSt16:
   2018          if (i->ARM64in.LdSt16.isLoad) {
   2019             vex_printf("ldruh  ");
   2020             ppHRegARM64(i->ARM64in.LdSt16.rD);
   2021             vex_printf(", ");
   2022             ppARM64AMode(i->ARM64in.LdSt16.amode);
   2023          } else {
   2024             vex_printf("strh   ");
   2025             ppARM64AMode(i->ARM64in.LdSt16.amode);
   2026             vex_printf(", ");
   2027             ppHRegARM64(i->ARM64in.LdSt16.rD);
   2028          }
   2029          return;
   2030       case ARM64in_LdSt8:
   2031          if (i->ARM64in.LdSt8.isLoad) {
   2032             vex_printf("ldrub  ");
   2033             ppHRegARM64(i->ARM64in.LdSt8.rD);
   2034             vex_printf(", ");
   2035             ppARM64AMode(i->ARM64in.LdSt8.amode);
   2036          } else {
   2037             vex_printf("strb   ");
   2038             ppARM64AMode(i->ARM64in.LdSt8.amode);
   2039             vex_printf(", ");
   2040             ppHRegARM64(i->ARM64in.LdSt8.rD);
   2041          }
   2042          return;
   2043       case ARM64in_XDirect:
   2044          vex_printf("(xDirect) ");
   2045          vex_printf("if (%%pstate.%s) { ",
   2046                     showARM64CondCode(i->ARM64in.XDirect.cond));
   2047          vex_printf("imm64 x9,0x%llx; ", i->ARM64in.XDirect.dstGA);
   2048          vex_printf("str x9,");
   2049          ppARM64AMode(i->ARM64in.XDirect.amPC);
   2050          vex_printf("; imm64-exactly4 x9,$disp_cp_chain_me_to_%sEP; ",
   2051                     i->ARM64in.XDirect.toFastEP ? "fast" : "slow");
   2052          vex_printf("blr x9 }");
   2053          return;
   2054       case ARM64in_XIndir:
   2055          vex_printf("(xIndir) ");
   2056          vex_printf("if (%%pstate.%s) { ",
   2057                     showARM64CondCode(i->ARM64in.XIndir.cond));
   2058          vex_printf("str ");
   2059          ppHRegARM64(i->ARM64in.XIndir.dstGA);
   2060          vex_printf(",");
   2061          ppARM64AMode(i->ARM64in.XIndir.amPC);
   2062          vex_printf("; imm64 x9,$disp_cp_xindir; ");
   2063          vex_printf("br x9 }");
   2064          return;
   2065       case ARM64in_XAssisted:
   2066          vex_printf("(xAssisted) ");
   2067          vex_printf("if (%%pstate.%s) { ",
   2068                     showARM64CondCode(i->ARM64in.XAssisted.cond));
   2069          vex_printf("str ");
   2070          ppHRegARM64(i->ARM64in.XAssisted.dstGA);
   2071          vex_printf(",");
   2072          ppARM64AMode(i->ARM64in.XAssisted.amPC);
   2073          vex_printf("; movw x21,$IRJumpKind_to_TRCVAL(%d); ",
   2074                     (Int)i->ARM64in.XAssisted.jk);
   2075          vex_printf("imm64 x9,$disp_cp_xassisted; ");
   2076          vex_printf("br x9 }");
   2077          return;
   2078       case ARM64in_CSel:
   2079          vex_printf("csel   ");
   2080          ppHRegARM64(i->ARM64in.CSel.dst);
   2081          vex_printf(", ");
   2082          ppHRegARM64(i->ARM64in.CSel.argL);
   2083          vex_printf(", ");
   2084          ppHRegARM64(i->ARM64in.CSel.argR);
   2085          vex_printf(", %s", showARM64CondCode(i->ARM64in.CSel.cond));
   2086          return;
   2087       case ARM64in_Call:
   2088          vex_printf("call%s ",
   2089                     i->ARM64in.Call.cond==ARM64cc_AL
   2090                        ? "  " : showARM64CondCode(i->ARM64in.Call.cond));
   2091          vex_printf("0x%lx [nArgRegs=%d, ",
   2092                     i->ARM64in.Call.target, i->ARM64in.Call.nArgRegs);
   2093          ppRetLoc(i->ARM64in.Call.rloc);
   2094          vex_printf("]");
   2095          return;
   2096       case ARM64in_AddToSP: {
   2097          Int simm = i->ARM64in.AddToSP.simm;
   2098          vex_printf("%s    xsp, xsp, #%d", simm < 0 ? "sub" : "add",
   2099                                            simm < 0 ? -simm : simm);
   2100          return;
   2101       }
   2102       case ARM64in_FromSP:
   2103          vex_printf("mov    ");
   2104          ppHRegARM64(i->ARM64in.FromSP.dst);
   2105          vex_printf(", xsp");
   2106          return;
   2107       case ARM64in_Mul:
   2108          vex_printf("%s  ", showARM64MulOp(i->ARM64in.Mul.op));
   2109          ppHRegARM64(i->ARM64in.Mul.dst);
   2110          vex_printf(", ");
   2111          ppHRegARM64(i->ARM64in.Mul.argL);
   2112          vex_printf(", ");
   2113          ppHRegARM64(i->ARM64in.Mul.argR);
   2114          return;
   2115 
   2116       case ARM64in_LdrEX: {
   2117          const HChar* sz = " ";
   2118          switch (i->ARM64in.LdrEX.szB) {
   2119             case 1: sz = "b"; break;
   2120             case 2: sz = "h"; break;
   2121             case 4: case 8: break;
   2122             default: vassert(0);
   2123          }
   2124          vex_printf("ldxr%s  %c2, [x4]",
   2125                     sz, i->ARM64in.LdrEX.szB == 8 ? 'x' : 'w');
   2126          return;
   2127       }
   2128       case ARM64in_StrEX: {
   2129          const HChar* sz = " ";
   2130          switch (i->ARM64in.StrEX.szB) {
   2131             case 1: sz = "b"; break;
   2132             case 2: sz = "h"; break;
   2133             case 4: case 8: break;
   2134             default: vassert(0);
   2135          }
   2136          vex_printf("stxr%s  w0, %c2, [x4]",
   2137                     sz, i->ARM64in.StrEX.szB == 8 ? 'x' : 'w');
   2138          return;
   2139       }
   2140       case ARM64in_MFence:
   2141          vex_printf("(mfence) dsb sy; dmb sy; isb");
   2142          return;
   2143 //ZZ       case ARM64in_CLREX:
   2144 //ZZ          vex_printf("clrex");
   2145 //ZZ          return;
   2146       case ARM64in_VLdStS:
   2147          if (i->ARM64in.VLdStS.isLoad) {
   2148             vex_printf("ldr    ");
   2149             ppHRegARM64asSreg(i->ARM64in.VLdStS.sD);
   2150             vex_printf(", %u(", i->ARM64in.VLdStS.uimm12);
   2151             ppHRegARM64(i->ARM64in.VLdStS.rN);
   2152             vex_printf(")");
   2153          } else {
   2154             vex_printf("str    ");
   2155             vex_printf("%u(", i->ARM64in.VLdStS.uimm12);
   2156             ppHRegARM64(i->ARM64in.VLdStS.rN);
   2157             vex_printf("), ");
   2158             ppHRegARM64asSreg(i->ARM64in.VLdStS.sD);
   2159          }
   2160          return;
   2161       case ARM64in_VLdStD:
   2162          if (i->ARM64in.VLdStD.isLoad) {
   2163             vex_printf("ldr    ");
   2164             ppHRegARM64(i->ARM64in.VLdStD.dD);
   2165             vex_printf(", %u(", i->ARM64in.VLdStD.uimm12);
   2166             ppHRegARM64(i->ARM64in.VLdStD.rN);
   2167             vex_printf(")");
   2168          } else {
   2169             vex_printf("str    ");
   2170             vex_printf("%u(", i->ARM64in.VLdStD.uimm12);
   2171             ppHRegARM64(i->ARM64in.VLdStD.rN);
   2172             vex_printf("), ");
   2173             ppHRegARM64(i->ARM64in.VLdStD.dD);
   2174          }
   2175          return;
   2176       case ARM64in_VLdStQ:
   2177          if (i->ARM64in.VLdStQ.isLoad)
   2178             vex_printf("ld1.2d {");
   2179          else
   2180             vex_printf("st1.2d {");
   2181          ppHRegARM64(i->ARM64in.VLdStQ.rQ);
   2182          vex_printf("}, [");
   2183          ppHRegARM64(i->ARM64in.VLdStQ.rN);
   2184          vex_printf("]");
   2185          return;
   2186       case ARM64in_VCvtI2F: {
   2187          HChar syn  = '?';
   2188          UInt  fszB = 0;
   2189          UInt  iszB = 0;
   2190          characteriseARM64CvtOp(&syn, &fszB, &iszB, i->ARM64in.VCvtI2F.how);
   2191          vex_printf("%ccvtf  ", syn);
   2192          ppHRegARM64(i->ARM64in.VCvtI2F.rD);
   2193          vex_printf("(%c-reg), ", fszB == 4 ? 'S' : 'D');
   2194          ppHRegARM64(i->ARM64in.VCvtI2F.rS);
   2195          vex_printf("(%c-reg)", iszB == 4 ? 'W' : 'X');
   2196          return;
   2197       }
   2198       case ARM64in_VCvtF2I: {
   2199          HChar syn  = '?';
   2200          UInt  fszB = 0;
   2201          UInt  iszB = 0;
   2202          HChar rmo  = '?';
   2203          characteriseARM64CvtOp(&syn, &fszB, &iszB, i->ARM64in.VCvtF2I.how);
   2204          UChar armRM = i->ARM64in.VCvtF2I.armRM;
   2205          if (armRM < 4) rmo = "npmz"[armRM];
   2206          vex_printf("fcvt%c%c ", rmo, syn);
   2207          ppHRegARM64(i->ARM64in.VCvtF2I.rD);
   2208          vex_printf("(%c-reg), ", iszB == 4 ? 'W' : 'X');
   2209          ppHRegARM64(i->ARM64in.VCvtF2I.rS);
   2210          vex_printf("(%c-reg)", fszB == 4 ? 'S' : 'D');
   2211          return;
   2212       }
   2213       case ARM64in_VCvtSD:
   2214          vex_printf("fcvt%s ", i->ARM64in.VCvtSD.sToD ? "s2d" : "d2s");
   2215          if (i->ARM64in.VCvtSD.sToD) {
   2216             ppHRegARM64(i->ARM64in.VCvtSD.dst);
   2217             vex_printf(", ");
   2218             ppHRegARM64asSreg(i->ARM64in.VCvtSD.src);
   2219          } else {
   2220             ppHRegARM64asSreg(i->ARM64in.VCvtSD.dst);
   2221             vex_printf(", ");
   2222             ppHRegARM64(i->ARM64in.VCvtSD.src);
   2223          }
   2224          return;
   2225       case ARM64in_VUnaryD:
   2226          vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryD.op));
   2227          ppHRegARM64(i->ARM64in.VUnaryD.dst);
   2228          vex_printf(", ");
   2229          ppHRegARM64(i->ARM64in.VUnaryD.src);
   2230          return;
   2231       case ARM64in_VUnaryS:
   2232          vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryS.op));
   2233          ppHRegARM64asSreg(i->ARM64in.VUnaryS.dst);
   2234          vex_printf(", ");
   2235          ppHRegARM64asSreg(i->ARM64in.VUnaryS.src);
   2236          return;
   2237       case ARM64in_VBinD:
   2238          vex_printf("f%s   ", showARM64FpBinOp(i->ARM64in.VBinD.op));
   2239          ppHRegARM64(i->ARM64in.VBinD.dst);
   2240          vex_printf(", ");
   2241          ppHRegARM64(i->ARM64in.VBinD.argL);
   2242          vex_printf(", ");
   2243          ppHRegARM64(i->ARM64in.VBinD.argR);
   2244          return;
   2245       case ARM64in_VBinS:
   2246          vex_printf("f%s   ", showARM64FpBinOp(i->ARM64in.VBinS.op));
   2247          ppHRegARM64asSreg(i->ARM64in.VBinS.dst);
   2248          vex_printf(", ");
   2249          ppHRegARM64asSreg(i->ARM64in.VBinS.argL);
   2250          vex_printf(", ");
   2251          ppHRegARM64asSreg(i->ARM64in.VBinS.argR);
   2252          return;
   2253       case ARM64in_VCmpD:
   2254          vex_printf("fcmp   ");
   2255          ppHRegARM64(i->ARM64in.VCmpD.argL);
   2256          vex_printf(", ");
   2257          ppHRegARM64(i->ARM64in.VCmpD.argR);
   2258          return;
   2259       case ARM64in_VCmpS:
   2260          vex_printf("fcmp   ");
   2261          ppHRegARM64asSreg(i->ARM64in.VCmpS.argL);
   2262          vex_printf(", ");
   2263          ppHRegARM64asSreg(i->ARM64in.VCmpS.argR);
   2264          return;
   2265       case ARM64in_FPCR:
   2266          if (i->ARM64in.FPCR.toFPCR) {
   2267             vex_printf("msr    fpcr, ");
   2268             ppHRegARM64(i->ARM64in.FPCR.iReg);
   2269          } else {
   2270             vex_printf("mrs    ");
   2271             ppHRegARM64(i->ARM64in.FPCR.iReg);
   2272             vex_printf(", fpcr");
   2273          }
   2274          return;
   2275       case ARM64in_VBinV: {
   2276          const HChar* nm = "??";
   2277          const HChar* ar = "??";
   2278          showARM64VecBinOp(&nm, &ar, i->ARM64in.VBinV.op);
   2279          vex_printf("%s   ", nm);
   2280          ppHRegARM64(i->ARM64in.VBinV.dst);
   2281          vex_printf(".%s, ", ar);
   2282          ppHRegARM64(i->ARM64in.VBinV.argL);
   2283          vex_printf(".%s, ", ar);
   2284          ppHRegARM64(i->ARM64in.VBinV.argR);
   2285          vex_printf(".%s", ar);
   2286          return;
   2287       }
   2288       case ARM64in_VUnaryV: {
   2289          const HChar* nm = "??";
   2290          const HChar* ar = "??";
   2291          showARM64VecUnaryOp(&nm, &ar, i->ARM64in.VUnaryV.op);
   2292          vex_printf("%s  ", nm);
   2293          ppHRegARM64(i->ARM64in.VUnaryV.dst);
   2294          vex_printf(".%s, ", ar);
   2295          ppHRegARM64(i->ARM64in.VUnaryV.arg);
   2296          vex_printf(".%s", ar);
   2297          return;
   2298       }
   2299       case ARM64in_VNarrowV: {
   2300          UInt dszBlg2 = i->ARM64in.VNarrowV.dszBlg2;
   2301          const HChar* darr[3] = { "8b", "4h", "2s" };
   2302          const HChar* sarr[3] = { "8h", "4s", "2d" };
   2303          vex_printf("xtn    ");
   2304          ppHRegARM64(i->ARM64in.VNarrowV.dst);
   2305          vex_printf(".%s, ", dszBlg2 < 3 ? darr[dszBlg2] : "??");
   2306          ppHRegARM64(i->ARM64in.VNarrowV.src);
   2307          vex_printf(".%s", dszBlg2 < 3 ? sarr[dszBlg2] : "??");
   2308          return;
   2309       }
   2310       case ARM64in_VShiftImmV: {
   2311          const HChar* nm = "??";
   2312          const HChar* ar = "??";
   2313          showARM64VecShiftOp(&nm, &ar, i->ARM64in.VShiftImmV.op);
   2314          vex_printf("%s ", nm);
   2315          ppHRegARM64(i->ARM64in.VShiftImmV.dst);
   2316          vex_printf(".%s, ", ar);
   2317          ppHRegARM64(i->ARM64in.VShiftImmV.src);
   2318          vex_printf(".%s, #%u", ar, i->ARM64in.VShiftImmV.amt);
   2319          return;
   2320       }
   2321 //ZZ       case ARMin_VAluS:
   2322 //ZZ          vex_printf("f%-3ss ", showARMVfpOp(i->ARMin.VAluS.op));
   2323 //ZZ          ppHRegARM(i->ARMin.VAluS.dst);
   2324 //ZZ          vex_printf(", ");
   2325 //ZZ          ppHRegARM(i->ARMin.VAluS.argL);
   2326 //ZZ          vex_printf(", ");
   2327 //ZZ          ppHRegARM(i->ARMin.VAluS.argR);
   2328 //ZZ          return;
   2329 //ZZ       case ARMin_VCMovD:
   2330 //ZZ          vex_printf("fcpyd%s ", showARMCondCode(i->ARMin.VCMovD.cond));
   2331 //ZZ          ppHRegARM(i->ARMin.VCMovD.dst);
   2332 //ZZ          vex_printf(", ");
   2333 //ZZ          ppHRegARM(i->ARMin.VCMovD.src);
   2334 //ZZ          return;
   2335 //ZZ       case ARMin_VCMovS:
   2336 //ZZ          vex_printf("fcpys%s ", showARMCondCode(i->ARMin.VCMovS.cond));
   2337 //ZZ          ppHRegARM(i->ARMin.VCMovS.dst);
   2338 //ZZ          vex_printf(", ");
   2339 //ZZ          ppHRegARM(i->ARMin.VCMovS.src);
   2340 //ZZ          return;
   2341 //ZZ       case ARMin_VXferD:
   2342 //ZZ          vex_printf("vmov  ");
   2343 //ZZ          if (i->ARMin.VXferD.toD) {
   2344 //ZZ             ppHRegARM(i->ARMin.VXferD.dD);
   2345 //ZZ             vex_printf(", ");
   2346 //ZZ             ppHRegARM(i->ARMin.VXferD.rLo);
   2347 //ZZ             vex_printf(", ");
   2348 //ZZ             ppHRegARM(i->ARMin.VXferD.rHi);
   2349 //ZZ          } else {
   2350 //ZZ             ppHRegARM(i->ARMin.VXferD.rLo);
   2351 //ZZ             vex_printf(", ");
   2352 //ZZ             ppHRegARM(i->ARMin.VXferD.rHi);
   2353 //ZZ             vex_printf(", ");
   2354 //ZZ             ppHRegARM(i->ARMin.VXferD.dD);
   2355 //ZZ          }
   2356 //ZZ          return;
   2357 //ZZ       case ARMin_VXferS:
   2358 //ZZ          vex_printf("vmov  ");
   2359 //ZZ          if (i->ARMin.VXferS.toS) {
   2360 //ZZ             ppHRegARM(i->ARMin.VXferS.fD);
   2361 //ZZ             vex_printf(", ");
   2362 //ZZ             ppHRegARM(i->ARMin.VXferS.rLo);
   2363 //ZZ          } else {
   2364 //ZZ             ppHRegARM(i->ARMin.VXferS.rLo);
   2365 //ZZ             vex_printf(", ");
   2366 //ZZ             ppHRegARM(i->ARMin.VXferS.fD);
   2367 //ZZ          }
   2368 //ZZ          return;
   2369 //ZZ       case ARMin_VCvtID: {
   2370 //ZZ          const HChar* nm = "?";
   2371 //ZZ          if (i->ARMin.VCvtID.iToD) {
   2372 //ZZ             nm = i->ARMin.VCvtID.syned ? "fsitod" : "fuitod";
   2373 //ZZ          } else {
   2374 //ZZ             nm = i->ARMin.VCvtID.syned ? "ftosid" : "ftouid";
   2375 //ZZ          }
   2376 //ZZ          vex_printf("%s ", nm);
   2377 //ZZ          ppHRegARM(i->ARMin.VCvtID.dst);
   2378 //ZZ          vex_printf(", ");
   2379 //ZZ          ppHRegARM(i->ARMin.VCvtID.src);
   2380 //ZZ          return;
   2381 //ZZ       }
   2382 //ZZ       case ARMin_NLdStD:
   2383 //ZZ          if (i->ARMin.NLdStD.isLoad)
   2384 //ZZ             vex_printf("vld1.32 {");
   2385 //ZZ          else
   2386 //ZZ             vex_printf("vst1.32 {");
   2387 //ZZ          ppHRegARM(i->ARMin.NLdStD.dD);
   2388 //ZZ          vex_printf("} ");
   2389 //ZZ          ppARMAModeN(i->ARMin.NLdStD.amode);
   2390 //ZZ          return;
   2391 //ZZ       case ARMin_NUnary:
   2392 //ZZ          vex_printf("%s%s%s  ",
   2393 //ZZ                     showARMNeonUnOp(i->ARMin.NUnary.op),
   2394 //ZZ                     showARMNeonUnOpDataType(i->ARMin.NUnary.op),
   2395 //ZZ                     showARMNeonDataSize(i));
   2396 //ZZ          ppHRegARM(i->ARMin.NUnary.dst);
   2397 //ZZ          vex_printf(", ");
   2398 //ZZ          ppHRegARM(i->ARMin.NUnary.src);
   2399 //ZZ          if (i->ARMin.NUnary.op == ARMneon_EQZ)
   2400 //ZZ             vex_printf(", #0");
   2401 //ZZ          if (i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
   2402 //ZZ              i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
   2403 //ZZ              i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
   2404 //ZZ              i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF) {
   2405 //ZZ             vex_printf(", #%d", i->ARMin.NUnary.size);
   2406 //ZZ          }
   2407 //ZZ          if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
   2408 //ZZ              i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
   2409 //ZZ              i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
   2410 //ZZ             UInt size;
   2411 //ZZ             size = i->ARMin.NUnary.size;
   2412 //ZZ             if (size & 0x40) {
   2413 //ZZ                vex_printf(", #%d", size - 64);
   2414 //ZZ             } else if (size & 0x20) {
   2415 //ZZ                vex_printf(", #%d", size - 32);
   2416 //ZZ             } else if (size & 0x10) {
   2417 //ZZ                vex_printf(", #%d", size - 16);
   2418 //ZZ             } else if (size & 0x08) {
   2419 //ZZ                vex_printf(", #%d", size - 8);
   2420 //ZZ             }
   2421 //ZZ          }
   2422 //ZZ          return;
   2423 //ZZ       case ARMin_NUnaryS:
   2424 //ZZ          vex_printf("%s%s%s  ",
   2425 //ZZ                     showARMNeonUnOpS(i->ARMin.NUnaryS.op),
   2426 //ZZ                     showARMNeonUnOpSDataType(i->ARMin.NUnaryS.op),
   2427 //ZZ                     showARMNeonDataSize(i));
   2428 //ZZ          ppARMNRS(i->ARMin.NUnaryS.dst);
   2429 //ZZ          vex_printf(", ");
   2430 //ZZ          ppARMNRS(i->ARMin.NUnaryS.src);
   2431 //ZZ          return;
   2432 //ZZ       case ARMin_NShift:
   2433 //ZZ          vex_printf("%s%s%s  ",
   2434 //ZZ                     showARMNeonShiftOp(i->ARMin.NShift.op),
   2435 //ZZ                     showARMNeonShiftOpDataType(i->ARMin.NShift.op),
   2436 //ZZ                     showARMNeonDataSize(i));
   2437 //ZZ          ppHRegARM(i->ARMin.NShift.dst);
   2438 //ZZ          vex_printf(", ");
   2439 //ZZ          ppHRegARM(i->ARMin.NShift.argL);
   2440 //ZZ          vex_printf(", ");
   2441 //ZZ          ppHRegARM(i->ARMin.NShift.argR);
   2442 //ZZ          return;
   2443 //ZZ       case ARMin_NShl64:
   2444 //ZZ          vex_printf("vshl.i64 ");
   2445 //ZZ          ppHRegARM(i->ARMin.NShl64.dst);
   2446 //ZZ          vex_printf(", ");
   2447 //ZZ          ppHRegARM(i->ARMin.NShl64.src);
   2448 //ZZ          vex_printf(", #%u", i->ARMin.NShl64.amt);
   2449 //ZZ          return;
   2450 //ZZ       case ARMin_NDual:
   2451 //ZZ          vex_printf("%s%s%s  ",
   2452 //ZZ                     showARMNeonDualOp(i->ARMin.NDual.op),
   2453 //ZZ                     showARMNeonDualOpDataType(i->ARMin.NDual.op),
   2454 //ZZ                     showARMNeonDataSize(i));
   2455 //ZZ          ppHRegARM(i->ARMin.NDual.arg1);
   2456 //ZZ          vex_printf(", ");
   2457 //ZZ          ppHRegARM(i->ARMin.NDual.arg2);
   2458 //ZZ          return;
   2459 //ZZ       case ARMin_NBinary:
   2460 //ZZ          vex_printf("%s%s%s",
   2461 //ZZ                     showARMNeonBinOp(i->ARMin.NBinary.op),
   2462 //ZZ                     showARMNeonBinOpDataType(i->ARMin.NBinary.op),
   2463 //ZZ                     showARMNeonDataSize(i));
   2464 //ZZ          vex_printf("  ");
   2465 //ZZ          ppHRegARM(i->ARMin.NBinary.dst);
   2466 //ZZ          vex_printf(", ");
   2467 //ZZ          ppHRegARM(i->ARMin.NBinary.argL);
   2468 //ZZ          vex_printf(", ");
   2469 //ZZ          ppHRegARM(i->ARMin.NBinary.argR);
   2470 //ZZ          return;
   2471       case ARM64in_VImmQ:
   2472          vex_printf("qimm   ");
   2473          ppHRegARM64(i->ARM64in.VImmQ.rQ);
   2474          vex_printf(", Bits16toBytes16(0x%x)", (UInt)i->ARM64in.VImmQ.imm);
   2475          return;
   2476       case ARM64in_VDfromX:
   2477          vex_printf("fmov   ");
   2478          ppHRegARM64(i->ARM64in.VDfromX.rD);
   2479          vex_printf(", ");
   2480          ppHRegARM64(i->ARM64in.VDfromX.rX);
   2481          return;
   2482       case ARM64in_VQfromXX:
   2483          vex_printf("qFromXX ");
   2484          ppHRegARM64(i->ARM64in.VQfromXX.rQ);
   2485          vex_printf(", ");
   2486          ppHRegARM64(i->ARM64in.VQfromXX.rXhi);
   2487          vex_printf(", ");
   2488          ppHRegARM64(i->ARM64in.VQfromXX.rXlo);
   2489          return;
   2490       case ARM64in_VXfromQ:
   2491          vex_printf("mov    ");
   2492          ppHRegARM64(i->ARM64in.VXfromQ.rX);
   2493          vex_printf(", ");
   2494          ppHRegARM64(i->ARM64in.VXfromQ.rQ);
   2495          vex_printf(".d[%u]", i->ARM64in.VXfromQ.laneNo);
   2496          return;
   2497       case ARM64in_VMov: {
   2498          UChar aux = '?';
   2499          switch (i->ARM64in.VMov.szB) {
   2500             case 16: aux = 'q'; break;
   2501             case 8:  aux = 'd'; break;
   2502             case 4:  aux = 's'; break;
   2503             default: break;
   2504          }
   2505          vex_printf("mov(%c) ", aux);
   2506          ppHRegARM64(i->ARM64in.VMov.dst);
   2507          vex_printf(", ");
   2508          ppHRegARM64(i->ARM64in.VMov.src);
   2509          return;
   2510       }
   2511 //ZZ        case ARMin_NCMovQ:
   2512 //ZZ          vex_printf("vmov%s ", showARMCondCode(i->ARMin.NCMovQ.cond));
   2513 //ZZ          ppHRegARM(i->ARMin.NCMovQ.dst);
   2514 //ZZ          vex_printf(", ");
   2515 //ZZ          ppHRegARM(i->ARMin.NCMovQ.src);
   2516 //ZZ          return;
   2517 //ZZ       case ARMin_Add32:
   2518 //ZZ          vex_printf("add32 ");
   2519 //ZZ          ppHRegARM(i->ARMin.Add32.rD);
   2520 //ZZ          vex_printf(", ");
   2521 //ZZ          ppHRegARM(i->ARMin.Add32.rN);
   2522 //ZZ          vex_printf(", ");
   2523 //ZZ          vex_printf("%d", i->ARMin.Add32.imm32);
   2524 //ZZ          return;
   2525       case ARM64in_EvCheck:
   2526          vex_printf("(evCheck) ldr w9,");
   2527          ppARM64AMode(i->ARM64in.EvCheck.amCounter);
   2528          vex_printf("; subs w9,w9,$1; str w9,");
   2529          ppARM64AMode(i->ARM64in.EvCheck.amCounter);
   2530          vex_printf("; bpl nofail; ldr x9,");
   2531          ppARM64AMode(i->ARM64in.EvCheck.amFailAddr);
   2532          vex_printf("; br x9; nofail:");
   2533          return;
   2534 //ZZ       case ARMin_ProfInc:
   2535 //ZZ          vex_printf("(profInc) movw r12,LO16($NotKnownYet); "
   2536 //ZZ                     "movw r12,HI16($NotKnownYet); "
   2537 //ZZ                     "ldr r11,[r12]; "
   2538 //ZZ                     "adds r11,r11,$1; "
   2539 //ZZ                     "str r11,[r12]; "
   2540 //ZZ                     "ldr r11,[r12+4]; "
   2541 //ZZ                     "adc r11,r11,$0; "
   2542 //ZZ                     "str r11,[r12+4]");
   2543 //ZZ          return;
   2544       default:
   2545          vex_printf("ppARM64Instr: unhandled case (tag %d)", (Int)i->tag);
   2546          vpanic("ppARM64Instr(1)");
   2547          return;
   2548    }
   2549 }
   2550 
   2551 
   2552 /* --------- Helpers for register allocation. --------- */
   2553 
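        /* Tell the register allocator which (virtual and real) registers each
           instruction reads and writes.  On arm64, mode64 must be True, as
           asserted immediately below. */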
   2554 void getRegUsage_ARM64Instr ( HRegUsage* u, ARM64Instr* i, Bool mode64 )
   2555 {
   2556    vassert(mode64 == True);
   2557    initHRegUsage(u);
   2558    switch (i->tag) {
   2559       case ARM64in_Arith:
   2560          addHRegUse(u, HRmWrite, i->ARM64in.Arith.dst);
   2561          addHRegUse(u, HRmRead, i->ARM64in.Arith.argL);
   2562          addRegUsage_ARM64RIA(u, i->ARM64in.Arith.argR);
   2563          return;
   2564       case ARM64in_Cmp:
   2565          addHRegUse(u, HRmRead, i->ARM64in.Cmp.argL);
   2566          addRegUsage_ARM64RIA(u, i->ARM64in.Cmp.argR);
   2567          return;
   2568       case ARM64in_Logic:
   2569          addHRegUse(u, HRmWrite, i->ARM64in.Logic.dst);
   2570          addHRegUse(u, HRmRead, i->ARM64in.Logic.argL);
   2571          addRegUsage_ARM64RIL(u, i->ARM64in.Logic.argR);
   2572          return;
   2573       case ARM64in_Test:
   2574          addHRegUse(u, HRmRead, i->ARM64in.Test.argL);
   2575          addRegUsage_ARM64RIL(u, i->ARM64in.Test.argR);
   2576          return;
   2577       case ARM64in_Shift:
   2578          addHRegUse(u, HRmWrite, i->ARM64in.Shift.dst);
   2579          addHRegUse(u, HRmRead, i->ARM64in.Shift.argL);
   2580          addRegUsage_ARM64RI6(u, i->ARM64in.Shift.argR);
   2581          return;
   2582       case ARM64in_Unary:
   2583          addHRegUse(u, HRmWrite, i->ARM64in.Unary.dst);
   2584          addHRegUse(u, HRmRead, i->ARM64in.Unary.src);
   2585          return;
   2586       case ARM64in_MovI:
   2587          addHRegUse(u, HRmWrite, i->ARM64in.MovI.dst);
   2588          addHRegUse(u, HRmRead,  i->ARM64in.MovI.src);
   2589          return;
   2590       case ARM64in_Imm64:
   2591          addHRegUse(u, HRmWrite, i->ARM64in.Imm64.dst);
   2592          return;
   2593       case ARM64in_LdSt64:
   2594          addRegUsage_ARM64AMode(u, i->ARM64in.LdSt64.amode);
   2595          if (i->ARM64in.LdSt64.isLoad) {
   2596             addHRegUse(u, HRmWrite, i->ARM64in.LdSt64.rD);
   2597          } else {
   2598             addHRegUse(u, HRmRead, i->ARM64in.LdSt64.rD);
   2599          }
   2600          return;
   2601       case ARM64in_LdSt32:
   2602          addRegUsage_ARM64AMode(u, i->ARM64in.LdSt32.amode);
   2603          if (i->ARM64in.LdSt32.isLoad) {
   2604             addHRegUse(u, HRmWrite, i->ARM64in.LdSt32.rD);
   2605          } else {
   2606             addHRegUse(u, HRmRead, i->ARM64in.LdSt32.rD);
   2607          }
   2608          return;
   2609       case ARM64in_LdSt16:
   2610          addRegUsage_ARM64AMode(u, i->ARM64in.LdSt16.amode);
   2611          if (i->ARM64in.LdSt16.isLoad) {
   2612             addHRegUse(u, HRmWrite, i->ARM64in.LdSt16.rD);
   2613          } else {
   2614             addHRegUse(u, HRmRead, i->ARM64in.LdSt16.rD);
   2615          }
   2616          return;
   2617       case ARM64in_LdSt8:
   2618          addRegUsage_ARM64AMode(u, i->ARM64in.LdSt8.amode);
   2619          if (i->ARM64in.LdSt8.isLoad) {
   2620             addHRegUse(u, HRmWrite, i->ARM64in.LdSt8.rD);
   2621          } else {
   2622             addHRegUse(u, HRmRead, i->ARM64in.LdSt8.rD);
   2623          }
   2624          return;
   2625       /* XDirect/XIndir/XAssisted are also a bit subtle.  They
   2626          conditionally exit the block.  Hence we only need to list (1)
   2627          the registers that they read, and (2) the registers that they
   2628          write in the case where the block is not exited.  (2) is
   2629          empty, hence only (1) is relevant here. */
   2630       case ARM64in_XDirect:
   2631          addRegUsage_ARM64AMode(u, i->ARM64in.XDirect.amPC);
   2632          return;
   2633       case ARM64in_XIndir:
   2634          addHRegUse(u, HRmRead, i->ARM64in.XIndir.dstGA);
   2635          addRegUsage_ARM64AMode(u, i->ARM64in.XIndir.amPC);
   2636          return;
   2637       case ARM64in_XAssisted:
   2638          addHRegUse(u, HRmRead, i->ARM64in.XAssisted.dstGA);
   2639          addRegUsage_ARM64AMode(u, i->ARM64in.XAssisted.amPC);
   2640          return;
   2641       case ARM64in_CSel:
   2642          addHRegUse(u, HRmWrite, i->ARM64in.CSel.dst);
   2643          addHRegUse(u, HRmRead,  i->ARM64in.CSel.argL);
   2644          addHRegUse(u, HRmRead,  i->ARM64in.CSel.argR);
   2645          return;
   2646       case ARM64in_Call:
   2647          /* logic and comments copied/modified from x86 back end */
   2648          /* This is a bit subtle. */
   2649          /* First off, claim it trashes all the caller-saved regs
   2650             which fall within the register allocator's jurisdiction.
   2651             These I believe to be x0 to x7.  Also need to be
   2652             careful about vector regs. */
   2653          addHRegUse(u, HRmWrite, hregARM64_X0());
   2654          addHRegUse(u, HRmWrite, hregARM64_X1());
   2655          addHRegUse(u, HRmWrite, hregARM64_X2());
   2656          addHRegUse(u, HRmWrite, hregARM64_X3());
   2657          addHRegUse(u, HRmWrite, hregARM64_X4());
   2658          addHRegUse(u, HRmWrite, hregARM64_X5());
   2659          addHRegUse(u, HRmWrite, hregARM64_X6());
   2660          addHRegUse(u, HRmWrite, hregARM64_X7());
   2661          addHRegUse(u, HRmWrite, hregARM64_Q16());
   2662          addHRegUse(u, HRmWrite, hregARM64_Q17());
   2663          addHRegUse(u, HRmWrite, hregARM64_Q18());
   2664          /* Now we have to state any parameter-carrying registers
   2665             which might be read.  This depends on nArgRegs. */
   2666          switch (i->ARM64in.Call.nArgRegs) {
   2667             case 8: addHRegUse(u, HRmRead, hregARM64_X7()); /*fallthru*/
   2668             case 7: addHRegUse(u, HRmRead, hregARM64_X6()); /*fallthru*/
   2669             case 6: addHRegUse(u, HRmRead, hregARM64_X5()); /*fallthru*/
   2670             case 5: addHRegUse(u, HRmRead, hregARM64_X4()); /*fallthru*/
   2671             case 4: addHRegUse(u, HRmRead, hregARM64_X3()); /*fallthru*/
   2672             case 3: addHRegUse(u, HRmRead, hregARM64_X2()); /*fallthru*/
   2673             case 2: addHRegUse(u, HRmRead, hregARM64_X1()); /*fallthru*/
   2674             case 1: addHRegUse(u, HRmRead, hregARM64_X0()); break;
   2675             case 0: break;
   2676             default: vpanic("getRegUsage_ARM64:Call:regparms");
   2677          }
   2678          /* Finally, there is the issue that the insn trashes a
   2679             register because the literal target address has to be
   2680             loaded into a register.  However, we reserve x9 for that
   2681             purpose so there's no further complexity here.  Stating x9
   2682             as trashed is pointless since it's not under the control
   2683             of the allocator, but what the hell. */
   2684          addHRegUse(u, HRmWrite, hregARM64_X9());
   2685          return;
   2686       case ARM64in_AddToSP:
   2687          /* Only changes SP, but regalloc doesn't control that, hence
   2688             we don't care. */
   2689          return;
   2690       case ARM64in_FromSP:
   2691          addHRegUse(u, HRmWrite, i->ARM64in.FromSP.dst);
   2692          return;
   2693       case ARM64in_Mul:
   2694          addHRegUse(u, HRmWrite, i->ARM64in.Mul.dst);
   2695          addHRegUse(u, HRmRead,  i->ARM64in.Mul.argL);
   2696          addHRegUse(u, HRmRead,  i->ARM64in.Mul.argR);
   2697          return;
   2698       case ARM64in_LdrEX:
   2699          addHRegUse(u, HRmRead, hregARM64_X4());
   2700          addHRegUse(u, HRmWrite, hregARM64_X2());
   2701          return;
   2702       case ARM64in_StrEX:
   2703          addHRegUse(u, HRmRead, hregARM64_X4());
   2704          addHRegUse(u, HRmWrite, hregARM64_X0());
   2705          addHRegUse(u, HRmRead, hregARM64_X2());
   2706          return;
   2707       case ARM64in_MFence:
   2708          return;
   2709 //ZZ       case ARMin_CLREX:
   2710 //ZZ          return;
   2711       case ARM64in_VLdStS:
   2712          addHRegUse(u, HRmRead, i->ARM64in.VLdStS.rN);
   2713          if (i->ARM64in.VLdStS.isLoad) {
   2714             addHRegUse(u, HRmWrite, i->ARM64in.VLdStS.sD);
   2715          } else {
   2716             addHRegUse(u, HRmRead, i->ARM64in.VLdStS.sD);
   2717          }
   2718          return;
   2719       case ARM64in_VLdStD:
   2720          addHRegUse(u, HRmRead, i->ARM64in.VLdStD.rN);
   2721          if (i->ARM64in.VLdStD.isLoad) {
   2722             addHRegUse(u, HRmWrite, i->ARM64in.VLdStD.dD);
   2723          } else {
   2724             addHRegUse(u, HRmRead, i->ARM64in.VLdStD.dD);
   2725          }
   2726          return;
   2727       case ARM64in_VLdStQ:
   2728          addHRegUse(u, HRmRead, i->ARM64in.VLdStQ.rN);
   2729          if (i->ARM64in.VLdStQ.isLoad)
   2730             addHRegUse(u, HRmWrite, i->ARM64in.VLdStQ.rQ);
   2731          else
   2732             addHRegUse(u, HRmRead, i->ARM64in.VLdStQ.rQ);
   2733          return;
   2734       case ARM64in_VCvtI2F:
   2735          addHRegUse(u, HRmRead, i->ARM64in.VCvtI2F.rS);
   2736          addHRegUse(u, HRmWrite, i->ARM64in.VCvtI2F.rD);
   2737          return;
   2738       case ARM64in_VCvtF2I:
   2739          addHRegUse(u, HRmRead, i->ARM64in.VCvtF2I.rS);
   2740          addHRegUse(u, HRmWrite, i->ARM64in.VCvtF2I.rD);
   2741          return;
   2742       case ARM64in_VCvtSD:
   2743          addHRegUse(u, HRmWrite, i->ARM64in.VCvtSD.dst);
   2744          addHRegUse(u, HRmRead,  i->ARM64in.VCvtSD.src);
   2745          return;
   2746       case ARM64in_VUnaryD:
   2747          addHRegUse(u, HRmWrite, i->ARM64in.VUnaryD.dst);
   2748          addHRegUse(u, HRmRead, i->ARM64in.VUnaryD.src);
   2749          return;
   2750       case ARM64in_VUnaryS:
   2751          addHRegUse(u, HRmWrite, i->ARM64in.VUnaryS.dst);
   2752          addHRegUse(u, HRmRead, i->ARM64in.VUnaryS.src);
   2753          return;
   2754       case ARM64in_VBinD:
   2755          addHRegUse(u, HRmWrite, i->ARM64in.VBinD.dst);
   2756          addHRegUse(u, HRmRead, i->ARM64in.VBinD.argL);
   2757          addHRegUse(u, HRmRead, i->ARM64in.VBinD.argR);
   2758          return;
   2759       case ARM64in_VBinS:
   2760          addHRegUse(u, HRmWrite, i->ARM64in.VBinS.dst);
   2761          addHRegUse(u, HRmRead, i->ARM64in.VBinS.argL);
   2762          addHRegUse(u, HRmRead, i->ARM64in.VBinS.argR);
   2763          return;
   2764       case ARM64in_VCmpD:
   2765          addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argL);
   2766          addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argR);
   2767          return;
   2768       case ARM64in_VCmpS:
   2769          addHRegUse(u, HRmRead, i->ARM64in.VCmpS.argL);
   2770          addHRegUse(u, HRmRead, i->ARM64in.VCmpS.argR);
   2771          return;
   2772       case ARM64in_FPCR:
   2773          if (i->ARM64in.FPCR.toFPCR)
   2774             addHRegUse(u, HRmRead, i->ARM64in.FPCR.iReg);
   2775          else
   2776             addHRegUse(u, HRmWrite, i->ARM64in.FPCR.iReg);
   2777          return;
   2778       case ARM64in_VBinV:
   2779          addHRegUse(u, HRmWrite, i->ARM64in.VBinV.dst);
   2780          addHRegUse(u, HRmRead, i->ARM64in.VBinV.argL);
   2781          addHRegUse(u, HRmRead, i->ARM64in.VBinV.argR);
   2782          return;
   2783       case ARM64in_VUnaryV:
   2784          addHRegUse(u, HRmWrite, i->ARM64in.VUnaryV.dst);
   2785          addHRegUse(u, HRmRead, i->ARM64in.VUnaryV.arg);
   2786          return;
   2787       case ARM64in_VNarrowV:
   2788          addHRegUse(u, HRmWrite, i->ARM64in.VNarrowV.dst);
   2789          addHRegUse(u, HRmRead, i->ARM64in.VNarrowV.src);
   2790          return;
   2791       case ARM64in_VShiftImmV:
   2792          addHRegUse(u, HRmWrite, i->ARM64in.VShiftImmV.dst);
   2793          addHRegUse(u, HRmRead, i->ARM64in.VShiftImmV.src);
   2794          return;
   2795 //ZZ       case ARMin_VAluS:
   2796 //ZZ          addHRegUse(u, HRmWrite, i->ARMin.VAluS.dst);
   2797 //ZZ          addHRegUse(u, HRmRead, i->ARMin.VAluS.argL);
   2798 //ZZ          addHRegUse(u, HRmRead, i->ARMin.VAluS.argR);
   2799 //ZZ          return;
   2800 //ZZ       case ARMin_VUnaryS:
   2801 //ZZ          addHRegUse(u, HRmWrite, i->ARMin.VUnaryS.dst);
   2802 //ZZ          addHRegUse(u, HRmRead, i->ARMin.VUnaryS.src);
   2803 //ZZ          return;
   2804 //ZZ       case ARMin_VCMovD:
   2805 //ZZ          addHRegUse(u, HRmWrite, i->ARMin.VCMovD.dst);
   2806 //ZZ          addHRegUse(u, HRmRead,  i->ARMin.VCMovD.dst);
   2807 //ZZ          addHRegUse(u, HRmRead,  i->ARMin.VCMovD.src);
   2808 //ZZ          return;
   2809 //ZZ       case ARMin_VCMovS:
   2810 //ZZ          addHRegUse(u, HRmWrite, i->ARMin.VCMovS.dst);
   2811 //ZZ          addHRegUse(u, HRmRead,  i->ARMin.VCMovS.dst);
   2812 //ZZ          addHRegUse(u, HRmRead,  i->ARMin.VCMovS.src);
   2813 //ZZ          return;
   2814 //ZZ       case ARMin_VXferD:
   2815 //ZZ          if (i->ARMin.VXferD.toD) {
   2816 //ZZ             addHRegUse(u, HRmWrite, i->ARMin.VXferD.dD);
   2817 //ZZ             addHRegUse(u, HRmRead,  i->ARMin.VXferD.rHi);
   2818 //ZZ             addHRegUse(u, HRmRead,  i->ARMin.VXferD.rLo);
   2819 //ZZ          } else {
   2820 //ZZ             addHRegUse(u, HRmRead,  i->ARMin.VXferD.dD);
   2821 //ZZ             addHRegUse(u, HRmWrite, i->ARMin.VXferD.rHi);
   2822 //ZZ             addHRegUse(u, HRmWrite, i->ARMin.VXferD.rLo);
   2823 //ZZ          }
   2824 //ZZ          return;
   2825 //ZZ       case ARMin_VXferS:
   2826 //ZZ          if (i->ARMin.VXferS.toS) {
   2827 //ZZ             addHRegUse(u, HRmWrite, i->ARMin.VXferS.fD);
   2828 //ZZ             addHRegUse(u, HRmRead,  i->ARMin.VXferS.rLo);
   2829 //ZZ          } else {
   2830 //ZZ             addHRegUse(u, HRmRead,  i->ARMin.VXferS.fD);
   2831 //ZZ             addHRegUse(u, HRmWrite, i->ARMin.VXferS.rLo);
   2832 //ZZ          }
   2833 //ZZ          return;
   2834 //ZZ       case ARMin_VCvtID:
   2835 //ZZ          addHRegUse(u, HRmWrite, i->ARMin.VCvtID.dst);
   2836 //ZZ          addHRegUse(u, HRmRead,  i->ARMin.VCvtID.src);
   2837 //ZZ          return;
   2838 //ZZ       case ARMin_NLdStD:
   2839 //ZZ          if (i->ARMin.NLdStD.isLoad)
   2840 //ZZ             addHRegUse(u, HRmWrite, i->ARMin.NLdStD.dD);
   2841 //ZZ          else
   2842 //ZZ             addHRegUse(u, HRmRead, i->ARMin.NLdStD.dD);
   2843 //ZZ          addRegUsage_ARMAModeN(u, i->ARMin.NLdStD.amode);
   2844 //ZZ          return;
   2845 //ZZ       case ARMin_NUnary:
   2846 //ZZ          addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst);
   2847 //ZZ          addHRegUse(u, HRmRead, i->ARMin.NUnary.src);
   2848 //ZZ          return;
   2849 //ZZ       case ARMin_NUnaryS:
   2850 //ZZ          addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg);
   2851 //ZZ          addHRegUse(u, HRmRead, i->ARMin.NUnaryS.src->reg);
   2852 //ZZ          return;
   2853 //ZZ       case ARMin_NShift:
   2854 //ZZ          addHRegUse(u, HRmWrite, i->ARMin.NShift.dst);
   2855 //ZZ          addHRegUse(u, HRmRead, i->ARMin.NShift.argL);
   2856 //ZZ          addHRegUse(u, HRmRead, i->ARMin.NShift.argR);
   2857 //ZZ          return;
   2858 //ZZ       case ARMin_NShl64:
   2859 //ZZ          addHRegUse(u, HRmWrite, i->ARMin.NShl64.dst);
   2860 //ZZ          addHRegUse(u, HRmRead, i->ARMin.NShl64.src);
   2861 //ZZ          return;
   2862 //ZZ       case ARMin_NDual:
   2863 //ZZ          addHRegUse(u, HRmWrite, i->ARMin.NDual.arg1);
   2864 //ZZ          addHRegUse(u, HRmWrite, i->ARMin.NDual.arg2);
   2865 //ZZ          addHRegUse(u, HRmRead, i->ARMin.NDual.arg1);
   2866 //ZZ          addHRegUse(u, HRmRead, i->ARMin.NDual.arg2);
   2867 //ZZ          return;
   2868       case ARM64in_VImmQ:
   2869          addHRegUse(u, HRmWrite, i->ARM64in.VImmQ.rQ);
   2870          return;
   2871       case ARM64in_VDfromX:
   2872          addHRegUse(u, HRmWrite, i->ARM64in.VDfromX.rD);
   2873          addHRegUse(u, HRmRead,  i->ARM64in.VDfromX.rX);
   2874          return;
   2875       case ARM64in_VQfromXX:
   2876          addHRegUse(u, HRmWrite, i->ARM64in.VQfromXX.rQ);
   2877          addHRegUse(u, HRmRead,  i->ARM64in.VQfromXX.rXhi);
   2878          addHRegUse(u, HRmRead,  i->ARM64in.VQfromXX.rXlo);
   2879          return;
   2880       case ARM64in_VXfromQ:
   2881          addHRegUse(u, HRmWrite, i->ARM64in.VXfromQ.rX);
   2882          addHRegUse(u, HRmRead,  i->ARM64in.VXfromQ.rQ);
   2883          return;
   2884       case ARM64in_VMov:
   2885          addHRegUse(u, HRmWrite, i->ARM64in.VMov.dst);
   2886          addHRegUse(u, HRmRead,  i->ARM64in.VMov.src);
   2887          return;
   2888 //ZZ       case ARMin_NBinary:
   2889 //ZZ          addHRegUse(u, HRmWrite, i->ARMin.NBinary.dst);
   2890 //ZZ          /* TODO: sometimes dst is also being read! */
   2891 //ZZ          // XXX fix this
   2892 //ZZ          addHRegUse(u, HRmRead, i->ARMin.NBinary.argL);
   2893 //ZZ          addHRegUse(u, HRmRead, i->ARMin.NBinary.argR);
   2894 //ZZ          return;
   2895 //ZZ       case ARMin_NCMovQ:
   2896 //ZZ          addHRegUse(u, HRmWrite, i->ARMin.NCMovQ.dst);
   2897 //ZZ          addHRegUse(u, HRmRead,  i->ARMin.NCMovQ.dst);
   2898 //ZZ          addHRegUse(u, HRmRead,  i->ARMin.NCMovQ.src);
   2899 //ZZ          return;
   2900 //ZZ       case ARMin_Add32:
   2901 //ZZ          addHRegUse(u, HRmWrite, i->ARMin.Add32.rD);
   2902 //ZZ          addHRegUse(u, HRmRead, i->ARMin.Add32.rN);
   2903 //ZZ          return;
   2904       case ARM64in_EvCheck:
   2905          /* We expect both amodes only to mention x21, so this is in
   2906             fact pointless, since x21 isn't allocatable, but
   2907             anyway.. */
   2908          addRegUsage_ARM64AMode(u, i->ARM64in.EvCheck.amCounter);
   2909          addRegUsage_ARM64AMode(u, i->ARM64in.EvCheck.amFailAddr);
   2910          addHRegUse(u, HRmWrite, hregARM64_X9()); /* also unavail to RA */
   2911          return;
   2912 //ZZ       case ARMin_ProfInc:
   2913 //ZZ          addHRegUse(u, HRmWrite, hregARM_R12());
   2914 //ZZ          addHRegUse(u, HRmWrite, hregARM_R11());
   2915 //ZZ          return;
   2916       default:
   2917          ppARM64Instr(i);
   2918          vpanic("getRegUsage_ARM64Instr");
   2919    }
   2920 }
   2921 
   2922 
   2923 void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
   2924 {
   2925    vassert(mode64 == True);
   2926    switch (i->tag) {
   2927       case ARM64in_Arith:
   2928          i->ARM64in.Arith.dst = lookupHRegRemap(m, i->ARM64in.Arith.dst);
   2929          i->ARM64in.Arith.argL = lookupHRegRemap(m, i->ARM64in.Arith.argL);
   2930          mapRegs_ARM64RIA(m, i->ARM64in.Arith.argR);
   2931          return;
   2932       case ARM64in_Cmp:
   2933          i->ARM64in.Cmp.argL = lookupHRegRemap(m, i->ARM64in.Cmp.argL);
   2934          mapRegs_ARM64RIA(m, i->ARM64in.Cmp.argR);
   2935          return;
   2936       case ARM64in_Logic:
   2937          i->ARM64in.Logic.dst = lookupHRegRemap(m, i->ARM64in.Logic.dst);
   2938          i->ARM64in.Logic.argL = lookupHRegRemap(m, i->ARM64in.Logic.argL);
   2939          mapRegs_ARM64RIL(m, i->ARM64in.Logic.argR);
   2940          return;
   2941       case ARM64in_Test:
   2942          i->ARM64in.Test.argL = lookupHRegRemap(m, i->ARM64in.Test.argL);
   2943          mapRegs_ARM64RIL(m, i->ARM64in.Test.argR);
   2944          return;
   2945       case ARM64in_Shift:
   2946          i->ARM64in.Shift.dst = lookupHRegRemap(m, i->ARM64in.Shift.dst);
   2947          i->ARM64in.Shift.argL = lookupHRegRemap(m, i->ARM64in.Shift.argL);
   2948          mapRegs_ARM64RI6(m, i->ARM64in.Shift.argR);
   2949          return;
   2950       case ARM64in_Unary:
   2951          i->ARM64in.Unary.dst = lookupHRegRemap(m, i->ARM64in.Unary.dst);
   2952          i->ARM64in.Unary.src = lookupHRegRemap(m, i->ARM64in.Unary.src);
   2953          return;
   2954       case ARM64in_MovI:
   2955          i->ARM64in.MovI.dst = lookupHRegRemap(m, i->ARM64in.MovI.dst);
   2956          i->ARM64in.MovI.src = lookupHRegRemap(m, i->ARM64in.MovI.src);
   2957          return;
   2958       case ARM64in_Imm64:
   2959          i->ARM64in.Imm64.dst = lookupHRegRemap(m, i->ARM64in.Imm64.dst);
   2960          return;
   2961       case ARM64in_LdSt64:
   2962          i->ARM64in.LdSt64.rD = lookupHRegRemap(m, i->ARM64in.LdSt64.rD);
   2963          mapRegs_ARM64AMode(m, i->ARM64in.LdSt64.amode);
   2964          return;
   2965       case ARM64in_LdSt32:
   2966          i->ARM64in.LdSt32.rD = lookupHRegRemap(m, i->ARM64in.LdSt32.rD);
   2967          mapRegs_ARM64AMode(m, i->ARM64in.LdSt32.amode);
   2968          return;
   2969       case ARM64in_LdSt16:
   2970          i->ARM64in.LdSt16.rD = lookupHRegRemap(m, i->ARM64in.LdSt16.rD);
   2971          mapRegs_ARM64AMode(m, i->ARM64in.LdSt16.amode);
   2972          return;
   2973       case ARM64in_LdSt8:
   2974          i->ARM64in.LdSt8.rD = lookupHRegRemap(m, i->ARM64in.LdSt8.rD);
   2975          mapRegs_ARM64AMode(m, i->ARM64in.LdSt8.amode);
   2976          return;
   2977       case ARM64in_XDirect:
   2978          mapRegs_ARM64AMode(m, i->ARM64in.XDirect.amPC);
   2979          return;
   2980       case ARM64in_XIndir:
   2981          i->ARM64in.XIndir.dstGA
   2982             = lookupHRegRemap(m, i->ARM64in.XIndir.dstGA);
   2983          mapRegs_ARM64AMode(m, i->ARM64in.XIndir.amPC);
   2984          return;
   2985       case ARM64in_XAssisted:
   2986          i->ARM64in.XAssisted.dstGA
   2987             = lookupHRegRemap(m, i->ARM64in.XAssisted.dstGA);
   2988          mapRegs_ARM64AMode(m, i->ARM64in.XAssisted.amPC);
   2989          return;
   2990       case ARM64in_CSel:
   2991          i->ARM64in.CSel.dst  = lookupHRegRemap(m, i->ARM64in.CSel.dst);
   2992          i->ARM64in.CSel.argL = lookupHRegRemap(m, i->ARM64in.CSel.argL);
   2993          i->ARM64in.CSel.argR = lookupHRegRemap(m, i->ARM64in.CSel.argR);
   2994          return;
   2995       case ARM64in_Call:
   2996          return;
   2997       case ARM64in_AddToSP:
   2998          return;
   2999       case ARM64in_FromSP:
   3000          i->ARM64in.FromSP.dst = lookupHRegRemap(m, i->ARM64in.FromSP.dst);
   3001          return;
   3002       case ARM64in_Mul:
   3003          i->ARM64in.Mul.dst  = lookupHRegRemap(m, i->ARM64in.Mul.dst);
   3004          i->ARM64in.Mul.argL = lookupHRegRemap(m, i->ARM64in.Mul.argL);
   3005          i->ARM64in.Mul.argR = lookupHRegRemap(m, i->ARM64in.Mul.argR);
   3006          return;
   3007       case ARM64in_LdrEX:
   3008          return;
   3009       case ARM64in_StrEX:
   3010          return;
   3011       case ARM64in_MFence:
   3012          return;
   3013 //ZZ       case ARMin_CLREX:
   3014 //ZZ          return;
   3015       case ARM64in_VLdStS:
   3016          i->ARM64in.VLdStS.sD = lookupHRegRemap(m, i->ARM64in.VLdStS.sD);
   3017          i->ARM64in.VLdStS.rN = lookupHRegRemap(m, i->ARM64in.VLdStS.rN);
   3018          return;
   3019       case ARM64in_VLdStD:
   3020          i->ARM64in.VLdStD.dD = lookupHRegRemap(m, i->ARM64in.VLdStD.dD);
   3021          i->ARM64in.VLdStD.rN = lookupHRegRemap(m, i->ARM64in.VLdStD.rN);
   3022          return;
   3023       case ARM64in_VLdStQ:
   3024          i->ARM64in.VLdStQ.rQ = lookupHRegRemap(m, i->ARM64in.VLdStQ.rQ);
   3025          i->ARM64in.VLdStQ.rN = lookupHRegRemap(m, i->ARM64in.VLdStQ.rN);
   3026          return;
   3027       case ARM64in_VCvtI2F:
   3028          i->ARM64in.VCvtI2F.rS = lookupHRegRemap(m, i->ARM64in.VCvtI2F.rS);
   3029          i->ARM64in.VCvtI2F.rD = lookupHRegRemap(m, i->ARM64in.VCvtI2F.rD);
   3030          return;
   3031       case ARM64in_VCvtF2I:
   3032          i->ARM64in.VCvtF2I.rS = lookupHRegRemap(m, i->ARM64in.VCvtF2I.rS);
   3033          i->ARM64in.VCvtF2I.rD = lookupHRegRemap(m, i->ARM64in.VCvtF2I.rD);
   3034          return;
   3035       case ARM64in_VCvtSD:
   3036          i->ARM64in.VCvtSD.dst = lookupHRegRemap(m, i->ARM64in.VCvtSD.dst);
   3037          i->ARM64in.VCvtSD.src = lookupHRegRemap(m, i->ARM64in.VCvtSD.src);
   3038          return;
   3039       case ARM64in_VUnaryD:
   3040          i->ARM64in.VUnaryD.dst = lookupHRegRemap(m, i->ARM64in.VUnaryD.dst);
   3041          i->ARM64in.VUnaryD.src = lookupHRegRemap(m, i->ARM64in.VUnaryD.src);
   3042          return;
   3043       case ARM64in_VUnaryS:
   3044          i->ARM64in.VUnaryS.dst = lookupHRegRemap(m, i->ARM64in.VUnaryS.dst);
   3045          i->ARM64in.VUnaryS.src = lookupHRegRemap(m, i->ARM64in.VUnaryS.src);
   3046          return;
   3047       case ARM64in_VBinD:
   3048          i->ARM64in.VBinD.dst  = lookupHRegRemap(m, i->ARM64in.VBinD.dst);
   3049          i->ARM64in.VBinD.argL = lookupHRegRemap(m, i->ARM64in.VBinD.argL);
   3050          i->ARM64in.VBinD.argR = lookupHRegRemap(m, i->ARM64in.VBinD.argR);
   3051          return;
   3052       case ARM64in_VBinS:
   3053          i->ARM64in.VBinS.dst  = lookupHRegRemap(m, i->ARM64in.VBinS.dst);
   3054          i->ARM64in.VBinS.argL = lookupHRegRemap(m, i->ARM64in.VBinS.argL);
   3055          i->ARM64in.VBinS.argR = lookupHRegRemap(m, i->ARM64in.VBinS.argR);
   3056          return;
   3057       case ARM64in_VCmpD:
   3058          i->ARM64in.VCmpD.argL = lookupHRegRemap(m, i->ARM64in.VCmpD.argL);
   3059          i->ARM64in.VCmpD.argR = lookupHRegRemap(m, i->ARM64in.VCmpD.argR);
   3060          return;
   3061       case ARM64in_VCmpS:
   3062          i->ARM64in.VCmpS.argL = lookupHRegRemap(m, i->ARM64in.VCmpS.argL);
   3063          i->ARM64in.VCmpS.argR = lookupHRegRemap(m, i->ARM64in.VCmpS.argR);
   3064          return;
   3065       case ARM64in_FPCR:
   3066          i->ARM64in.FPCR.iReg = lookupHRegRemap(m, i->ARM64in.FPCR.iReg);
   3067          return;
   3068       case ARM64in_VBinV:
   3069          i->ARM64in.VBinV.dst  = lookupHRegRemap(m, i->ARM64in.VBinV.dst);
   3070          i->ARM64in.VBinV.argL = lookupHRegRemap(m, i->ARM64in.VBinV.argL);
   3071          i->ARM64in.VBinV.argR = lookupHRegRemap(m, i->ARM64in.VBinV.argR);
   3072          return;
   3073       case ARM64in_VUnaryV:
   3074          i->ARM64in.VUnaryV.dst = lookupHRegRemap(m, i->ARM64in.VUnaryV.dst);
   3075          i->ARM64in.VUnaryV.arg = lookupHRegRemap(m, i->ARM64in.VUnaryV.arg);
   3076          return;
   3077       case ARM64in_VNarrowV:
   3078          i->ARM64in.VNarrowV.dst = lookupHRegRemap(m, i->ARM64in.VNarrowV.dst);
   3079          i->ARM64in.VNarrowV.src = lookupHRegRemap(m, i->ARM64in.VNarrowV.src);
   3080          return;
   3081       case ARM64in_VShiftImmV:
   3082          i->ARM64in.VShiftImmV.dst
   3083             = lookupHRegRemap(m, i->ARM64in.VShiftImmV.dst);
   3084          i->ARM64in.VShiftImmV.src
   3085             = lookupHRegRemap(m, i->ARM64in.VShiftImmV.src);
   3086          return;
   3087 //ZZ       case ARMin_VAluS:
   3088 //ZZ          i->ARMin.VAluS.dst  = lookupHRegRemap(m, i->ARMin.VAluS.dst);
   3089 //ZZ          i->ARMin.VAluS.argL = lookupHRegRemap(m, i->ARMin.VAluS.argL);
   3090 //ZZ          i->ARMin.VAluS.argR = lookupHRegRemap(m, i->ARMin.VAluS.argR);
   3091 //ZZ          return;
   3092 //ZZ       case ARMin_VCMovD:
   3093 //ZZ          i->ARMin.VCMovD.dst = lookupHRegRemap(m, i->ARMin.VCMovD.dst);
   3094 //ZZ          i->ARMin.VCMovD.src = lookupHRegRemap(m, i->ARMin.VCMovD.src);
   3095 //ZZ          return;
   3096 //ZZ       case ARMin_VCMovS:
   3097 //ZZ          i->ARMin.VCMovS.dst = lookupHRegRemap(m, i->ARMin.VCMovS.dst);
   3098 //ZZ          i->ARMin.VCMovS.src = lookupHRegRemap(m, i->ARMin.VCMovS.src);
   3099 //ZZ          return;
   3100 //ZZ       case ARMin_VXferD:
   3101 //ZZ          i->ARMin.VXferD.dD  = lookupHRegRemap(m, i->ARMin.VXferD.dD);
   3102 //ZZ          i->ARMin.VXferD.rHi = lookupHRegRemap(m, i->ARMin.VXferD.rHi);
   3103 //ZZ          i->ARMin.VXferD.rLo = lookupHRegRemap(m, i->ARMin.VXferD.rLo);
   3104 //ZZ          return;
   3105 //ZZ       case ARMin_VXferS:
   3106 //ZZ          i->ARMin.VXferS.fD  = lookupHRegRemap(m, i->ARMin.VXferS.fD);
   3107 //ZZ          i->ARMin.VXferS.rLo = lookupHRegRemap(m, i->ARMin.VXferS.rLo);
   3108 //ZZ          return;
   3109 //ZZ       case ARMin_VCvtID:
   3110 //ZZ          i->ARMin.VCvtID.dst = lookupHRegRemap(m, i->ARMin.VCvtID.dst);
   3111 //ZZ          i->ARMin.VCvtID.src = lookupHRegRemap(m, i->ARMin.VCvtID.src);
   3112 //ZZ          return;
   3113 //ZZ       case ARMin_NLdStD:
   3114 //ZZ          i->ARMin.NLdStD.dD = lookupHRegRemap(m, i->ARMin.NLdStD.dD);
   3115 //ZZ          mapRegs_ARMAModeN(m, i->ARMin.NLdStD.amode);
   3116 //ZZ          return;
   3117 //ZZ       case ARMin_NUnary:
   3118 //ZZ          i->ARMin.NUnary.src = lookupHRegRemap(m, i->ARMin.NUnary.src);
   3119 //ZZ          i->ARMin.NUnary.dst = lookupHRegRemap(m, i->ARMin.NUnary.dst);
   3120 //ZZ          return;
   3121 //ZZ       case ARMin_NUnaryS:
   3122 //ZZ          i->ARMin.NUnaryS.src->reg
   3123 //ZZ             = lookupHRegRemap(m, i->ARMin.NUnaryS.src->reg);
   3124 //ZZ          i->ARMin.NUnaryS.dst->reg
   3125 //ZZ             = lookupHRegRemap(m, i->ARMin.NUnaryS.dst->reg);
   3126 //ZZ          return;
   3127 //ZZ       case ARMin_NShift:
   3128 //ZZ          i->ARMin.NShift.dst = lookupHRegRemap(m, i->ARMin.NShift.dst);
   3129 //ZZ          i->ARMin.NShift.argL = lookupHRegRemap(m, i->ARMin.NShift.argL);
   3130 //ZZ          i->ARMin.NShift.argR = lookupHRegRemap(m, i->ARMin.NShift.argR);
   3131 //ZZ          return;
   3132 //ZZ       case ARMin_NShl64:
   3133 //ZZ          i->ARMin.NShl64.dst = lookupHRegRemap(m, i->ARMin.NShl64.dst);
   3134 //ZZ          i->ARMin.NShl64.src = lookupHRegRemap(m, i->ARMin.NShl64.src);
   3135 //ZZ          return;
   3136 //ZZ       case ARMin_NDual:
   3137 //ZZ          i->ARMin.NDual.arg1 = lookupHRegRemap(m, i->ARMin.NDual.arg1);
   3138 //ZZ          i->ARMin.NDual.arg2 = lookupHRegRemap(m, i->ARMin.NDual.arg2);
   3139 //ZZ          return;
   3140       case ARM64in_VImmQ:
   3141          i->ARM64in.VImmQ.rQ = lookupHRegRemap(m, i->ARM64in.VImmQ.rQ);
   3142          return;
   3143       case ARM64in_VDfromX:
   3144          i->ARM64in.VDfromX.rD
   3145             = lookupHRegRemap(m, i->ARM64in.VDfromX.rD);
   3146          i->ARM64in.VDfromX.rX
   3147             = lookupHRegRemap(m, i->ARM64in.VDfromX.rX);
   3148          return;
   3149       case ARM64in_VQfromXX:
   3150          i->ARM64in.VQfromXX.rQ
   3151             = lookupHRegRemap(m, i->ARM64in.VQfromXX.rQ);
   3152          i->ARM64in.VQfromXX.rXhi
   3153             = lookupHRegRemap(m, i->ARM64in.VQfromXX.rXhi);
   3154          i->ARM64in.VQfromXX.rXlo
   3155             = lookupHRegRemap(m, i->ARM64in.VQfromXX.rXlo);
   3156          return;
   3157       case ARM64in_VXfromQ:
   3158          i->ARM64in.VXfromQ.rX
   3159             = lookupHRegRemap(m, i->ARM64in.VXfromQ.rX);
   3160          i->ARM64in.VXfromQ.rQ
   3161             = lookupHRegRemap(m, i->ARM64in.VXfromQ.rQ);
   3162          return;
   3163       case ARM64in_VMov:
   3164          i->ARM64in.VMov.dst = lookupHRegRemap(m, i->ARM64in.VMov.dst);
   3165          i->ARM64in.VMov.src = lookupHRegRemap(m, i->ARM64in.VMov.src);
   3166          return;
   3167 
   3168 //ZZ       case ARMin_NBinary:
   3169 //ZZ          i->ARMin.NBinary.argL = lookupHRegRemap(m, i->ARMin.NBinary.argL);
   3170 //ZZ          i->ARMin.NBinary.argR = lookupHRegRemap(m, i->ARMin.NBinary.argR);
   3171 //ZZ          i->ARMin.NBinary.dst  = lookupHRegRemap(m, i->ARMin.NBinary.dst);
   3172 //ZZ          return;
   3173 //ZZ       case ARMin_NCMovQ:
   3174 //ZZ          i->ARMin.NCMovQ.dst = lookupHRegRemap(m, i->ARMin.NCMovQ.dst);
   3175 //ZZ          i->ARMin.NCMovQ.src = lookupHRegRemap(m, i->ARMin.NCMovQ.src);
   3176 //ZZ          return;
   3177 //ZZ       case ARMin_Add32:
   3178 //ZZ          i->ARMin.Add32.rD = lookupHRegRemap(m, i->ARMin.Add32.rD);
   3179 //ZZ          i->ARMin.Add32.rN = lookupHRegRemap(m, i->ARMin.Add32.rN);
   3180 //ZZ          return;
   3181       case ARM64in_EvCheck:
   3182          /* We expect both amodes only to mention x21, so this is in
   3183             fact pointless, since x21 isn't allocatable, but
   3184             anyway.. */
   3185          mapRegs_ARM64AMode(m, i->ARM64in.EvCheck.amCounter);
   3186          mapRegs_ARM64AMode(m, i->ARM64in.EvCheck.amFailAddr);
   3187          return;
   3188 //ZZ       case ARMin_ProfInc:
   3189 //ZZ          /* hardwires r11 and r12 -- nothing to modify. */
   3190 //ZZ          return;
   3191       default:
   3192          ppARM64Instr(i);
   3193          vpanic("mapRegs_ARM64Instr");
   3194    }
   3195 }
   3196 
   3197 /* Figure out if i represents a reg-reg move, and if so assign the
   3198    source and destination to *src and *dst.  If in doubt say No.  Used
   3199    by the register allocator to do move coalescing.
   3200 */
   3201 Bool isMove_ARM64Instr ( ARM64Instr* i, HReg* src, HReg* dst )
   3202 {
   3203    switch (i->tag) {
   3204       case ARM64in_MovI:
   3205          *src = i->ARM64in.MovI.src;
   3206          *dst = i->ARM64in.MovI.dst;
   3207          return True;
   3208       case ARM64in_VMov:
   3209          *src = i->ARM64in.VMov.src;
   3210          *dst = i->ARM64in.VMov.dst;
   3211          return True;
   3212       default:
   3213          break;
   3214    }
   3215 
   3216    return False;
   3217 }
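
/* Illustrative note (editor's addition, not from the original source):
   given, for example, an ARM64in_MovI whose src is vreg v7 and whose
   dst is vreg v5, this returns True with *src = v7 and *dst = v5,
   which lets the register allocator coalesce v5 and v7 into one real
   register and delete the copy.  Anything other than a plain
   MovI/VMov is conservatively reported as not a move. */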
   3218 
   3219 
   3220 /* Generate arm spill/reload instructions under the direction of the
   3221    register allocator.  Note it's critical these don't write the
   3222    condition codes. */
   3223 
   3224 void genSpill_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
   3225                       HReg rreg, Int offsetB, Bool mode64 )
   3226 {
   3227    HRegClass rclass;
   3228    vassert(offsetB >= 0);
   3229    vassert(!hregIsVirtual(rreg));
   3230    vassert(mode64 == True);
   3231    *i1 = *i2 = NULL;
   3232    rclass = hregClass(rreg);
   3233    switch (rclass) {
   3234       case HRcInt64:
   3235          vassert(0 == (offsetB & 7));
   3236          offsetB >>= 3;
   3237          vassert(offsetB < 4096);
   3238          *i1 = ARM64Instr_LdSt64(
   3239                   False/*!isLoad*/,
   3240                   rreg,
   3241                   ARM64AMode_RI12(hregARM64_X21(), offsetB, 8)
   3242                );
   3243          return;
   3244       case HRcFlt64:
   3245          vassert(0 == (offsetB & 7));
   3246          vassert(offsetB >= 0 && offsetB < 32768);
   3247          *i1 = ARM64Instr_VLdStD(False/*!isLoad*/,
   3248                                  rreg, hregARM64_X21(), offsetB);
   3249          return;
   3250       case HRcVec128: {
   3251          HReg x21  = hregARM64_X21();  // baseblock
   3252          HReg x9   = hregARM64_X9();   // spill temporary
   3253          vassert(0 == (offsetB & 15)); // check sane alignment
   3254          vassert(offsetB < 4096);
   3255          *i1 = ARM64Instr_Arith(x9, x21, ARM64RIA_I12(offsetB, 0), True);
   3256          *i2 = ARM64Instr_VLdStQ(False/*!isLoad*/, rreg, x9);
   3257          return;
   3258       }
   3259       default:
   3260          ppHRegClass(rclass);
   3261          vpanic("genSpill_ARM64: unimplemented regclass");
   3262    }
   3263 }
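
/* Worked example (editor's addition; register and offset are
   hypothetical): for a real Q register spilled at offsetB == 784, the
   HRcVec128 case above produces the two-instruction sequence
      add  x9, x21, #784        -- x9 = baseblock + offset
      str  q<r>, [x9]           -- ARM64Instr_VLdStQ, store form
   whereas an X register at the same offset needs only
      str  x<r>, [x21, #784]    -- RI12 amode, scaled by 8 (784/8 == 98)
   Neither sequence writes NZCV, as the comment above requires. */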
   3264 
   3265 void genReload_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
   3266                        HReg rreg, Int offsetB, Bool mode64 )
   3267 {
   3268    HRegClass rclass;
   3269    vassert(offsetB >= 0);
   3270    vassert(!hregIsVirtual(rreg));
   3271    vassert(mode64 == True);
   3272    *i1 = *i2 = NULL;
   3273    rclass = hregClass(rreg);
   3274    switch (rclass) {
   3275       case HRcInt64:
   3276          vassert(0 == (offsetB & 7));
   3277          offsetB >>= 3;
   3278          vassert(offsetB < 4096);
   3279          *i1 = ARM64Instr_LdSt64(
   3280                   True/*isLoad*/,
   3281                   rreg,
   3282                   ARM64AMode_RI12(hregARM64_X21(), offsetB, 8)
   3283                );
   3284          return;
   3285       case HRcFlt64:
   3286          vassert(0 == (offsetB & 7));
   3287          vassert(offsetB >= 0 && offsetB < 32768);
   3288          *i1 = ARM64Instr_VLdStD(True/*isLoad*/,
   3289                                  rreg, hregARM64_X21(), offsetB);
   3290          return;
   3291       case HRcVec128: {
   3292          HReg x21  = hregARM64_X21();  // baseblock
   3293          HReg x9   = hregARM64_X9();   // spill temporary
   3294          vassert(0 == (offsetB & 15)); // check sane alignment
   3295          vassert(offsetB < 4096);
   3296          *i1 = ARM64Instr_Arith(x9, x21, ARM64RIA_I12(offsetB, 0), True);
   3297          *i2 = ARM64Instr_VLdStQ(True/*isLoad*/, rreg, x9);
   3298          return;
   3299       }
   3300       default:
   3301          ppHRegClass(rclass);
   3302          vpanic("genReload_ARM64: unimplemented regclass");
   3303    }
   3304 }
   3305 
   3306 
   3307 //ZZ /* Emit an instruction into buf and return the number of bytes used.
   3308 //ZZ    Note that buf is not the insn's final place, and therefore it is
   3309 //ZZ    imperative to emit position-independent code. */
   3310 
   3311 static inline UChar iregNo ( HReg r )
   3312 {
   3313    UInt n;
   3314    vassert(hregClass(r) == HRcInt64);
   3315    vassert(!hregIsVirtual(r));
   3316    n = hregNumber(r);
   3317    vassert(n <= 30);
   3318    return toUChar(n);
   3319 }
   3320 
   3321 static inline UChar dregNo ( HReg r )
   3322 {
   3323    UInt n;
   3324    vassert(hregClass(r) == HRcFlt64 || hregClass(r) == HRcInt64);
   3325    vassert(!hregIsVirtual(r));
   3326    n = hregNumber(r);
   3327    vassert(n <= 31);
   3328    return toUChar(n);
   3329 }
   3330 
   3331 static inline UChar qregNo ( HReg r )
   3332 {
   3333    UInt n;
   3334    vassert(hregClass(r) == HRcVec128);
   3335    vassert(!hregIsVirtual(r));
   3336    n = hregNumber(r);
   3337    vassert(n <= 31);
   3338    return toUChar(n);
   3339 }
   3340 
   3341 #define BITS4(zzb3,zzb2,zzb1,zzb0) \
   3342    (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
   3343 
   3344 #define X00  BITS4(0,0, 0,0)
   3345 #define X01  BITS4(0,0, 0,1)
   3346 #define X10  BITS4(0,0, 1,0)
   3347 #define X11  BITS4(0,0, 1,1)
   3348 
   3349 #define X000 BITS4(0, 0,0,0)
   3350 #define X001 BITS4(0, 0,0,1)
   3351 #define X010 BITS4(0, 0,1,0)
   3352 #define X011 BITS4(0, 0,1,1)
   3353 #define X100 BITS4(0, 1,0,0)
   3354 #define X101 BITS4(0, 1,0,1)
   3355 #define X110 BITS4(0, 1,1,0)
   3356 #define X111 BITS4(0, 1,1,1)
   3357 
   3358 #define X0000 BITS4(0,0,0,0)
   3359 #define X0001 BITS4(0,0,0,1)
   3360 #define X0010 BITS4(0,0,1,0)
   3361 #define X0011 BITS4(0,0,1,1)
   3362 
   3363 #define BITS8(zzb7,zzb6,zzb5,zzb4,zzb3,zzb2,zzb1,zzb0) \
   3364   ((BITS4(zzb7,zzb6,zzb5,zzb4) << 4) | BITS4(zzb3,zzb2,zzb1,zzb0))
   3365 
   3366 #define X00000   BITS8(0,0,0, 0,0,0,0,0)
   3367 #define X00001   BITS8(0,0,0, 0,0,0,0,1)
   3368 #define X00111   BITS8(0,0,0, 0,0,1,1,1)
   3369 #define X01000   BITS8(0,0,0, 0,1,0,0,0)
   3370 #define X10000   BITS8(0,0,0, 1,0,0,0,0)
   3371 #define X11000   BITS8(0,0,0, 1,1,0,0,0)
   3372 #define X11110   BITS8(0,0,0, 1,1,1,1,0)
   3373 #define X11111   BITS8(0,0,0, 1,1,1,1,1)
   3374 
   3375 #define X000000  BITS8(0,0, 0,0,0,0,0,0)
   3376 #define X000001  BITS8(0,0, 0,0,0,0,0,1)
   3377 #define X000100  BITS8(0,0, 0,0,0,1,0,0)
   3378 #define X000111  BITS8(0,0, 0,0,0,1,1,1)
   3379 #define X001000  BITS8(0,0, 0,0,1,0,0,0)
   3380 #define X001001  BITS8(0,0, 0,0,1,0,0,1)
   3381 #define X001010  BITS8(0,0, 0,0,1,0,1,0)
   3382 #define X001101  BITS8(0,0, 0,0,1,1,0,1)
   3383 #define X001110  BITS8(0,0, 0,0,1,1,1,0)
   3384 #define X001111  BITS8(0,0, 0,0,1,1,1,1)
   3385 #define X010000  BITS8(0,0, 0,1,0,0,0,0)
   3386 #define X010001  BITS8(0,0, 0,1,0,0,0,1)
   3387 #define X010101  BITS8(0,0, 0,1,0,1,0,1)
   3388 #define X010110  BITS8(0,0, 0,1,0,1,1,0)
   3389 #define X011001  BITS8(0,0, 0,1,1,0,0,1)
   3390 #define X011010  BITS8(0,0, 0,1,1,0,1,0)
   3391 #define X011011  BITS8(0,0, 0,1,1,0,1,1)
   3392 #define X011110  BITS8(0,0, 0,1,1,1,1,0)
   3393 #define X011111  BITS8(0,0, 0,1,1,1,1,1)
   3394 #define X100000  BITS8(0,0, 1,0,0,0,0,0)
   3395 #define X100001  BITS8(0,0, 1,0,0,0,0,1)
   3396 #define X100011  BITS8(0,0, 1,0,0,0,1,1)
   3397 #define X100100  BITS8(0,0, 1,0,0,1,0,0)
   3398 #define X100101  BITS8(0,0, 1,0,0,1,0,1)
   3399 #define X100110  BITS8(0,0, 1,0,0,1,1,0)
   3400 #define X100111  BITS8(0,0, 1,0,0,1,1,1)
   3401 #define X101000  BITS8(0,0, 1,0,1,0,0,0)
   3402 #define X110000  BITS8(0,0, 1,1,0,0,0,0)
   3403 #define X110001  BITS8(0,0, 1,1,0,0,0,1)
   3404 #define X110101  BITS8(0,0, 1,1,0,1,0,1)
   3405 #define X110111  BITS8(0,0, 1,1,0,1,1,1)
   3406 #define X111000  BITS8(0,0, 1,1,1,0,0,0)
   3407 #define X111001  BITS8(0,0, 1,1,1,0,0,1)
   3408 #define X111101  BITS8(0,0, 1,1,1,1,0,1)
   3409 #define X111110  BITS8(0,0, 1,1,1,1,1,0)
   3410 #define X111111  BITS8(0,0, 1,1,1,1,1,1)
   3411 
   3412 #define X0001000  BITS8(0, 0,0,0,1,0,0,0)
   3413 #define X0010000  BITS8(0, 0,0,1,0,0,0,0)
   3414 #define X0100000  BITS8(0, 0,1,0,0,0,0,0)
   3415 #define X1000000  BITS8(0, 1,0,0,0,0,0,0)
   3416 
   3417 #define X00100000  BITS8(0,0,1,0,0,0,0,0)
   3418 #define X00100001  BITS8(0,0,1,0,0,0,0,1)
   3419 #define X00100010  BITS8(0,0,1,0,0,0,1,0)
   3420 #define X00100011  BITS8(0,0,1,0,0,0,1,1)
   3421 #define X01010000  BITS8(0,1,0,1,0,0,0,0)
   3422 #define X01010001  BITS8(0,1,0,1,0,0,0,1)
   3423 #define X01010100  BITS8(0,1,0,1,0,1,0,0)
   3424 #define X01011000  BITS8(0,1,0,1,1,0,0,0)
   3425 #define X01100000  BITS8(0,1,1,0,0,0,0,0)
   3426 #define X01100001  BITS8(0,1,1,0,0,0,0,1)
   3427 #define X01100010  BITS8(0,1,1,0,0,0,1,0)
   3428 #define X01100011  BITS8(0,1,1,0,0,0,1,1)
   3429 #define X01110000  BITS8(0,1,1,1,0,0,0,0)
   3430 #define X01110001  BITS8(0,1,1,1,0,0,0,1)
   3431 #define X01110011  BITS8(0,1,1,1,0,0,1,1)
   3432 #define X01110101  BITS8(0,1,1,1,0,1,0,1)
   3433 #define X01110111  BITS8(0,1,1,1,0,1,1,1)
   3434 #define X11000001  BITS8(1,1,0,0,0,0,0,1)
   3435 #define X11000011  BITS8(1,1,0,0,0,0,1,1)
   3436 #define X11010100  BITS8(1,1,0,1,0,1,0,0)
   3437 #define X11010110  BITS8(1,1,0,1,0,1,1,0)
   3438 #define X11011000  BITS8(1,1,0,1,1,0,0,0)
   3439 #define X11011010  BITS8(1,1,0,1,1,0,1,0)
   3440 #define X11011110  BITS8(1,1,0,1,1,1,1,0)
   3441 #define X11110001  BITS8(1,1,1,1,0,0,0,1)
   3442 #define X11110011  BITS8(1,1,1,1,0,0,1,1)
   3443 
   3444 #define BITS9(zzb8,zzb7,zzb6,zzb5,zzb4,zzb3,zzb2,zzb1,zzb0) \
   3445   ((BITS8(zzb8,zzb7,zzb6,zzb5,zzb4,zzb3,zzb2,zzb1) << 1) | zzb0)
   3446 
   3447 #define X111100111 BITS9(1,1,1,1,0,0,1,1,1)
   3448 #define X111100101 BITS9(1,1,1,1,0,0,1,0,1)
   3449 
   3450 
   3451 /* --- 4 fields --- */
   3452 
   3453 static inline UInt X_8_19_1_4 ( UInt f1, UInt f2, UInt f3, UInt f4 ) {
   3454    vassert(8+19+1+4 == 32);
   3455    vassert(f1 < (1<<8));
   3456    vassert(f2 < (1<<19));
   3457    vassert(f3 < (1<<1));
   3458    vassert(f4 < (1<<4));
   3459    UInt w = 0;
   3460    w = (w <<  8) | f1;
   3461    w = (w << 19) | f2;
   3462    w = (w <<  1) | f3;
   3463    w = (w <<  4) | f4;
   3464    return w;
   3465 }
   3466 
   3467 /* --- 5 fields --- */
   3468 
   3469 static inline UInt X_3_6_2_16_5 ( UInt f1, UInt f2,
   3470                                   UInt f3, UInt f4, UInt f5 ) {
   3471    vassert(3+6+2+16+5 == 32);
   3472    vassert(f1 < (1<<3));
   3473    vassert(f2 < (1<<6));
   3474    vassert(f3 < (1<<2));
   3475    vassert(f4 < (1<<16));
   3476    vassert(f5 < (1<<5));
   3477    UInt w = 0;
   3478    w = (w <<  3) | f1;
   3479    w = (w <<  6) | f2;
   3480    w = (w <<  2) | f3;
   3481    w = (w << 16) | f4;
   3482    w = (w <<  5) | f5;
   3483    return w;
   3484 }
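
/* Worked example (editor's addition): the packer simply shifts each
   field into place from the most significant end.  For instance
      X_3_6_2_16_5(X110, X100101, X00, 0x2222, 9)
   assembles 110 | 100101 | 00 | 0010001000100010 | 01001, i.e.
   0xD2844449, which is MOVZ x9, #0x2222 -- the same field layout that
   imm64_to_iregNo() below relies on. */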
   3485 
   3486 /* --- 6 fields --- */
   3487 
   3488 static inline UInt X_2_6_2_12_5_5 ( UInt f1, UInt f2, UInt f3,
   3489                                     UInt f4, UInt f5, UInt f6 ) {
   3490    vassert(2+6+2+12+5+5 == 32);
   3491    vassert(f1 < (1<<2));
   3492    vassert(f2 < (1<<6));
   3493    vassert(f3 < (1<<2));
   3494    vassert(f4 < (1<<12));
   3495    vassert(f5 < (1<<5));
   3496    vassert(f6 < (1<<5));
   3497    UInt w = 0;
   3498    w = (w <<  2) | f1;
   3499    w = (w <<  6) | f2;
   3500    w = (w <<  2) | f3;
   3501    w = (w << 12) | f4;
   3502    w = (w <<  5) | f5;
   3503    w = (w <<  5) | f6;
   3504    return w;
   3505 }
   3506 
   3507 static inline UInt X_3_8_5_6_5_5 ( UInt f1, UInt f2, UInt f3,
   3508                                    UInt f4, UInt f5, UInt f6 ) {
   3509    vassert(3+8+5+6+5+5 == 32);
   3510    vassert(f1 < (1<<3));
   3511    vassert(f2 < (1<<8));
   3512    vassert(f3 < (1<<5));
   3513    vassert(f4 < (1<<6));
   3514    vassert(f5 < (1<<5));
   3515    vassert(f6 < (1<<5));
   3516    UInt w = 0;
   3517    w = (w <<  3) | f1;
   3518    w = (w <<  8) | f2;
   3519    w = (w <<  5) | f3;
   3520    w = (w <<  6) | f4;
   3521    w = (w <<  5) | f5;
   3522    w = (w <<  5) | f6;
   3523    return w;
   3524 }
   3525 
   3526 static inline UInt X_3_5_8_6_5_5 ( UInt f1, UInt f2, UInt f3,
   3527                                    UInt f4, UInt f5, UInt f6 ) {
   3528    vassert(3+5+8+6+5+5 == 32);
   3529    vassert(f1 < (1<<3));
   3530    vassert(f2 < (1<<5));
   3531    vassert(f3 < (1<<8));
   3532    vassert(f4 < (1<<6));
   3533    vassert(f5 < (1<<5));
   3534    vassert(f6 < (1<<5));
   3535    UInt w = 0;
   3536    w = (w <<  3) | f1;
   3537    w = (w <<  5) | f2;
   3538    w = (w <<  8) | f3;
   3539    w = (w <<  6) | f4;
   3540    w = (w <<  5) | f5;
   3541    w = (w <<  5) | f6;
   3542    return w;
   3543 }
   3544 
   3545 static inline UInt X_3_6_7_6_5_5 ( UInt f1, UInt f2, UInt f3,
   3546                                    UInt f4, UInt f5, UInt f6 ) {
   3547    vassert(3+6+7+6+5+5 == 32);
   3548    vassert(f1 < (1<<3));
   3549    vassert(f2 < (1<<6));
   3550    vassert(f3 < (1<<7));
   3551    vassert(f4 < (1<<6));
   3552    vassert(f5 < (1<<5));
   3553    vassert(f6 < (1<<5));
   3554    UInt w = 0;
   3555    w = (w <<  3) | f1;
   3556    w = (w <<  6) | f2;
   3557    w = (w <<  7) | f3;
   3558    w = (w <<  6) | f4;
   3559    w = (w <<  5) | f5;
   3560    w = (w <<  5) | f6;
   3561    return w;
   3562 }
   3563 
   3564 /* --- 7 fields --- */
   3565 
   3566 static inline UInt X_2_6_3_9_2_5_5 ( UInt f1, UInt f2, UInt f3,
   3567                                      UInt f4, UInt f5, UInt f6, UInt f7 ) {
   3568    vassert(2+6+3+9+2+5+5 == 32);
   3569    vassert(f1 < (1<<2));
   3570    vassert(f2 < (1<<6));
   3571    vassert(f3 < (1<<3));
   3572    vassert(f4 < (1<<9));
   3573    vassert(f5 < (1<<2));
   3574    vassert(f6 < (1<<5));
   3575    vassert(f7 < (1<<5));
   3576    UInt w = 0;
   3577    w = (w << 2) | f1;
   3578    w = (w << 6) | f2;
   3579    w = (w << 3) | f3;
   3580    w = (w << 9) | f4;
   3581    w = (w << 2) | f5;
   3582    w = (w << 5) | f6;
   3583    w = (w << 5) | f7;
   3584    return w;
   3585 }
   3586 
   3587 static inline UInt X_3_6_1_6_6_5_5 ( UInt f1, UInt f2, UInt f3,
   3588                                      UInt f4, UInt f5, UInt f6, UInt f7 ) {
   3589    vassert(3+6+1+6+6+5+5 == 32);
   3590    vassert(f1 < (1<<3));
   3591    vassert(f2 < (1<<6));
   3592    vassert(f3 < (1<<1));
   3593    vassert(f4 < (1<<6));
   3594    vassert(f5 < (1<<6));
   3595    vassert(f6 < (1<<5));
   3596    vassert(f7 < (1<<5));
   3597    UInt w = 0;
   3598    w = (w << 3) | f1;
   3599    w = (w << 6) | f2;
   3600    w = (w << 1) | f3;
   3601    w = (w << 6) | f4;
   3602    w = (w << 6) | f5;
   3603    w = (w << 5) | f6;
   3604    w = (w << 5) | f7;
   3605    return w;
   3606 }
   3607 
   3608 static inline UInt X_9_1_6_4_6_1_1_4( UInt f1, UInt f2, UInt f3, UInt f4,
   3609                                         UInt f5, UInt f6, UInt f7, UInt f8) {
   3610    vassert(9+1+6+4+6+1+1+4 == 32);
   3611    vassert(f1 < (1<<9));
   3612    vassert(f2 < (1<<1));
   3613    vassert(f3 < (1<<6));
   3614    vassert(f4 < (1<<4));
   3615    vassert(f5 < (1<<6));
   3616    vassert(f6 < (1<<1));
   3617    vassert(f7 < (1<<1));
   3618    vassert(f8 < (1<<4));
   3619    UInt w = 0;
   3620    w = (w << 9) | f1;
   3621    w = (w << 1) | f2;
   3622    w = (w << 6) | f3;
   3623    w = (w << 4) | f4;
   3624    w = (w << 6) | f5;
   3625    w = (w << 1) | f6;
   3626    w = (w << 1) | f7;
   3627    w = (w << 4) | f8;
   3628    return w;
   3629 }
   3630 
   3631 
   3632 //ZZ #define X0000  BITS4(0,0,0,0)
   3633 //ZZ #define X0001  BITS4(0,0,0,1)
   3634 //ZZ #define X0010  BITS4(0,0,1,0)
   3635 //ZZ #define X0011  BITS4(0,0,1,1)
   3636 //ZZ #define X0100  BITS4(0,1,0,0)
   3637 //ZZ #define X0101  BITS4(0,1,0,1)
   3638 //ZZ #define X0110  BITS4(0,1,1,0)
   3639 //ZZ #define X0111  BITS4(0,1,1,1)
   3640 //ZZ #define X1000  BITS4(1,0,0,0)
   3641 //ZZ #define X1001  BITS4(1,0,0,1)
   3642 //ZZ #define X1010  BITS4(1,0,1,0)
   3643 //ZZ #define X1011  BITS4(1,0,1,1)
   3644 //ZZ #define X1100  BITS4(1,1,0,0)
   3645 //ZZ #define X1101  BITS4(1,1,0,1)
   3646 //ZZ #define X1110  BITS4(1,1,1,0)
   3647 //ZZ #define X1111  BITS4(1,1,1,1)
   3648 /*
   3649 #define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
   3650    ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
   3651     (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
   3652     (((zzx3) & 0xF) << 12))
   3653 
   3654 #define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2)        \
   3655    ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
   3656     (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
   3657     (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8))
   3658 
   3659 #define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0)        \
   3660    ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
   3661     (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
   3662     (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) <<  0))
   3663 
   3664 #define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
   3665   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
   3666    (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
   3667    (((zzx0) & 0xF) << 0))
   3668 
   3669 #define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0)  \
   3670    ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
   3671     (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
   3672     (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8) |  \
   3673     (((zzx1) & 0xF) <<  4) | (((zzx0) & 0xF) <<  0))
   3674 
   3675 #define XX______(zzx7,zzx6) \
   3676    ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24))
   3677 */
   3678 //ZZ /* Generate a skeletal insn that involves an RI84 shifter operand.
   3679 //ZZ    Returns a word which is all zeroes apart from bits 25 and 11..0,
   3680 //ZZ    since it is those that encode the shifter operand (at least to the
   3681 //ZZ    extent that we care about it.) */
   3682 //ZZ static UInt skeletal_RI84 ( ARMRI84* ri )
   3683 //ZZ {
   3684 //ZZ    UInt instr;
   3685 //ZZ    if (ri->tag == ARMri84_I84) {
   3686 //ZZ       vassert(0 == (ri->ARMri84.I84.imm4 & ~0x0F));
   3687 //ZZ       vassert(0 == (ri->ARMri84.I84.imm8 & ~0xFF));
   3688 //ZZ       instr = 1 << 25;
   3689 //ZZ       instr |= (ri->ARMri84.I84.imm4 << 8);
   3690 //ZZ       instr |= ri->ARMri84.I84.imm8;
   3691 //ZZ    } else {
   3692 //ZZ       instr = 0 << 25;
   3693 //ZZ       instr |= iregNo(ri->ARMri84.R.reg);
   3694 //ZZ    }
   3695 //ZZ    return instr;
   3696 //ZZ }
   3697 //ZZ
   3698 //ZZ /* Ditto for RI5.  Resulting word is zeroes apart from bit 4 and bits
   3699 //ZZ    11..7. */
   3700 //ZZ static UInt skeletal_RI5 ( ARMRI5* ri )
   3701 //ZZ {
   3702 //ZZ    UInt instr;
   3703 //ZZ    if (ri->tag == ARMri5_I5) {
   3704 //ZZ       UInt imm5 = ri->ARMri5.I5.imm5;
   3705 //ZZ       vassert(imm5 >= 1 && imm5 <= 31);
   3706 //ZZ       instr = 0 << 4;
   3707 //ZZ       instr |= imm5 << 7;
   3708 //ZZ    } else {
   3709 //ZZ       instr = 1 << 4;
   3710 //ZZ       instr |= iregNo(ri->ARMri5.R.reg) << 8;
   3711 //ZZ    }
   3712 //ZZ    return instr;
   3713 //ZZ }
   3714 
   3715 
   3716 /* Get an immediate into a register, using only that register. */
   3717 static UInt* imm64_to_iregNo ( UInt* p, Int xD, ULong imm64 )
   3718 {
   3719    if (imm64 == 0) {
   3720       // This has to be special-cased, since the logic below
   3721       // will leave the register unchanged in this case.
   3722       // MOVZ xD, #0, LSL #0
   3723       *p++ = X_3_6_2_16_5(X110, X100101, X00, 0/*imm16*/, xD);
   3724       return p;
   3725    }
   3726 
   3727    // There must be at least one non-zero halfword.  Find the
   3728    // lowest nonzero such, and use MOVZ to install it and zero
   3729    // out the rest of the register.
   3730    UShort h[4];
   3731    h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
   3732    h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
   3733    h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
   3734    h[0] = (UShort)((imm64 >>  0) & 0xFFFF);
   3735 
   3736    UInt i;
   3737    for (i = 0; i < 4; i++) {
   3738       if (h[i] != 0)
   3739          break;
   3740    }
   3741    vassert(i < 4);
   3742 
   3743    // MOVZ xD, h[i], LSL (16*i)
   3744    *p++ = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
   3745 
   3746    // Work on upwards through h[i], using MOVK to stuff in any
   3747    // remaining nonzero elements.
   3748    i++;
   3749    for (; i < 4; i++) {
   3750       if (h[i] == 0)
   3751          continue;
   3752       // MOVK xD, h[i], LSL (16*i)
   3753       *p++ = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
   3754    }
   3755 
   3756    return p;
   3757 }
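
/* Worked example (editor's addition; the value is hypothetical): for
   imm64 == 0x0000444400002222 the halfwords are h[3..0] =
   {0x0000, 0x4444, 0x0000, 0x2222}.  h[0] is the lowest nonzero one,
   so the code above emits
      movz xD, #0x2222, lsl #0     -- installs h[0], zeroes the rest
      movk xD, #0x4444, lsl #32    -- patches in h[2]
   and skips the two zero halfwords -- two instructions rather than a
   fixed-length four. */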
   3758 
   3759 /* Get an immediate into a register, using only that register, and
   3760    generating exactly 4 instructions, regardless of the value of the
   3761    immediate. This is used when generating sections of code that need
   3762    to be patched later, so as to guarantee a specific size. */
   3763 static UInt* imm64_to_iregNo_EXACTLY4 ( UInt* p, Int xD, ULong imm64 )
   3764 {
   3765    UShort h[4];
   3766    h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
   3767    h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
   3768    h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
   3769    h[0] = (UShort)((imm64 >>  0) & 0xFFFF);
   3770    // Work on upwards through h[i], using MOVK to stuff in the
   3771    // remaining elements.
   3772    UInt i;
   3773    for (i = 0; i < 4; i++) {
   3774       if (i == 0) {
   3775          // MOVZ xD, h[0], LSL (16*0)
   3776          *p++ = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
   3777       } else {
   3778          // MOVK xD, h[i], LSL (16*i)
   3779          *p++ = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
   3780       }
   3781    }
   3782    return p;
   3783 }
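
/* Editor's note: for the same hypothetical value 0x0000444400002222
   the EXACTLY4 variant instead emits
      movz xD, #0x2222, lsl #0
      movk xD, #0x0000, lsl #16
      movk xD, #0x4444, lsl #32
      movk xD, #0x0000, lsl #48
   i.e. the redundant zero halfwords are written as well, so the
   sequence always occupies 16 bytes and can later be recognised (and
   rewritten in place) via is_imm64_to_iregNo_EXACTLY4() below. */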
   3784 
   3785 /* Check whether p points at a 4-insn sequence cooked up by
   3786    imm64_to_iregNo_EXACTLY4(). */
   3787 static Bool is_imm64_to_iregNo_EXACTLY4 ( UInt* p, Int xD, ULong imm64 )
   3788 {
   3789    UShort h[4];
   3790    h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
   3791    h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
   3792    h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
   3793    h[0] = (UShort)((imm64 >>  0) & 0xFFFF);
   3794    // Work on upwards through h[i], using MOVK to stuff in the
   3795    // remaining elements.
   3796    UInt i;
   3797    for (i = 0; i < 4; i++) {
   3798       UInt expected;
   3799       if (i == 0) {
   3800          // MOVZ xD, h[0], LSL (16*0)
   3801          expected = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
   3802       } else {
   3803          // MOVK xD, h[i], LSL (16*i)
   3804          expected = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
   3805       }
   3806       if (p[i] != expected)
   3807          return False;
   3808    }
   3809    return True;
   3810 }
   3811 
   3812 
   3813 /* Generate an 8 bit store or 8-to-64 unsigned widening load from/to
   3814    rD, using the given amode for the address. */
   3815 static UInt* do_load_or_store8 ( UInt* p,
   3816                                  Bool isLoad, UInt wD, ARM64AMode* am )
   3817 {
   3818    vassert(wD <= 30);
   3819    if (am->tag == ARM64am_RI9) {
   3820       /* STURB Wd, [Xn|SP + simm9]:  00 111000 000 simm9 00 n d
   3821          LDURB Wd, [Xn|SP + simm9]:  00 111000 010 simm9 00 n d
   3822       */
   3823       Int simm9 = am->ARM64am.RI9.simm9;
   3824       vassert(-256 <= simm9 && simm9 <= 255);
   3825       UInt instr = X_2_6_3_9_2_5_5(X00, X111000, isLoad ? X010 : X000,
   3826                                    simm9 & 0x1FF, X00,
   3827                                    iregNo(am->ARM64am.RI9.reg), wD);
   3828       *p++ = instr;
   3829       return p;
   3830    }
   3831    if (am->tag == ARM64am_RI12) {
   3832       /* STRB Wd, [Xn|SP + uimm12 * 1]:  00 111 001 00 imm12 n d
   3833          LDRB Wd, [Xn|SP + uimm12 * 1]:  00 111 001 01 imm12 n d
   3834       */
   3835       UInt uimm12 = am->ARM64am.RI12.uimm12;
   3836       UInt scale  = am->ARM64am.RI12.szB;
   3837       vassert(scale == 1); /* failure of this is serious.  Do not ignore. */
   3838       UInt xN    = iregNo(am->ARM64am.RI12.reg);
   3839       vassert(xN <= 30);
   3840       UInt instr = X_2_6_2_12_5_5(X00, X111001, isLoad ? X01 : X00,
   3841                                   uimm12, xN, wD);
   3842       *p++ = instr;
   3843       return p;
   3844    }
   3845    if (am->tag == ARM64am_RR) {
   3846       /* STRB Wd, [Xn|SP, Xm]: 00 111 000 001 m 011 0 10 n d
   3847          LDRB Wd, [Xn|SP, Xm]: 00 111 000 011 m 011 0 10 n d
   3848       */
   3849       UInt xN = iregNo(am->ARM64am.RR.base);
   3850       UInt xM = iregNo(am->ARM64am.RR.index);
   3851       vassert(xN <= 30);
   3852       UInt instr = X_3_8_5_6_5_5(X001, isLoad ? X11000011 : X11000001,
   3853                                  xM, X011010, xN, wD);
   3854       *p++ = instr;
   3855       return p;
   3856    }
   3857    vpanic("do_load_or_store8");
   3858    vassert(0);
   3859 }
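
/* Worked example (editor's addition; registers and offset are
   hypothetical): a byte store of w3 at [x21, #-16] takes the
   ARM64am_RI9 path above and encodes as
      X_2_6_3_9_2_5_5(X00, X111000, X000, -16 & 0x1FF, X00, 21, 3)
   == 0x381F02A3, i.e. STURB w3, [x21, #-16]. */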
   3860 
   3861 
   3862 /* Generate a 16 bit store or 16-to-64 unsigned widening load from/to
    3863    wD, using the given amode for the address. */
   3864 static UInt* do_load_or_store16 ( UInt* p,
   3865                                   Bool isLoad, UInt wD, ARM64AMode* am )
   3866 {
   3867    vassert(wD <= 30);
   3868    if (am->tag == ARM64am_RI9) {
   3869       /* STURH Wd, [Xn|SP + simm9]:  01 111000 000 simm9 00 n d
   3870          LDURH Wd, [Xn|SP + simm9]:  01 111000 010 simm9 00 n d
   3871       */
   3872       Int simm9 = am->ARM64am.RI9.simm9;
   3873       vassert(-256 <= simm9 && simm9 <= 255);
   3874       UInt instr = X_2_6_3_9_2_5_5(X01, X111000, isLoad ? X010 : X000,
   3875                                    simm9 & 0x1FF, X00,
   3876                                    iregNo(am->ARM64am.RI9.reg), wD);
   3877       *p++ = instr;
   3878       return p;
   3879    }
   3880    if (am->tag == ARM64am_RI12) {
   3881       /* STRH Wd, [Xn|SP + uimm12 * 2]:  01 111 001 00 imm12 n d
   3882          LDRH Wd, [Xn|SP + uimm12 * 2]:  01 111 001 01 imm12 n d
   3883       */
   3884       UInt uimm12 = am->ARM64am.RI12.uimm12;
   3885       UInt scale  = am->ARM64am.RI12.szB;
   3886       vassert(scale == 2); /* failure of this is serious.  Do not ignore. */
   3887       UInt xN    = iregNo(am->ARM64am.RI12.reg);
   3888       vassert(xN <= 30);
   3889       UInt instr = X_2_6_2_12_5_5(X01, X111001, isLoad ? X01 : X00,
   3890                                   uimm12, xN, wD);
   3891       *p++ = instr;
   3892       return p;
   3893    }
   3894    if (am->tag == ARM64am_RR) {
    3895       /* STRH Wd, [Xn|SP, Xm]: 01 111 000 001 m 011 0 10 n d
    3896          LDRH Wd, [Xn|SP, Xm]: 01 111 000 011 m 011 0 10 n d
   3897       */
   3898       UInt xN = iregNo(am->ARM64am.RR.base);
   3899       UInt xM = iregNo(am->ARM64am.RR.index);
   3900       vassert(xN <= 30);
   3901       UInt instr = X_3_8_5_6_5_5(X011, isLoad ? X11000011 : X11000001,
   3902                                  xM, X011010, xN, wD);
   3903       *p++ = instr;
   3904       return p;
   3905    }
   3906    vpanic("do_load_or_store16");
   3907    vassert(0);
   3908 }
   3909 
   3910 
   3911 /* Generate a 32 bit store or 32-to-64 unsigned widening load from/to
    3912    wD, using the given amode for the address. */
   3913 static UInt* do_load_or_store32 ( UInt* p,
   3914                                   Bool isLoad, UInt wD, ARM64AMode* am )
   3915 {
   3916    vassert(wD <= 30);
   3917    if (am->tag == ARM64am_RI9) {
   3918       /* STUR Wd, [Xn|SP + simm9]:  10 111000 000 simm9 00 n d
   3919          LDUR Wd, [Xn|SP + simm9]:  10 111000 010 simm9 00 n d
   3920       */
   3921       Int simm9 = am->ARM64am.RI9.simm9;
   3922       vassert(-256 <= simm9 && simm9 <= 255);
   3923       UInt instr = X_2_6_3_9_2_5_5(X10, X111000, isLoad ? X010 : X000,
   3924                                    simm9 & 0x1FF, X00,
   3925                                    iregNo(am->ARM64am.RI9.reg), wD);
   3926       *p++ = instr;
   3927       return p;
   3928    }
   3929    if (am->tag == ARM64am_RI12) {
   3930       /* STR Wd, [Xn|SP + uimm12 * 4]:  10 111 001 00 imm12 n d
   3931          LDR Wd, [Xn|SP + uimm12 * 4]:  10 111 001 01 imm12 n d
   3932       */
   3933       UInt uimm12 = am->ARM64am.RI12.uimm12;
   3934       UInt scale  = am->ARM64am.RI12.szB;
   3935       vassert(scale == 4); /* failure of this is serious.  Do not ignore. */
   3936       UInt xN    = iregNo(am->ARM64am.RI12.reg);
   3937       vassert(xN <= 30);
   3938       UInt instr = X_2_6_2_12_5_5(X10, X111001, isLoad ? X01 : X00,
   3939                                   uimm12, xN, wD);
   3940       *p++ = instr;
   3941       return p;
   3942    }
   3943    if (am->tag == ARM64am_RR) {
   3944       /* STR Wd, [Xn|SP, Xm]: 10 111 000 001 m 011 0 10 n d
   3945          LDR Wd, [Xn|SP, Xm]: 10 111 000 011 m 011 0 10 n d
   3946       */
   3947       UInt xN = iregNo(am->ARM64am.RR.base);
   3948       UInt xM = iregNo(am->ARM64am.RR.index);
   3949       vassert(xN <= 30);
   3950       UInt instr = X_3_8_5_6_5_5(X101, isLoad ? X11000011 : X11000001,
   3951                                  xM, X011010, xN, wD);
   3952       *p++ = instr;
   3953       return p;
   3954    }
   3955    vpanic("do_load_or_store32");
   3956    vassert(0);
   3957 }
   3958 
   3959 
   3960 /* Generate a 64 bit load or store to/from xD, using the given amode
   3961    for the address. */
   3962 static UInt* do_load_or_store64 ( UInt* p,
   3963                                   Bool isLoad, UInt xD, ARM64AMode* am )
   3964 {
   3965    /* In all these cases, Rn can't be 31 since that means SP. */
   3966    vassert(xD <= 30);
   3967    if (am->tag == ARM64am_RI9) {
   3968       /* STUR Xd, [Xn|SP + simm9]:  11 111000 000 simm9 00 n d
   3969          LDUR Xd, [Xn|SP + simm9]:  11 111000 010 simm9 00 n d
   3970       */
   3971       Int simm9 = am->ARM64am.RI9.simm9;
   3972       vassert(-256 <= simm9 && simm9 <= 255);
   3973       UInt xN = iregNo(am->ARM64am.RI9.reg);
   3974       vassert(xN <= 30);
   3975       UInt instr = X_2_6_3_9_2_5_5(X11, X111000, isLoad ? X010 : X000,
   3976                                    simm9 & 0x1FF, X00, xN, xD);
   3977       *p++ = instr;
   3978       return p;
   3979    }
   3980    if (am->tag == ARM64am_RI12) {
   3981       /* STR Xd, [Xn|SP + uimm12 * 8]:  11 111 001 00 imm12 n d
   3982          LDR Xd, [Xn|SP + uimm12 * 8]:  11 111 001 01 imm12 n d
   3983       */
   3984       UInt uimm12 = am->ARM64am.RI12.uimm12;
   3985       UInt scale  = am->ARM64am.RI12.szB;
   3986       vassert(scale == 8); /* failure of this is serious.  Do not ignore. */
   3987       UInt xN    = iregNo(am->ARM64am.RI12.reg);
   3988       vassert(xN <= 30);
   3989       UInt instr = X_2_6_2_12_5_5(X11, X111001, isLoad ? X01 : X00,
   3990                                   uimm12, xN, xD);
   3991       *p++ = instr;
   3992       return p;
   3993    }
   3994    if (am->tag == ARM64am_RR) {
   3995       /* STR Xd, [Xn|SP, Xm]: 11 111 000 001 m 011 0 10 n d
   3996          LDR Xd, [Xn|SP, Xm]: 11 111 000 011 m 011 0 10 n d
   3997       */
   3998       UInt xN = iregNo(am->ARM64am.RR.base);
   3999       UInt xM = iregNo(am->ARM64am.RR.index);
   4000       vassert(xN <= 30);
   4001       UInt instr = X_3_8_5_6_5_5(X111, isLoad ? X11000011 : X11000001,
   4002                                  xM, X011010, xN, xD);
   4003       *p++ = instr;
   4004       return p;
   4005    }
   4006    vpanic("do_load_or_store64");
   4007    vassert(0);
   4008 }
   4009 
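         /* A worked example of the RI12 case above, for illustration only
            (the register numbers are arbitrary): with uimm12 == 5, base
            register x21 and data register x3, the load form packs
               11 111001 01 | imm12 = 000000000101 | n = 10101 | t = 00011
            which is 0xF94016A3, i.e. LDR x3, [x21, #40] -- the 12-bit
            field is an index scaled by 8 for this 64-bit access. */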
   4010 
   4011 /* Emit an instruction into buf and return the number of bytes used.
   4012    Note that buf is not the insn's final place, and therefore it is
   4013    imperative to emit position-independent code.  If the emitted
   4014    instruction was a profiler inc, set *is_profInc to True, else
   4015    leave it unchanged. */
   4016 
   4017 Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
   4018                       UChar* buf, Int nbuf, ARM64Instr* i,
   4019                       Bool mode64,
   4020                       void* disp_cp_chain_me_to_slowEP,
   4021                       void* disp_cp_chain_me_to_fastEP,
   4022                       void* disp_cp_xindir,
   4023                       void* disp_cp_xassisted )
   4024 {
   4025    UInt* p = (UInt*)buf;
   4026    vassert(nbuf >= 32);
   4027    vassert(mode64 == True);
   4028    vassert(0 == (((HWord)buf) & 3));
   4029 
   4030    switch (i->tag) {
   4031       case ARM64in_Arith: {
   4032          UInt      rD   = iregNo(i->ARM64in.Arith.dst);
   4033          UInt      rN   = iregNo(i->ARM64in.Arith.argL);
   4034          ARM64RIA* argR = i->ARM64in.Arith.argR;
   4035          switch (argR->tag) {
   4036             case ARM64riA_I12:
   4037                *p++ = X_2_6_2_12_5_5(
   4038                          i->ARM64in.Arith.isAdd ? X10 : X11,
   4039                          X010001,
   4040                          argR->ARM64riA.I12.shift == 12 ? X01 : X00,
   4041                          argR->ARM64riA.I12.imm12, rN, rD
   4042                       );
   4043                break;
   4044             case ARM64riA_R: {
   4045                UInt rM = iregNo(i->ARM64in.Arith.argR->ARM64riA.R.reg);
   4046                *p++ = X_3_8_5_6_5_5(
   4047                          i->ARM64in.Arith.isAdd ? X100 : X110,
   4048                          X01011000, rM, X000000, rN, rD
   4049                       );
   4050                break;
   4051             }
   4052             default:
   4053                goto bad;
   4054          }
   4055          goto done;
   4056       }
   4057       case ARM64in_Cmp: {
   4058          UInt      rD   = 31; /* XZR, we are going to dump the result */
   4059          UInt      rN   = iregNo(i->ARM64in.Cmp.argL);
   4060          ARM64RIA* argR = i->ARM64in.Cmp.argR;
   4061          Bool      is64 = i->ARM64in.Cmp.is64;
   4062          switch (argR->tag) {
   4063             case ARM64riA_I12:
   4064                /* 1 11 10001 sh imm12 Rn Rd = SUBS Xd, Xn, #imm */
   4065                /* 0 11 10001 sh imm12 Rn Rd = SUBS Wd, Wn, #imm */
   4066                *p++ = X_2_6_2_12_5_5(
   4067                          is64 ? X11 : X01, X110001,
   4068                          argR->ARM64riA.I12.shift == 12 ? X01 : X00,
   4069                          argR->ARM64riA.I12.imm12, rN, rD);
   4070                break;
   4071             case ARM64riA_R: {
   4072                /* 1 11 01011 00 0 Rm 000000 Rn Rd = SUBS Xd, Xn, Xm */
   4073                /* 0 11 01011 00 0 Rm 000000 Rn Rd = SUBS Wd, Wn, Wm */
   4074                UInt rM = iregNo(i->ARM64in.Cmp.argR->ARM64riA.R.reg);
   4075                *p++ = X_3_8_5_6_5_5(is64 ? X111 : X011,
   4076                                     X01011000, rM, X000000, rN, rD);
   4077                break;
   4078             }
   4079             default:
   4080                goto bad;
   4081          }
   4082          goto done;
   4083       }
   4084       case ARM64in_Logic: {
   4085          UInt      rD   = iregNo(i->ARM64in.Logic.dst);
   4086          UInt      rN   = iregNo(i->ARM64in.Logic.argL);
   4087          ARM64RIL* argR = i->ARM64in.Logic.argR;
   4088          UInt      opc  = 0; /* invalid */
   4089          vassert(rD < 31);
   4090          vassert(rN < 31);
   4091          switch (i->ARM64in.Logic.op) {
   4092             case ARM64lo_OR:  opc = X101; break;
   4093             case ARM64lo_AND: opc = X100; break;
   4094             case ARM64lo_XOR: opc = X110; break;
   4095             default: break;
   4096          }
   4097          vassert(opc != 0);
   4098          switch (argR->tag) {
   4099             case ARM64riL_I13: {
   4100                /* 1 01 100100 N immR immS Rn Rd = ORR <Xd|Sp>, Xn, #imm */
   4101                /* 1 00 100100 N immR immS Rn Rd = AND <Xd|Sp>, Xn, #imm */
   4102                /* 1 10 100100 N immR immS Rn Rd = EOR <Xd|Sp>, Xn, #imm */
   4103                *p++ = X_3_6_1_6_6_5_5(
   4104                          opc, X100100, argR->ARM64riL.I13.bitN,
   4105                          argR->ARM64riL.I13.immR, argR->ARM64riL.I13.immS,
   4106                          rN, rD
   4107                       );
   4108                break;
   4109             }
   4110             case ARM64riL_R: {
   4111                /* 1 01 01010 00 0 m 000000 n d = ORR Xd, Xn, Xm */
   4112                /* 1 00 01010 00 0 m 000000 n d = AND Xd, Xn, Xm */
   4113                /* 1 10 01010 00 0 m 000000 n d = EOR Xd, Xn, Xm */
   4114                UInt rM = iregNo(argR->ARM64riL.R.reg);
   4115                vassert(rM < 31);
   4116                *p++ = X_3_8_5_6_5_5(opc, X01010000, rM, X000000, rN, rD);
   4117                break;
   4118             }
   4119             default:
   4120                goto bad;
   4121          }
   4122          goto done;
   4123       }
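               /* Worked example for the I13 (bitfield-encoded logical
                  immediate) form above, illustrative only: a 64-bit AND
                  with #0xFF is a run of eight consecutive ones in a 64-bit
                  element, encoded as bitN=1, immR=0, immS=7 (number of
                  ones minus 1).  The emitter does no checking here;
                  whoever builds the ARM64riL_I13 must supply a
                  representable immediate. */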
   4124       case ARM64in_Test: {
   4125          UInt      rD   = 31; /* XZR, we are going to dump the result */
   4126          UInt      rN   = iregNo(i->ARM64in.Test.argL);
   4127          ARM64RIL* argR = i->ARM64in.Test.argR;
   4128          switch (argR->tag) {
   4129             case ARM64riL_I13: {
   4130                /* 1 11 100100 N immR immS Rn Rd = ANDS Xd, Xn, #imm */
   4131                *p++ = X_3_6_1_6_6_5_5(
   4132                          X111, X100100, argR->ARM64riL.I13.bitN,
   4133                          argR->ARM64riL.I13.immR, argR->ARM64riL.I13.immS,
   4134                          rN, rD
   4135                       );
   4136                break;
   4137             }
   4138             default:
   4139                goto bad;
   4140          }
   4141          goto done;
   4142       }
   4143       case ARM64in_Shift: {
   4144          UInt      rD   = iregNo(i->ARM64in.Shift.dst);
   4145          UInt      rN   = iregNo(i->ARM64in.Shift.argL);
   4146          ARM64RI6* argR = i->ARM64in.Shift.argR;
   4147          vassert(rD < 31);
   4148          vassert(rN < 31);
   4149          switch (argR->tag) {
   4150             case ARM64ri6_I6: {
    4151                /* 110 1001101 (64-sh) (63-sh) nn dd   LSL Xd, Xn, sh */
   4152                /* 110 1001101 sh      63      nn dd   LSR Xd, Xn, sh */
   4153                /* 100 1001101 sh      63      nn dd   ASR Xd, Xn, sh */
   4154                UInt sh = argR->ARM64ri6.I6.imm6;
   4155                vassert(sh > 0 && sh < 64);
   4156                switch (i->ARM64in.Shift.op) {
   4157                   case ARM64sh_SHL:
   4158                      *p++ = X_3_6_1_6_6_5_5(X110, X100110,
   4159                                             1, 64-sh, 63-sh, rN, rD);
   4160                      break;
   4161                   case ARM64sh_SHR:
   4162                      *p++ = X_3_6_1_6_6_5_5(X110, X100110, 1, sh, 63, rN, rD);
   4163                      break;
   4164                   case ARM64sh_SAR:
   4165                      *p++ = X_3_6_1_6_6_5_5(X100, X100110, 1, sh, 63, rN, rD);
   4166                      break;
   4167                   default:
   4168                      vassert(0);
   4169                }
   4170                break;
   4171             }
   4172             case ARM64ri6_R: {
   4173                /* 100 1101 0110 mm 001000 nn dd   LSL Xd, Xn, Xm */
   4174                /* 100 1101 0110 mm 001001 nn dd   LSR Xd, Xn, Xm */
   4175                /* 100 1101 0110 mm 001010 nn dd   ASR Xd, Xn, Xm */
   4176                UInt rM = iregNo(argR->ARM64ri6.R.reg);
   4177                vassert(rM < 31);
   4178                UInt subOpc = 0;
   4179                switch (i->ARM64in.Shift.op) {
   4180                   case ARM64sh_SHL: subOpc = X001000; break;
   4181                   case ARM64sh_SHR: subOpc = X001001; break;
   4182                   case ARM64sh_SAR: subOpc = X001010; break;
   4183                   default: vassert(0);
   4184                }
   4185                *p++ = X_3_8_5_6_5_5(X100, X11010110, rM, subOpc, rN, rD);
   4186                break;
   4187             }
   4188             default:
   4189                vassert(0);
   4190          }
   4191          goto done;
   4192       }
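               /* Worked example for the I6 (immediate shift) encodings
                  above, illustrative only: LSL Xd, Xn, #3 is the UBFM form
                  with immR = 64-3 = 61 and immS = 63-3 = 60, which is what
                  the ARM64sh_SHL case computes; for LSR and ASR the shift
                  amount goes straight into immR and immS is just 63. */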
   4193       case ARM64in_Unary: {
   4194          UInt rDst = iregNo(i->ARM64in.Unary.dst);
   4195          UInt rSrc = iregNo(i->ARM64in.Unary.src);
   4196          switch (i->ARM64in.Unary.op) {
   4197             case ARM64un_CLZ:
   4198                /* 1 10 1101 0110 00000 00010 0 nn dd   CLZ Xd, Xn */
   4199                /* 1 10 1101 0110 00000 00010 1 nn dd   CLS Xd, Xn (unimp) */
   4200                *p++ = X_3_8_5_6_5_5(X110,
   4201                                     X11010110, X00000, X000100, rSrc, rDst);
   4202                goto done;
   4203             case ARM64un_NEG:
   4204                /* 1 10 01011 000 m 000000 11111 d  NEG Xd,Xm */
   4205                /* 0 10 01011 000 m 000000 11111 d  NEG Wd,Wm (unimp) */
   4206                *p++ = X_3_8_5_6_5_5(X110,
   4207                                     X01011000, rSrc, X000000, X11111, rDst);
   4208                goto done;
   4209             case ARM64un_NOT: {
   4210                /* 1 01 01010 00 1 m 000000 11111 d   MVN Xd,Xm */
   4211                *p++ = X_3_8_5_6_5_5(X101,
   4212                                     X01010001, rSrc, X000000, X11111, rDst);
   4213                goto done;
   4214             }
   4215             default:
   4216                break;
   4217          }
   4218          goto bad;
   4219       }
   4220       case ARM64in_MovI: {
   4221          /* We generate the "preferred form", ORR Xd, XZR, Xm
   4222             101 01010 00 0 m 000000 11111 d
   4223          */
   4224          UInt instr = 0xAA0003E0;
   4225          UInt d     = iregNo(i->ARM64in.MovI.dst);
   4226          UInt m     = iregNo(i->ARM64in.MovI.src);
   4227          *p++ = instr | ((m & 31) << 16) | ((d & 31) << 0);
   4228          goto done;
   4229       }
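               /* For illustration only: starting from the base pattern
                  0xAA0003E0 (ORR Xd, XZR, Xm), moving x7 into x3 ORs in
                  (7 << 16) | 3, giving 0xAA0703E3 -- the standard encoding
                  of MOV x3, x7. */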
   4230       case ARM64in_Imm64: {
   4231          p = imm64_to_iregNo( p, iregNo(i->ARM64in.Imm64.dst),
   4232                               i->ARM64in.Imm64.imm64 );
   4233          goto done;
   4234       }
   4235       case ARM64in_LdSt64: {
   4236          p = do_load_or_store64( p, i->ARM64in.LdSt64.isLoad,
   4237                                  iregNo(i->ARM64in.LdSt64.rD),
   4238                                  i->ARM64in.LdSt64.amode );
   4239          goto done;
   4240       }
   4241       case ARM64in_LdSt32: {
   4242          p = do_load_or_store32( p, i->ARM64in.LdSt32.isLoad,
   4243                                  iregNo(i->ARM64in.LdSt32.rD),
   4244                                  i->ARM64in.LdSt32.amode );
   4245          goto done;
   4246       }
   4247       case ARM64in_LdSt16: {
   4248          p = do_load_or_store16( p, i->ARM64in.LdSt16.isLoad,
   4249                                  iregNo(i->ARM64in.LdSt16.rD),
   4250                                  i->ARM64in.LdSt16.amode );
   4251          goto done;
   4252       }
   4253       case ARM64in_LdSt8: {
   4254          p = do_load_or_store8( p, i->ARM64in.LdSt8.isLoad,
   4255                                 iregNo(i->ARM64in.LdSt8.rD),
   4256                                 i->ARM64in.LdSt8.amode );
   4257          goto done;
   4258       }
   4259 //ZZ       case ARMin_LdSt32:
   4260 //ZZ       case ARMin_LdSt8U: {
   4261 //ZZ          UInt        bL, bB;
   4262 //ZZ          HReg        rD;
   4263 //ZZ          ARMAMode1*  am;
   4264 //ZZ          ARMCondCode cc;
   4265 //ZZ          if (i->tag == ARMin_LdSt32) {
   4266 //ZZ             bB = 0;
   4267 //ZZ             bL = i->ARMin.LdSt32.isLoad ? 1 : 0;
   4268 //ZZ             am = i->ARMin.LdSt32.amode;
   4269 //ZZ             rD = i->ARMin.LdSt32.rD;
   4270 //ZZ             cc = i->ARMin.LdSt32.cc;
   4271 //ZZ          } else {
   4272 //ZZ             bB = 1;
   4273 //ZZ             bL = i->ARMin.LdSt8U.isLoad ? 1 : 0;
   4274 //ZZ             am = i->ARMin.LdSt8U.amode;
   4275 //ZZ             rD = i->ARMin.LdSt8U.rD;
   4276 //ZZ             cc = i->ARMin.LdSt8U.cc;
   4277 //ZZ          }
   4278 //ZZ          vassert(cc != ARMcc_NV);
   4279 //ZZ          if (am->tag == ARMam1_RI) {
   4280 //ZZ             Int  simm12;
   4281 //ZZ             UInt instr, bP;
   4282 //ZZ             if (am->ARMam1.RI.simm13 < 0) {
   4283 //ZZ                bP = 0;
   4284 //ZZ                simm12 = -am->ARMam1.RI.simm13;
   4285 //ZZ             } else {
   4286 //ZZ                bP = 1;
   4287 //ZZ                simm12 = am->ARMam1.RI.simm13;
   4288 //ZZ             }
   4289 //ZZ             vassert(simm12 >= 0 && simm12 <= 4095);
   4290 //ZZ             instr = XXXXX___(cc,X0101,BITS4(bP,bB,0,bL),
   4291 //ZZ                              iregNo(am->ARMam1.RI.reg),
   4292 //ZZ                              iregNo(rD));
   4293 //ZZ             instr |= simm12;
   4294 //ZZ             *p++ = instr;
   4295 //ZZ             goto done;
   4296 //ZZ          } else {
   4297 //ZZ             // RR case
   4298 //ZZ             goto bad;
   4299 //ZZ          }
   4300 //ZZ       }
   4301 //ZZ       case ARMin_LdSt16: {
   4302 //ZZ          HReg        rD = i->ARMin.LdSt16.rD;
   4303 //ZZ          UInt        bS = i->ARMin.LdSt16.signedLoad ? 1 : 0;
   4304 //ZZ          UInt        bL = i->ARMin.LdSt16.isLoad ? 1 : 0;
   4305 //ZZ          ARMAMode2*  am = i->ARMin.LdSt16.amode;
   4306 //ZZ          ARMCondCode cc = i->ARMin.LdSt16.cc;
   4307 //ZZ          vassert(cc != ARMcc_NV);
   4308 //ZZ          if (am->tag == ARMam2_RI) {
   4309 //ZZ             HReg rN = am->ARMam2.RI.reg;
   4310 //ZZ             Int  simm8;
   4311 //ZZ             UInt bP, imm8hi, imm8lo, instr;
   4312 //ZZ             if (am->ARMam2.RI.simm9 < 0) {
   4313 //ZZ                bP = 0;
   4314 //ZZ                simm8 = -am->ARMam2.RI.simm9;
   4315 //ZZ             } else {
   4316 //ZZ                bP = 1;
   4317 //ZZ                simm8 = am->ARMam2.RI.simm9;
   4318 //ZZ             }
   4319 //ZZ             vassert(simm8 >= 0 && simm8 <= 255);
   4320 //ZZ             imm8hi = (simm8 >> 4) & 0xF;
   4321 //ZZ             imm8lo = simm8 & 0xF;
   4322 //ZZ             vassert(!(bL == 0 && bS == 1)); // "! signed store"
   4323 //ZZ             /**/ if (bL == 0 && bS == 0) {
   4324 //ZZ                // strh
   4325 //ZZ                instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,0), iregNo(rN),
   4326 //ZZ                                 iregNo(rD), imm8hi, X1011, imm8lo);
   4327 //ZZ                *p++ = instr;
   4328 //ZZ                goto done;
   4329 //ZZ             }
   4330 //ZZ             else if (bL == 1 && bS == 0) {
   4331 //ZZ                // ldrh
   4332 //ZZ                instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregNo(rN),
   4333 //ZZ                                 iregNo(rD), imm8hi, X1011, imm8lo);
   4334 //ZZ                *p++ = instr;
   4335 //ZZ                goto done;
   4336 //ZZ             }
   4337 //ZZ             else if (bL == 1 && bS == 1) {
   4338 //ZZ                // ldrsh
   4339 //ZZ                instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregNo(rN),
   4340 //ZZ                                 iregNo(rD), imm8hi, X1111, imm8lo);
   4341 //ZZ                *p++ = instr;
   4342 //ZZ                goto done;
   4343 //ZZ             }
   4344 //ZZ             else vassert(0); // ill-constructed insn
   4345 //ZZ          } else {
   4346 //ZZ             // RR case
   4347 //ZZ             goto bad;
   4348 //ZZ          }
   4349 //ZZ       }
   4350 //ZZ       case ARMin_Ld8S: {
   4351 //ZZ          HReg        rD = i->ARMin.Ld8S.rD;
   4352 //ZZ          ARMAMode2*  am = i->ARMin.Ld8S.amode;
   4353 //ZZ          ARMCondCode cc = i->ARMin.Ld8S.cc;
   4354 //ZZ          vassert(cc != ARMcc_NV);
   4355 //ZZ          if (am->tag == ARMam2_RI) {
   4356 //ZZ             HReg rN = am->ARMam2.RI.reg;
   4357 //ZZ             Int  simm8;
   4358 //ZZ             UInt bP, imm8hi, imm8lo, instr;
   4359 //ZZ             if (am->ARMam2.RI.simm9 < 0) {
   4360 //ZZ                bP = 0;
   4361 //ZZ                simm8 = -am->ARMam2.RI.simm9;
   4362 //ZZ             } else {
   4363 //ZZ                bP = 1;
   4364 //ZZ                simm8 = am->ARMam2.RI.simm9;
   4365 //ZZ             }
   4366 //ZZ             vassert(simm8 >= 0 && simm8 <= 255);
   4367 //ZZ             imm8hi = (simm8 >> 4) & 0xF;
   4368 //ZZ             imm8lo = simm8 & 0xF;
   4369 //ZZ             // ldrsb
   4370 //ZZ             instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregNo(rN),
   4371 //ZZ                              iregNo(rD), imm8hi, X1101, imm8lo);
   4372 //ZZ             *p++ = instr;
   4373 //ZZ             goto done;
   4374 //ZZ          } else {
   4375 //ZZ             // RR case
   4376 //ZZ             goto bad;
   4377 //ZZ          }
   4378 //ZZ       }
   4379 
   4380       case ARM64in_XDirect: {
   4381          /* NB: what goes on here has to be very closely coordinated
   4382             with chainXDirect_ARM64 and unchainXDirect_ARM64 below. */
   4383          /* We're generating chain-me requests here, so we need to be
   4384             sure this is actually allowed -- no-redir translations
   4385             can't use chain-me's.  Hence: */
   4386          vassert(disp_cp_chain_me_to_slowEP != NULL);
   4387          vassert(disp_cp_chain_me_to_fastEP != NULL);
   4388 
   4389          /* Use ptmp for backpatching conditional jumps. */
   4390          UInt* ptmp = NULL;
   4391 
   4392          /* First off, if this is conditional, create a conditional
   4393             jump over the rest of it.  Or at least, leave a space for
   4394             it that we will shortly fill in. */
   4395          if (i->ARM64in.XDirect.cond != ARM64cc_AL) {
   4396             vassert(i->ARM64in.XDirect.cond != ARM64cc_NV);
   4397             ptmp = p;
   4398             *p++ = 0;
   4399          }
   4400 
   4401          /* Update the guest PC. */
   4402          /* imm64 x9, dstGA */
   4403          /* str   x9, amPC */
   4404          p = imm64_to_iregNo(p, /*x*/9, i->ARM64in.XDirect.dstGA);
   4405          p = do_load_or_store64(p, False/*!isLoad*/,
   4406                                 /*x*/9, i->ARM64in.XDirect.amPC);
   4407 
   4408          /* --- FIRST PATCHABLE BYTE follows --- */
   4409          /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
   4410             calling to) backs up the return address, so as to find the
   4411             address of the first patchable byte.  So: don't change the
   4412             number of instructions (5) below. */
    4413          /* movz x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[15:0] */
    4414          /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[31:16], lsl 16 */
   4415          /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[47:32], lsl 32 */
   4416          /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[63:48], lsl 48 */
   4417          /* blr  x9 */
   4418          void* disp_cp_chain_me
   4419                   = i->ARM64in.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
   4420                                                 : disp_cp_chain_me_to_slowEP;
   4421          p = imm64_to_iregNo_EXACTLY4(p, /*x*/9,
   4422                                       Ptr_to_ULong(disp_cp_chain_me));
   4423          *p++ = 0xD63F0120;
   4424          /* --- END of PATCHABLE BYTES --- */
   4425 
   4426          /* Fix up the conditional jump, if there was one. */
   4427          if (i->ARM64in.XDirect.cond != ARM64cc_AL) {
   4428             Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
   4429             vassert(delta > 0 && delta < 40);
   4430             vassert((delta & 3) == 0);
   4431             UInt notCond = 1 ^ (UInt)i->ARM64in.XDirect.cond;
   4432             vassert(notCond <= 13); /* Neither AL nor NV */
   4433             vassert(ptmp != NULL);
   4434             delta = delta >> 2;
   4435             *ptmp = X_8_19_1_4(X01010100, delta & ((1<<19)-1), 0, notCond);
   4436          }
   4437          goto done;
   4438       }
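      /* A worked example of the backpatch above, for illustration only:
         if imm64_to_iregNo needed 2 insns for dstGA, the hole is followed
         by 2 + 1 (str) + 4 (movz/movk) + 1 (blr) = 8 insns, so delta is
         36 bytes and delta >> 2 is 9; the B.cond written into the hole
         therefore skips 9 words forward, landing just past the blr.
         Flipping the bottom bit (1 ^ cond) yields the inverse condition,
         since AArch64 condition codes other than AL/NV come in such pairs
         (e.g. EQ = 0000, NE = 0001). */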
   4439 
   4440       case ARM64in_XIndir: {
   4441          // XIndir is more or less the same as XAssisted, except
   4442          // we don't have a trc value to hand back, so there's no
    4443          // write to x21
   4444          /* Use ptmp for backpatching conditional jumps. */
   4445          //UInt* ptmp = NULL;
   4446 
   4447          /* First off, if this is conditional, create a conditional
   4448             jump over the rest of it.  Or at least, leave a space for
   4449             it that we will shortly fill in. */
   4450          if (i->ARM64in.XIndir.cond != ARM64cc_AL) {
   4451             vassert(0); //ATC
   4452 //ZZ             vassert(i->ARMin.XIndir.cond != ARMcc_NV);
   4453 //ZZ             ptmp = p;
   4454 //ZZ             *p++ = 0;
   4455          }
   4456 
   4457          /* Update the guest PC. */
   4458          /* str r-dstGA, amPC */
   4459          p = do_load_or_store64(p, False/*!isLoad*/,
   4460                                 iregNo(i->ARM64in.XIndir.dstGA),
   4461                                 i->ARM64in.XIndir.amPC);
   4462 
   4463          /* imm64 x9, VG_(disp_cp_xindir) */
   4464          /* br    x9 */
   4465          p = imm64_to_iregNo(p, /*x*/9, Ptr_to_ULong(disp_cp_xindir));
   4466          *p++ = 0xD61F0120; /* br x9 */
   4467 
   4468          /* Fix up the conditional jump, if there was one. */
   4469          if (i->ARM64in.XIndir.cond != ARM64cc_AL) {
   4470             vassert(0); //ATC
   4471 //ZZ             Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
   4472 //ZZ             vassert(delta > 0 && delta < 40);
   4473 //ZZ             vassert((delta & 3) == 0);
   4474 //ZZ             UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
   4475 //ZZ             vassert(notCond <= 13); /* Neither AL nor NV */
   4476 //ZZ             delta = (delta >> 2) - 2;
   4477 //ZZ             *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
   4478          }
   4479          goto done;
   4480       }
   4481 
   4482       case ARM64in_XAssisted: {
   4483          /* Use ptmp for backpatching conditional jumps. */
   4484          UInt* ptmp = NULL;
   4485 
   4486          /* First off, if this is conditional, create a conditional
   4487             jump over the rest of it.  Or at least, leave a space for
   4488             it that we will shortly fill in.  I think this can only
   4489             ever happen when VEX is driven by the switchbacker. */
   4490          if (i->ARM64in.XAssisted.cond != ARM64cc_AL) {
    4491             vassert(i->ARM64in.XAssisted.cond != ARM64cc_NV);
   4492             ptmp = p;
   4493             *p++ = 0;
   4494          }
   4495 
   4496          /* Update the guest PC. */
   4497          /* str r-dstGA, amPC */
   4498          p = do_load_or_store64(p, False/*!isLoad*/,
   4499                                 iregNo(i->ARM64in.XAssisted.dstGA),
   4500                                 i->ARM64in.XAssisted.amPC);
   4501 
    4502          /* imm64 x21, $magic_number */
   4503          UInt trcval = 0;
   4504          switch (i->ARM64in.XAssisted.jk) {
   4505             case Ijk_ClientReq:   trcval = VEX_TRC_JMP_CLIENTREQ;   break;
   4506             case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
   4507             //case Ijk_Sys_int128:  trcval = VEX_TRC_JMP_SYS_INT128;  break;
   4508             //case Ijk_Yield:       trcval = VEX_TRC_JMP_YIELD;       break;
   4509             //case Ijk_EmWarn:      trcval = VEX_TRC_JMP_EMWARN;      break;
   4510             //case Ijk_MapFail:     trcval = VEX_TRC_JMP_MAPFAIL;     break;
   4511             case Ijk_NoDecode:    trcval = VEX_TRC_JMP_NODECODE;    break;
   4512             case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
   4513             case Ijk_FlushDCache: trcval = VEX_TRC_JMP_FLUSHDCACHE; break;
   4514             case Ijk_NoRedir:     trcval = VEX_TRC_JMP_NOREDIR;     break;
   4515             //case Ijk_SigTRAP:     trcval = VEX_TRC_JMP_SIGTRAP;     break;
   4516             //case Ijk_SigSEGV:     trcval = VEX_TRC_JMP_SIGSEGV;     break;
   4517             case Ijk_Boring:      trcval = VEX_TRC_JMP_BORING;      break;
   4518             /* We don't expect to see the following being assisted. */
   4519             //case Ijk_Ret:
   4520             //case Ijk_Call:
   4521             /* fallthrough */
   4522             default:
   4523                ppIRJumpKind(i->ARM64in.XAssisted.jk);
   4524                vpanic("emit_ARM64Instr.ARM64in_XAssisted: "
   4525                       "unexpected jump kind");
   4526          }
   4527          vassert(trcval != 0);
   4528          p = imm64_to_iregNo(p, /*x*/21, (ULong)trcval);
   4529 
   4530          /* imm64 x9, VG_(disp_cp_xassisted) */
   4531          /* br    x9 */
   4532          p = imm64_to_iregNo(p, /*x*/9, Ptr_to_ULong(disp_cp_xassisted));
   4533          *p++ = 0xD61F0120; /* br x9 */
   4534 
   4535          /* Fix up the conditional jump, if there was one. */
   4536          if (i->ARM64in.XAssisted.cond != ARM64cc_AL) {
   4537             Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
   4538             vassert(delta > 0 && delta < 40);
   4539             vassert((delta & 3) == 0);
    4540             UInt notCond = 1 ^ (UInt)i->ARM64in.XAssisted.cond;
   4541             vassert(notCond <= 13); /* Neither AL nor NV */
   4542             vassert(ptmp != NULL);
   4543             delta = delta >> 2;
   4544             *ptmp = X_8_19_1_4(X01010100, delta & ((1<<19)-1), 0, notCond);
   4545          }
   4546          goto done;
   4547       }
   4548 
   4549       case ARM64in_CSel: {
   4550          /* 100 1101 0100 mm cond 00 nn dd = CSEL Xd, Xn, Xm, cond */
   4551          UInt dd   = iregNo(i->ARM64in.CSel.dst);
   4552          UInt nn   = iregNo(i->ARM64in.CSel.argL);
   4553          UInt mm   = iregNo(i->ARM64in.CSel.argR);
   4554          UInt cond = (UInt)i->ARM64in.CSel.cond;
   4555          vassert(dd < 31 && nn < 31 && mm < 31 && cond < 16);
   4556          *p++ = X_3_8_5_6_5_5(X100, X11010100, mm, cond << 2, nn, dd);
   4557          goto done;
   4558       }
   4559 
   4560       case ARM64in_Call: {
   4561          /* We'll use x9 as a scratch register to put the target
   4562             address in. */
   4563          if (i->ARM64in.Call.cond != ARM64cc_AL
   4564              && i->ARM64in.Call.rloc.pri != RLPri_None) {
   4565             /* The call might not happen (it isn't unconditional) and
   4566                it returns a result.  In this case we will need to
   4567                generate a control flow diamond to put 0x555..555 in
   4568                the return register(s) in the case where the call
   4569                doesn't happen.  If this ever becomes necessary, maybe
   4570                copy code from the 32-bit ARM equivalent.  Until that
   4571                day, just give up. */
   4572             goto bad;
   4573          }
   4574 
   4575          UInt* ptmp = NULL;
   4576          if (i->ARM64in.Call.cond != ARM64cc_AL) {
   4577             /* Create a hole to put a conditional branch in.  We'll
   4578                patch it once we know the branch length. */
   4579             ptmp = p;
   4580             *p++ = 0;
   4581          }
   4582 
   4583          // x9 = &target
   4584          p = imm64_to_iregNo( (UInt*)p,
   4585                               /*x*/9, (ULong)i->ARM64in.Call.target );
   4586          // blr x9
   4587          *p++ = 0xD63F0120;
   4588 
   4589          // Patch the hole if necessary
   4590          if (i->ARM64in.Call.cond != ARM64cc_AL) {
   4591             ULong dist = (ULong)(p - ptmp);
   4592             /* imm64_to_iregNo produces between 1 and 4 insns, and
   4593                then there's the BLR itself.  Hence: */
   4594             vassert(dist >= 2 && dist <= 5);
   4595             vassert(ptmp != NULL);
   4596             // 01010100 simm19 0 cond = B.cond (here + simm19 << 2)
   4597             *ptmp = X_8_19_1_4(X01010100, dist, 0,
   4598                                1 ^ (UInt)i->ARM64in.Call.cond);
   4599          } else {
   4600             vassert(ptmp == NULL);
   4601          }
   4602 
   4603          goto done;
   4604       }
   4605 
   4606       case ARM64in_AddToSP: {
   4607          /* 10,0 10001 00 imm12 11111 11111  ADD xsp, xsp, #imm12
   4608             11,0 10001 00 imm12 11111 11111  SUB xsp, xsp, #imm12
   4609          */
   4610          Int simm12 = i->ARM64in.AddToSP.simm;
   4611          vassert(-4096 < simm12 && simm12 < 4096);
   4612          vassert(0 == (simm12 & 0xF));
   4613          if (simm12 >= 0) {
   4614             *p++ = X_2_6_2_12_5_5(X10, X010001, X00, simm12, X11111, X11111);
   4615          } else {
   4616             *p++ = X_2_6_2_12_5_5(X11, X010001, X00, -simm12, X11111, X11111);
   4617          }
   4618          goto done;
   4619       }
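      /* For example (illustrative only): simm == -48 takes the second
         branch and emits SUB xsp, xsp, #48, while simm == 32 emits
         ADD xsp, xsp, #32.  The (simm12 & 0xF) assertion restricts
         adjustments to multiples of 16, presumably to keep SP 16-byte
         aligned as AArch64 expects for SP-relative accesses. */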
   4620 
   4621       case ARM64in_FromSP: {
   4622          /* 10,0 10001 00 0..(12)..0 11111 dd  MOV Xd, xsp */
   4623          UInt dd = iregNo(i->ARM64in.FromSP.dst);
   4624          vassert(dd < 31);
   4625          *p++ = X_2_6_2_12_5_5(X10, X010001, X00, 0, X11111, dd);
   4626          goto done;
   4627       }
   4628 
   4629       case ARM64in_Mul: {
   4630          /* 100 11011 110 mm 011111 nn dd   UMULH Xd, Xn,Xm
   4631             100 11011 010 mm 011111 nn dd   SMULH Xd, Xn,Xm
   4632             100 11011 000 mm 011111 nn dd   MUL   Xd, Xn,Xm
   4633          */
   4634          UInt dd = iregNo(i->ARM64in.Mul.dst);
   4635          UInt nn = iregNo(i->ARM64in.Mul.argL);
   4636          UInt mm = iregNo(i->ARM64in.Mul.argR);
   4637          vassert(dd < 31 && nn < 31 && mm < 31);
   4638          switch (i->ARM64in.Mul.op) {
   4639             case ARM64mul_ZX:
   4640                *p++ = X_3_8_5_6_5_5(X100, X11011110, mm, X011111, nn, dd);
   4641                goto done;
   4642             case ARM64mul_SX:
   4643                *p++ = X_3_8_5_6_5_5(X100, X11011010, mm, X011111, nn, dd);
   4644                goto done;
   4645             case ARM64mul_PLAIN:
   4646                *p++ = X_3_8_5_6_5_5(X100, X11011000, mm, X011111, nn, dd);
   4647                goto done;
   4648             default:
   4649                vassert(0);
   4650          }
   4651          goto bad;
   4652       }
   4653       case ARM64in_LdrEX: {
   4654          /* 085F7C82   ldxrb w2, [x4]
   4655             485F7C82   ldxrh w2, [x4]
   4656             885F7C82   ldxr  w2, [x4]
   4657             C85F7C82   ldxr  x2, [x4]
   4658          */
   4659          switch (i->ARM64in.LdrEX.szB) {
   4660             case 1: *p++ = 0x085F7C82; goto done;
   4661             case 2: *p++ = 0x485F7C82; goto done;
   4662             case 4: *p++ = 0x885F7C82; goto done;
   4663             case 8: *p++ = 0xC85F7C82; goto done;
   4664             default: break;
   4665          }
   4666          goto bad;
   4667       }
   4668       case ARM64in_StrEX: {
   4669          /* 08007C82   stxrb w0, w2, [x4]
   4670             48007C82   stxrh w0, w2, [x4]
   4671             88007C82   stxr  w0, w2, [x4]
   4672             C8007C82   stxr  w0, x2, [x4]
   4673          */
   4674          switch (i->ARM64in.StrEX.szB) {
   4675             case 1: *p++ = 0x08007C82; goto done;
   4676             case 2: *p++ = 0x48007C82; goto done;
   4677             case 4: *p++ = 0x88007C82; goto done;
   4678             case 8: *p++ = 0xC8007C82; goto done;
   4679             default: break;
   4680          }
   4681          goto bad;
   4682       }
   4683       case ARM64in_MFence: {
   4684          *p++ = 0xD5033F9F; /* DSB sy */
   4685          *p++ = 0xD5033FBF; /* DMB sy */
   4686          *p++ = 0xD5033FDF; /* ISB */
   4687          goto done;
   4688       }
   4689       //case ARM64in_CLREX: {
   4690       //   //ATC, but believed to be correct
   4691       //   goto bad;
   4692       //   *p++ = 0xD5033F5F; /* clrex */
   4693       //   goto done;
   4694       //}
   4695       case ARM64in_VLdStS: {
   4696          /* 10 111101 01 imm12 n t   LDR St, [Xn|SP, #imm12 * 4]
   4697             10 111101 00 imm12 n t   STR St, [Xn|SP, #imm12 * 4]
   4698          */
   4699          UInt sD     = dregNo(i->ARM64in.VLdStS.sD);
   4700          UInt rN     = iregNo(i->ARM64in.VLdStS.rN);
   4701          UInt uimm12 = i->ARM64in.VLdStS.uimm12;
   4702          Bool isLD   = i->ARM64in.VLdStS.isLoad;
   4703          vassert(uimm12 < 16384 && 0 == (uimm12 & 3));
   4704          uimm12 >>= 2;
   4705          vassert(uimm12 < (1<<12));
   4706          vassert(sD < 32);
   4707          vassert(rN < 31);
   4708          *p++ = X_2_6_2_12_5_5(X10, X111101, isLD ? X01 : X00,
   4709                                uimm12, rN, sD);
   4710          goto done;
   4711       }
   4712       case ARM64in_VLdStD: {
   4713          /* 11 111101 01 imm12 n t   LDR Dt, [Xn|SP, #imm12 * 8]
   4714             11 111101 00 imm12 n t   STR Dt, [Xn|SP, #imm12 * 8]
   4715          */
   4716          UInt dD     = dregNo(i->ARM64in.VLdStD.dD);
   4717          UInt rN     = iregNo(i->ARM64in.VLdStD.rN);
   4718          UInt uimm12 = i->ARM64in.VLdStD.uimm12;
   4719          Bool isLD   = i->ARM64in.VLdStD.isLoad;
   4720          vassert(uimm12 < 32768 && 0 == (uimm12 & 7));
   4721          uimm12 >>= 3;
   4722          vassert(uimm12 < (1<<12));
   4723          vassert(dD < 32);
   4724          vassert(rN < 31);
   4725          *p++ = X_2_6_2_12_5_5(X11, X111101, isLD ? X01 : X00,
   4726                                uimm12, rN, dD);
   4727          goto done;
   4728       }
   4729       case ARM64in_VLdStQ: {
   4730          /* 0100 1100 0000 0000 0111 11 rN rQ   st1 {vQ.2d}, [<rN|SP>]
   4731             0100 1100 0100 0000 0111 11 rN rQ   ld1 {vQ.2d}, [<rN|SP>]
   4732          */
   4733          UInt rQ = qregNo(i->ARM64in.VLdStQ.rQ);
   4734          UInt rN = iregNo(i->ARM64in.VLdStQ.rN);
   4735          vassert(rQ < 32);
   4736          vassert(rN < 31);
   4737          if (i->ARM64in.VLdStQ.isLoad) {
   4738             *p++ = 0x4C407C00 | (rN << 5) | rQ;
   4739          } else {
   4740             *p++ = 0x4C007C00 | (rN << 5) | rQ;
   4741          }
   4742          goto done;
   4743       }
   4744       case ARM64in_VCvtI2F: {
   4745          /* 31  28    23 21 20 18  15     9 4
   4746             000 11110 00 1  00 010 000000 n d  SCVTF Sd, Wn
   4747             000 11110 01 1  00 010 000000 n d  SCVTF Dd, Wn
   4748             100 11110 00 1  00 010 000000 n d  SCVTF Sd, Xn
   4749             100 11110 01 1  00 010 000000 n d  SCVTF Dd, Xn
   4750             000 11110 00 1  00 011 000000 n d  UCVTF Sd, Wn
   4751             000 11110 01 1  00 011 000000 n d  UCVTF Dd, Wn
   4752             100 11110 00 1  00 011 000000 n d  UCVTF Sd, Xn
   4753             100 11110 01 1  00 011 000000 n d  UCVTF Dd, Xn
   4754          */
   4755          UInt       rN = iregNo(i->ARM64in.VCvtI2F.rS);
   4756          UInt       rD = dregNo(i->ARM64in.VCvtI2F.rD);
   4757          ARM64CvtOp how = i->ARM64in.VCvtI2F.how;
   4758          /* Just handle cases as they show up. */
   4759          switch (how) {
   4760             case ARM64cvt_F32_I32S: /* SCVTF Sd, Wn */
   4761                *p++ = X_3_5_8_6_5_5(X000, X11110, X00100010, X000000, rN, rD);
   4762                break;
   4763             case ARM64cvt_F64_I32S: /* SCVTF Dd, Wn */
   4764                *p++ = X_3_5_8_6_5_5(X000, X11110, X01100010, X000000, rN, rD);
   4765                break;
   4766             case ARM64cvt_F32_I64S: /* SCVTF Sd, Xn */
   4767                *p++ = X_3_5_8_6_5_5(X100, X11110, X00100010, X000000, rN, rD);
   4768                break;
   4769             case ARM64cvt_F64_I64S: /* SCVTF Dd, Xn */
   4770                *p++ = X_3_5_8_6_5_5(X100, X11110, X01100010, X000000, rN, rD);
   4771                break;
   4772             case ARM64cvt_F32_I32U: /* UCVTF Sd, Wn */
   4773                *p++ = X_3_5_8_6_5_5(X000, X11110, X00100011, X000000, rN, rD);
   4774                break;
   4775             case ARM64cvt_F64_I32U: /* UCVTF Dd, Wn */
   4776                *p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X000000, rN, rD);
   4777                break;
   4778             case ARM64cvt_F32_I64U: /* UCVTF Sd, Xn */
   4779                *p++ = X_3_5_8_6_5_5(X100, X11110, X00100011, X000000, rN, rD);
   4780                break;
   4781             case ARM64cvt_F64_I64U: /* UCVTF Dd, Xn  */
   4782                *p++ = X_3_5_8_6_5_5(X100, X11110, X01100011, X000000, rN, rD);
   4783                break;
   4784             default:
   4785                goto bad; //ATC
   4786          }
   4787          goto done;
   4788       }
   4789       case ARM64in_VCvtF2I: {
   4790          /*    30       23   20 18  15     9 4
   4791             sf 00,11110,0x 1 00 000,000000 n d  FCVTNS Rd, Fn (round to
   4792             sf 00,11110,0x 1 00 001,000000 n d  FCVTNU Rd, Fn  nearest)
   4793             ---------------- 01 --------------  FCVTP-------- (round to +inf)
   4794             ---------------- 10 --------------  FCVTM-------- (round to -inf)
   4795             ---------------- 11 --------------  FCVTZ-------- (round to zero)
   4796 
   4797             Rd is Xd when sf==1, Wd when sf==0
   4798             Fn is Dn when x==1, Sn when x==0
   4799             20:19 carry the rounding mode, using the same encoding as FPCR
   4800          */
   4801          UInt       rD    = iregNo(i->ARM64in.VCvtF2I.rD);
   4802          UInt       rN    = dregNo(i->ARM64in.VCvtF2I.rS);
   4803          ARM64CvtOp how   = i->ARM64in.VCvtF2I.how;
   4804          UChar      armRM = i->ARM64in.VCvtF2I.armRM;
   4805          /* Just handle cases as they show up. */
   4806          switch (how) {
   4807             case ARM64cvt_F64_I32S: /* FCVTxS Wd, Dn */
   4808                *p++ = X_3_5_8_6_5_5(X000, X11110, X01100000 | (armRM << 3),
   4809                                     X000000, rN, rD);
   4810                break;
   4811             case ARM64cvt_F64_I32U: /* FCVTxU Wd, Dn */
   4812                *p++ = X_3_5_8_6_5_5(X000, X11110, X01100001 | (armRM << 3),
   4813                                     X000000, rN, rD);
   4814                break;
   4815             case ARM64cvt_F64_I64S: /* FCVTxS Xd, Dn */
   4816                *p++ = X_3_5_8_6_5_5(X100, X11110, X01100000 | (armRM << 3),
   4817                                     X000000, rN, rD);
   4818                break;
   4819             case ARM64cvt_F64_I64U: /* FCVTxU Xd, Dn */
   4820                *p++ = X_3_5_8_6_5_5(X100, X11110, X01100001 | (armRM << 3),
   4821                                     X000000, rN, rD);
   4822                break;
   4823             case ARM64cvt_F32_I32S: /* FCVTxS Wd, Sn */
   4824                *p++ = X_3_5_8_6_5_5(X000, X11110, X00100000 | (armRM << 3),
   4825                                     X000000, rN, rD);
   4826                break;
   4827             case ARM64cvt_F32_I32U: /* FCVTxU Wd, Sn */
   4828                *p++ = X_3_5_8_6_5_5(X000, X11110, X00100001 | (armRM << 3),
   4829                                     X000000, rN, rD);
   4830                break;
   4831             case ARM64cvt_F32_I64S: /* FCVTxS Xd, Sn */
   4832                *p++ = X_3_5_8_6_5_5(X100, X11110, X00100000 | (armRM << 3),
   4833                                     X000000, rN, rD);
   4834                break;
   4835             case ARM64cvt_F32_I64U: /* FCVTxU Xd, Sn */
   4836                *p++ = X_3_5_8_6_5_5(X100, X11110, X00100001 | (armRM << 3),
   4837                                     X000000, rN, rD);
   4838                break;
   4839             default:
   4840                goto bad; //ATC
   4841          }
   4842          goto done;
   4843       }
   4844       case ARM64in_VCvtSD: {
   4845          /* 31        23 21     16  14    9 4
   4846             000,11110, 00 10001 0,1 10000 n d   FCVT Dd, Sn (S->D)
   4847             ---------- 01 ----- 0,0 ---------   FCVT Sd, Dn (D->S)
   4848             Rounding, when dst is smaller than src, is per the FPCR.
   4849          */
   4850          UInt dd = dregNo(i->ARM64in.VCvtSD.dst);
   4851          UInt nn = dregNo(i->ARM64in.VCvtSD.src);
   4852          if (i->ARM64in.VCvtSD.sToD) {
   4853             *p++ = X_3_5_8_6_5_5(X000, X11110, X00100010, X110000, nn, dd);
   4854          } else {
   4855             *p++ = X_3_5_8_6_5_5(X000, X11110, X01100010, X010000, nn, dd);
   4856          }
   4857          goto done;
   4858       }
   4859       case ARM64in_VUnaryD: {
   4860          /* 31        23 21     16 14    9 4
   4861             000,11110 01 1,0000 0,0 10000 n d  FMOV Dd, Dn (not handled)
   4862             ------------------- 0,1 ---------  FABS ------
   4863             ------------------- 1,0 ---------  FNEG ------
    4864             ------------------- 1,1 ---------  FSQRT -----
   4865          */
   4866          UInt dD  = dregNo(i->ARM64in.VUnaryD.dst);
   4867          UInt dN  = dregNo(i->ARM64in.VUnaryD.src);
   4868          UInt b16 = 2; /* impossible */
   4869          UInt b15 = 2; /* impossible */
   4870          switch (i->ARM64in.VUnaryD.op) {
   4871             case ARM64fpu_NEG:  b16 = 1; b15 = 0; break;
   4872             case ARM64fpu_SQRT: b16 = 1; b15 = 1; break;
   4873             case ARM64fpu_ABS:  b16 = 0; b15 = 1; break;
   4874             default: break;
   4875          }
   4876          if (b16 < 2 && b15 < 2) {
   4877             *p++ = X_3_8_5_6_5_5(X000, X11110011, (X0000 << 1) | b16,
   4878                                  (b15 << 5) | X10000, dN, dD);
   4879             goto done;
   4880          }
   4881          /*
    4882             000, 11110 01 1,001 11,1 10000 n d  FRINTI Dd, Dn (round per FPCR)
   4883          */
   4884          if (i->ARM64in.VUnaryD.op == ARM64fpu_RINT) {
   4885            *p++ = X_3_8_5_6_5_5(X000, X11110011, X00111, X110000, dN, dD);
   4886            goto done;
   4887          }
   4888          goto bad;
   4889       }
   4890       case ARM64in_VUnaryS: {
   4891          /* 31        23 21     16 14    9 4
   4892             000,11110 00 1,0000 0,0 10000 n d  FMOV Sd, Sn (not handled)
   4893             ------------------- 0,1 ---------  FABS ------
   4894             ------------------- 1,0 ---------  FNEG ------
    4895             ------------------- 1,1 ---------  FSQRT -----
   4896          */
   4897          UInt sD  = dregNo(i->ARM64in.VUnaryS.dst);
   4898          UInt sN  = dregNo(i->ARM64in.VUnaryS.src);
   4899          UInt b16 = 2; /* impossible */
   4900          UInt b15 = 2; /* impossible */
   4901          switch (i->ARM64in.VUnaryS.op) {
   4902             case ARM64fpu_NEG:  b16 = 1; b15 = 0; break;
   4903             case ARM64fpu_SQRT: b16 = 1; b15 = 1; break;
   4904             case ARM64fpu_ABS:  b16 = 0; b15 = 1; break;
   4905             default: break;
   4906          }
   4907          if (b16 < 2 && b15 < 2) {
   4908             *p++ = X_3_8_5_6_5_5(X000, X11110001, (X0000 << 1) | b16,
   4909                                  (b15 << 5) | X10000, sN, sD);
   4910             goto done;
   4911          }
   4912          /*
    4913             000, 11110 00 1,001 11,1 10000 n d  FRINTI Sd, Sn (round per FPCR)
   4914          */
   4915          if (i->ARM64in.VUnaryS.op == ARM64fpu_RINT) {
   4916            *p++ = X_3_8_5_6_5_5(X000, X11110001, X00111, X110000, sN, sD);
   4917            goto done;
   4918          }
   4919          goto bad;
   4920       }
   4921       case ARM64in_VBinD: {
   4922          /* 31        23  20 15   11 9 4
   4923             ---------------- 0000 ------   FMUL  --------
   4924             000 11110 011 m  0001 10 n d   FDIV  Dd,Dn,Dm
   4925             ---------------- 0010 ------   FADD  --------
   4926             ---------------- 0011 ------   FSUB  --------
   4927          */
   4928          UInt dD = dregNo(i->ARM64in.VBinD.dst);
   4929          UInt dN = dregNo(i->ARM64in.VBinD.argL);
   4930          UInt dM = dregNo(i->ARM64in.VBinD.argR);
   4931          UInt b1512 = 16; /* impossible */
   4932          switch (i->ARM64in.VBinD.op) {
   4933             case ARM64fpb_DIV: b1512 = X0001; break;
   4934             case ARM64fpb_MUL: b1512 = X0000; break;
   4935             case ARM64fpb_SUB: b1512 = X0011; break;
   4936             case ARM64fpb_ADD: b1512 = X0010; break;
   4937             default: goto bad;
   4938          }
   4939          vassert(b1512 < 16);
   4940          *p++
   4941             = X_3_8_5_6_5_5(X000, X11110011, dM, (b1512 << 2) | X10, dN, dD);
   4942          goto done;
   4943       }
   4944       case ARM64in_VBinS: {
   4945          /* 31        23  20 15   11 9 4
   4946             ---------------- 0000 ------   FMUL  --------
    4947             000 11110 001 m  0001 10 n d   FDIV  Sd,Sn,Sm
   4948             ---------------- 0010 ------   FADD  --------
   4949             ---------------- 0011 ------   FSUB  --------
   4950          */
   4951          UInt sD = dregNo(i->ARM64in.VBinS.dst);
   4952          UInt sN = dregNo(i->ARM64in.VBinS.argL);
   4953          UInt sM = dregNo(i->ARM64in.VBinS.argR);
   4954          UInt b1512 = 16; /* impossible */
   4955          switch (i->ARM64in.VBinS.op) {
   4956             case ARM64fpb_DIV: b1512 = X0001; break;
   4957             case ARM64fpb_MUL: b1512 = X0000; break;
   4958             case ARM64fpb_SUB: b1512 = X0011; break;
   4959             case ARM64fpb_ADD: b1512 = X0010; break;
   4960             default: goto bad;
   4961          }
   4962          vassert(b1512 < 16);
   4963          *p++
   4964             = X_3_8_5_6_5_5(X000, X11110001, sM, (b1512 << 2) | X10, sN, sD);
   4965          goto done;
   4966       }
   4967       case ARM64in_VCmpD: {
   4968          /* 000 11110 01 1 m 00 1000 n 00 000  FCMP Dn, Dm */
   4969          UInt dN = dregNo(i->ARM64in.VCmpD.argL);
   4970          UInt dM = dregNo(i->ARM64in.VCmpD.argR);
   4971          *p++ = X_3_8_5_6_5_5(X000, X11110011, dM, X001000, dN, X00000);
   4972          goto done;
   4973       }
   4974       case ARM64in_VCmpS: {
   4975          /* 000 11110 00 1 m 00 1000 n 00 000  FCMP Sn, Sm */
   4976          UInt sN = dregNo(i->ARM64in.VCmpS.argL);
   4977          UInt sM = dregNo(i->ARM64in.VCmpS.argR);
   4978          *p++ = X_3_8_5_6_5_5(X000, X11110001, sM, X001000, sN, X00000);
   4979          goto done;
   4980       }
   4981       case ARM64in_FPCR: {
   4982          Bool toFPCR = i->ARM64in.FPCR.toFPCR;
   4983          UInt iReg   = iregNo(i->ARM64in.FPCR.iReg);
   4984          if (toFPCR) {
   4985             /* 0xD51B44 000 Rt  MSR fpcr, rT */
   4986             *p++ = 0xD51B4400 | (iReg & 0x1F);
   4987             goto done;
   4988          }
   4989          goto bad; // FPCR -> iReg case currently ATC
   4990       }
   4991       case ARM64in_VBinV: {
   4992          /* 31        23   20 15     9 4
   4993             010 01110 11 1 m  100001 n d   ADD Vd.2d,  Vn.2d,  Vm.2d
   4994             010 01110 10 1 m  100001 n d   ADD Vd.4s,  Vn.4s,  Vm.4s
   4995             010 01110 01 1 m  100001 n d   ADD Vd.8h,  Vn.8h,  Vm.8h
   4996             010 01110 00 1 m  100001 n d   ADD Vd.16b, Vn.16b, Vm.16b
   4997 
   4998             011 01110 11 1 m  100001 n d   SUB Vd.2d,  Vn.2d,  Vm.2d
   4999             011 01110 10 1 m  100001 n d   SUB Vd.4s,  Vn.4s,  Vm.4s
   5000             011 01110 01 1 m  100001 n d   SUB Vd.8h,  Vn.8h,  Vm.8h
   5001             011 01110 00 1 m  100001 n d   SUB Vd.16b, Vn.16b, Vm.16b
   5002 
   5003             010 01110 10 1 m  100111 n d   MUL Vd.4s,  Vn.4s,  Vm.4s
   5004             010 01110 01 1 m  100111 n d   MUL Vd.8h,  Vn.8h,  Vm.8h
   5005             010 01110 00 1 m  100111 n d   MUL Vd.16b, Vn.16b, Vm.16b
   5006 
   5007             010 01110 01 1 m  110101 n d   FADD Vd.2d, Vn.2d, Vm.2d
   5008             010 01110 00 1 m  110101 n d   FADD Vd.4s, Vn.4s, Vm.4s
   5009             010 01110 11 1 m  110101 n d   FSUB Vd.2d, Vn.2d, Vm.2d
   5010             010 01110 10 1 m  110101 n d   FSUB Vd.4s, Vn.4s, Vm.4s
   5011 
   5012             011 01110 01 1 m  110111 n d   FMUL Vd.2d, Vn.2d, Vm.2d
   5013             011 01110 00 1 m  110111 n d   FMUL Vd.4s, Vn.4s, Vm.4s
   5014             011 01110 01 1 m  111111 n d   FDIV Vd.2d, Vn.2d, Vm.2d
   5015             011 01110 00 1 m  111111 n d   FDIV Vd.4s, Vn.4s, Vm.4s
   5016 
   5017             011 01110 10 1 m  011001 n d   UMAX Vd.4s,  Vn.4s,  Vm.4s
   5018             011 01110 01 1 m  011001 n d   UMAX Vd.8h,  Vn.8h,  Vm.8h
   5019             011 01110 00 1 m  011001 n d   UMAX Vd.16b, Vn.16b, Vm.16b
   5020 
   5021             011 01110 10 1 m  011011 n d   UMIN Vd.4s,  Vn.4s,  Vm.4s
   5022             011 01110 01 1 m  011011 n d   UMIN Vd.8h,  Vn.8h,  Vm.8h
   5023             011 01110 00 1 m  011011 n d   UMIN Vd.16b, Vn.16b, Vm.16b
   5024 
   5025             010 01110 10 1 m  011001 n d   SMAX Vd.4s,  Vn.4s,  Vm.4s
   5026             010 01110 01 1 m  011001 n d   SMAX Vd.8h,  Vn.8h,  Vm.8h
   5027             010 01110 00 1 m  011001 n d   SMAX Vd.16b, Vn.16b, Vm.16b
   5028 
   5029             010 01110 10 1 m  011011 n d   SMIN Vd.4s,  Vn.4s,  Vm.4s
   5030             010 01110 01 1 m  011011 n d   SMIN Vd.8h,  Vn.8h,  Vm.8h
   5031             010 01110 00 1 m  011011 n d   SMIN Vd.16b, Vn.16b, Vm.16b
   5032 
   5033             010 01110 00 1 m  000111 n d   AND Vd, Vn, Vm
   5034             010 01110 10 1 m  000111 n d   ORR Vd, Vn, Vm
   5035             011 01110 00 1 m  000111 n d   EOR Vd, Vn, Vm
   5036 
   5037             011 01110 11 1 m  100011 n d   CMEQ Vd.2d,  Vn.2d,  Vm.2d
   5038             011 01110 10 1 m  100011 n d   CMEQ Vd.4s,  Vn.4s,  Vm.4s
   5039             011 01110 01 1 m  100011 n d   CMEQ Vd.8h,  Vn.8h,  Vm.8h
   5040             011 01110 00 1 m  100011 n d   CMEQ Vd.16b, Vn.16b, Vm.16b
   5041 
   5042             011 01110 11 1 m  001101 n d   CMHI Vd.2d,  Vn.2d,  Vm.2d
   5043             011 01110 10 1 m  001101 n d   CMHI Vd.4s,  Vn.4s,  Vm.4s
   5044             011 01110 01 1 m  001101 n d   CMHI Vd.8h,  Vn.8h,  Vm.8h
   5045             011 01110 00 1 m  001101 n d   CMHI Vd.16b, Vn.16b, Vm.16b
   5046 
   5047             010 01110 11 1 m  001101 n d   CMGT Vd.2d,  Vn.2d,  Vm.2d
   5048             010 01110 10 1 m  001101 n d   CMGT Vd.4s,  Vn.4s,  Vm.4s
   5049             010 01110 01 1 m  001101 n d   CMGT Vd.8h,  Vn.8h,  Vm.8h
   5050             010 01110 00 1 m  001101 n d   CMGT Vd.16b, Vn.16b, Vm.16b
   5051 
   5052             010 01110 01 1 m  111001 n d   FCMEQ Vd.2d, Vn.2d, Vm.2d
   5053             010 01110 00 1 m  111001 n d   FCMEQ Vd.4s, Vn.4s, Vm.4s
   5054 
   5055             011 01110 01 1 m  111001 n d   FCMGE Vd.2d, Vn.2d, Vm.2d
   5056             011 01110 00 1 m  111001 n d   FCMGE Vd.4s, Vn.4s, Vm.4s
   5057 
   5058             011 01110 11 1 m  111001 n d   FCMGT Vd.2d, Vn.2d, Vm.2d
   5059             011 01110 10 1 m  111001 n d   FCMGT Vd.4s, Vn.4s, Vm.4s
   5060 
   5061             010 01110 00 0 m  000000 n d   TBL Vd.16b, {Vn.16b}, Vm.16b
   5062 
   5063          */
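                 /* Each row above maps onto X_3_8_5_6_5_5 as consecutive
                    3+8+5+6+5+5 bit fields, most significant first.  E.g. the
                    first row, ADD Vd.2d, Vn.2d, Vm.2d, becomes
                    X_3_8_5_6_5_5(X010, X01110111, vM, X100001, vN, vD),
                    which for m = n = d = 0 is the word 0x4EE08400. */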
   5064          UInt vD = qregNo(i->ARM64in.VBinV.dst);
   5065          ARM64VecBinOp op = i->ARM64in.VBinV.op;
   5066          Bool isV128 = (op != ARM64vecb_UMULL8x8
   5067                         && op != ARM64vecb_UMULL16x4
   5068                         && op != ARM64vecb_UMULL32x2);
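                 /* The UMULL forms (not listed in the table above) are
                    widening: the result fills a whole vector but the sources
                    are only 64 bits wide, hence the D-register lookup below. */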
   5069          UInt vN = isV128 ? qregNo(i->ARM64in.VBinV.argL)
   5070                           : dregNo(i->ARM64in.VBinV.argL);
   5071          UInt vM = isV128 ? qregNo(i->ARM64in.VBinV.argR)
   5072                           : dregNo(i->ARM64in.VBinV.argR);
   5073          switch (i->ARM64in.VBinV.op) {
   5074             case ARM64vecb_ADD64x2:
   5075                *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X100001, vN, vD);
   5076                break;
   5077             case ARM64vecb_ADD32x4:
   5078                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X100001, vN, vD);
   5079                break;
   5080             case ARM64vecb_ADD16x8:
   5081                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X100001, vN, vD);
   5082                break;
   5083             case ARM64vecb_ADD8x16:
   5084                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X100001, vN, vD);
   5085                break;
   5086             case ARM64vecb_SUB64x2:
   5087                *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X100001, vN, vD);
   5088                break;
   5089             case ARM64vecb_SUB32x4:
   5090                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X100001, vN, vD);
   5091                break;
   5092             case ARM64vecb_SUB16x8:
   5093                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X100001, vN, vD);
   5094                break;
   5095             case ARM64vecb_SUB8x16:
   5096                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100001, vN, vD);
   5097                break;
   5098             case ARM64vecb_MUL32x4:
   5099                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X100111, vN, vD);
   5100                break;
   5101             case ARM64vecb_MUL16x8:
   5102                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X100111, vN, vD);
   5103                break;
   5104             case ARM64vecb_MUL8x16:
   5105                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X100111, vN, vD);
   5106                break;
   5107             case ARM64vecb_FADD64x2:
   5108                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X110101, vN, vD);
   5109                break;
   5110             case ARM64vecb_FADD32x4:
   5111                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X110101, vN, vD);
   5112                break;
   5113             case ARM64vecb_FSUB64x2:
   5114                *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X110101, vN, vD);
   5115                break;
   5116             case ARM64vecb_FSUB32x4:
   5117                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X110101, vN, vD);
   5118                break;
   5119             case ARM64vecb_FMUL64x2:
   5120                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X110111, vN, vD);
   5121                break;
   5122             case ARM64vecb_FMUL32x4:
   5123                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X110111, vN, vD);
   5124                break;
   5125             case ARM64vecb_FDIV64x2:
   5126                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X111111, vN, vD);
   5127                break;
   5128             case ARM64vecb_FDIV32x4:
   5129                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X111111, vN, vD);
   5130                break;
   5131 
   5132             case ARM64vecb_UMAX32x4:
   5133                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X011001, vN, vD);
   5134                break;
   5135             case ARM64vecb_UMAX16x8:
   5136                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X011001, vN, vD);
   5137                break;
   5138             case ARM64vecb_UMAX8x16:
   5139                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X011001, vN, vD);
   5140                break;
   5141 
   5142             case ARM64vecb_UMIN32x4:
   5143                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X011011, vN, vD);
   5144                break;
   5145             case ARM64vecb_UMIN16x8:
   5146                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X011011, vN, vD);
   5147                break;
   5148             case ARM64vecb_UMIN8x16:
   5149                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X011011, vN, vD);
   5150                break;
   5151 
   5152             case ARM64vecb_UMULL32x2:
   5153                *p++ = X_3_8_5_6_5_5(X001, X01110101, vM, X110000, vN, vD);
   5154                break;
   5155             case ARM64vecb_UMULL16x4:
   5156                *p++ = X_3_8_5_6_5_5(X001, X01110011, vM, X110000, vN, vD);
   5157                break;
   5158             case ARM64vecb_UMULL8x8:
   5159                *p++ = X_3_8_5_6_5_5(X001, X01110001, vM, X110000, vN, vD);
   5160                break;
   5161 
   5162             case ARM64vecb_SMAX32x4:
   5163                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011001, vN, vD);
   5164                break;
   5165             case ARM64vecb_SMAX16x8:
   5166                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011001, vN, vD);
   5167                break;
   5168             case ARM64vecb_SMAX8x16:
   5169                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X011001, vN, vD);
   5170                break;
   5171 
   5172             case ARM64vecb_SMIN32x4:
   5173                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011011, vN, vD);
   5174                break;
   5175             case ARM64vecb_SMIN16x8:
   5176                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011011, vN, vD);
   5177                break;
   5178             case ARM64vecb_SMIN8x16:
   5179                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X011011, vN, vD);
   5180                break;
   5181 
   5182             case ARM64vecb_AND:
   5183                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X000111, vN, vD);
   5184                break;
   5185             case ARM64vecb_ORR:
   5186                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X000111, vN, vD);
   5187                break;
   5188             case ARM64vecb_XOR:
   5189                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X000111, vN, vD);
   5190                break;
   5191 
   5192             case ARM64vecb_CMEQ64x2:
   5193                *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X100011, vN, vD);
   5194                break;
   5195             case ARM64vecb_CMEQ32x4:
   5196                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X100011, vN, vD);
   5197                break;
   5198             case ARM64vecb_CMEQ16x8:
   5199                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X100011, vN, vD);
   5200                break;
   5201             case ARM64vecb_CMEQ8x16:
   5202                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100011, vN, vD);
   5203                break;
   5204 
   5205             case ARM64vecb_CMHI64x2:
   5206                *p++ = X_3_8_5_6_5_5(X011, X01110111, vM,  X001101, vN, vD);
   5207                break;
   5208             case ARM64vecb_CMHI32x4:
   5209                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM,  X001101, vN, vD);
   5210                break;
   5211             case ARM64vecb_CMHI16x8:
   5212                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM,  X001101, vN, vD);
   5213                break;
   5214             case ARM64vecb_CMHI8x16:
   5215                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM,  X001101, vN, vD);
   5216                break;
   5217 
   5218             case ARM64vecb_CMGT64x2:
   5219                *p++ = X_3_8_5_6_5_5(X010, X01110111, vM,  X001101, vN, vD);
   5220                break;
   5221             case ARM64vecb_CMGT32x4:
   5222                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM,  X001101, vN, vD);
   5223                break;
   5224             case ARM64vecb_CMGT16x8:
   5225                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM,  X001101, vN, vD);
   5226                break;
   5227             case ARM64vecb_CMGT8x16:
   5228                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM,  X001101, vN, vD);
   5229                break;
   5230 
   5231             case ARM64vecb_FCMEQ64x2:
   5232                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111001, vN, vD);
   5233                break;
   5234             case ARM64vecb_FCMEQ32x4:
   5235                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111001, vN, vD);
   5236                break;
   5237 
   5238             case ARM64vecb_FCMGE64x2:
   5239                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X111001, vN, vD);
   5240                break;
   5241             case ARM64vecb_FCMGE32x4:
   5242                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X111001, vN, vD);
   5243                break;
   5244 
   5245             case ARM64vecb_FCMGT64x2:
   5246                *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X111001, vN, vD);
   5247                break;
   5248             case ARM64vecb_FCMGT32x4:
   5249                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X111001, vN, vD);
   5250                break;
   5251 
   5252             case ARM64vecb_TBL1:
   5253                *p++ = X_3_8_5_6_5_5(X010, X01110000, vM,  X000000, vN, vD);
   5254                break;
   5255 
   5256             default:
   5257                goto bad;
   5258          }
   5259          goto done;
   5260       }
   5261       case ARM64in_VUnaryV: {
   5262          /* 31        23   20    15     9 4
   5263             010 01110 11 1 00000 111110 n d  FABS Vd.2d,  Vn.2d
   5264             010 01110 10 1 00000 111110 n d  FABS Vd.4s,  Vn.4s
   5265             011 01110 11 1 00000 111110 n d  FNEG Vd.2d,  Vn.2d
   5266             011 01110 10 1 00000 111110 n d  FNEG Vd.4s,  Vn.4s
   5267             010 01110 00 1 00000 010110 n d  CNT  Vd.16b, Vn.16b
   5268             011 01110 00 1 00000 010110 n d  NOT  Vd.16b, Vn.16b
   5269 
   5270             011 01110 00 1 10000 001110 n d  UADDLV  Hd, Vn.16b
   5271             011 01110 01 1 10000 001110 n d  UADDLV  Sd, Vn.8h
   5272             011 01110 10 1 10000 001110 n d  UADDLV  Dd, Vn.4s
   5273 
   5274             010 01110 00 1 10000 001110 n d  SADDLV  Hd, Vn.16b
   5275             010 01110 01 1 10000 001110 n d  SADDLV  Sd, Vn.8h
   5276             010 01110 10 1 10000 001110 n d  SADDLV  Dd, Vn.4s
   5277          */
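                 /* As with the binary cases, each row maps directly onto
                    X_3_8_5_6_5_5; e.g. NOT Vd.16b, Vn.16b with n = d = 0 is
                    the word 0x6E205800.  The VMOVL cases handled below are
                    not in this table and are emitted differently. */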
   5278          ARM64VecUnaryOp op = i->ARM64in.VUnaryV.op;
   5279          UInt vD = qregNo(i->ARM64in.VUnaryV.dst);
   5280          Bool isV128 = !(op >= ARM64vecu_VMOVL8U && op <= ARM64vecu_VMOVL32S);
   5281          UInt vN = isV128 ? qregNo(i->ARM64in.VUnaryV.arg)
   5282                           : dregNo(i->ARM64in.VUnaryV.arg);
   5283          switch (i->ARM64in.VUnaryV.op) {
   5284             case ARM64vecu_FABS64x2:
   5285                *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X111110, vN, vD);
   5286                break;
   5287             case ARM64vecu_FABS32x4:
   5288                *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X111110, vN, vD);
   5289                break;
   5290             case ARM64vecu_FNEG64x2:
   5291                *p++ = X_3_8_5_6_5_5(X011, X01110111, X00000, X111110, vN, vD);
   5292                break;
   5293             case ARM64vecu_FNEG32x4:
   5294                *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X111110, vN, vD);
   5295                break;
   5296             case ARM64vecu_VMOVL8U:
   5297                *p++ = X_9_1_6_4_6_1_1_4(X111100111, vD >> 4, X001000, vD & 0xFU, X101000, vN >> 4, 1, vN & 0xFU);
   5298                break;
   5299             case ARM64vecu_VMOVL16U:
   5300                *p++ = X_9_1_6_4_6_1_1_4(X111100111, vD >> 4, X010000, vD & 0xFU, X101000, vN >> 4, 1, vN & 0xFU);
   5301                break;
   5302             case ARM64vecu_VMOVL32U:
   5303                *p++ = X_9_1_6_4_6_1_1_4(X111100111, vD >> 4, X100000, vD & 0xFU, X101000, vN >> 4, 1, vN & 0xFU);
   5304                break;
   5305             case ARM64vecu_VMOVL8S:
   5306                *p++ = X_9_1_6_4_6_1_1_4(X111100101, vD >> 4, X001000, vD & 0xFU, X101000, vN >> 4, 1, vN & 0xFU);
   5307                break;
   5308             case ARM64vecu_VMOVL16S:
   5309                *p++ = X_9_1_6_4_6_1_1_4(X111100101, vD >> 4, X010000, vD & 0xFU, X101000, vN >> 4, 1, vN & 0xFU);
   5310                break;
   5311             case ARM64vecu_VMOVL32S:
   5312                *p++ = X_9_1_6_4_6_1_1_4(X111100101, vD >> 4, X100000, vD & 0xFU, X101000, vN >> 4, 1, vN & 0xFU);
   5313                break;
   5314             case ARM64vecu_NOT:
   5315                *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X010110, vN, vD);
   5316                break;
   5317             case ARM64vecu_CNT:
   5318                *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X010110, vN, vD);
   5319                break;
   5320             case ARM64vecu_UADDLV8x16:
   5321                *p++ = X_3_8_5_6_5_5(X011, X01110001, X10000, X001110, vN, vD);
   5322                break;
   5323             case ARM64vecu_UADDLV16x8:
   5324                *p++ = X_3_8_5_6_5_5(X011, X01110011, X10000, X001110, vN, vD);
   5325                break;
   5326             case ARM64vecu_UADDLV32x4:
   5327                *p++ = X_3_8_5_6_5_5(X011, X01110101, X10000, X001110, vN, vD);
   5328                break;
   5329             case ARM64vecu_SADDLV8x16:
   5330                *p++ = X_3_8_5_6_5_5(X010, X01110001, X10000, X001110, vN, vD);
   5331                break;
   5332             case ARM64vecu_SADDLV16x8:
   5333                *p++ = X_3_8_5_6_5_5(X010, X01110011, X10000, X001110, vN, vD);
   5334                break;
   5335             case ARM64vecu_SADDLV32x4:
   5336                *p++ = X_3_8_5_6_5_5(X010, X01110101, X10000, X001110, vN, vD);
   5337                break;
   5338             default:
   5339                goto bad;
   5340          }
   5341          goto done;
   5342       }
   5343       case ARM64in_VNarrowV: {
   5344          /* 31        23 21      15     9 4
   5345             000 01110 00 1,00001 001010 n d  XTN Vd.8b, Vn.8h
   5346             000 01110 01 1,00001 001010 n d  XTN Vd.4h, Vn.4s
   5347             000 01110 10 1,00001 001010 n d  XTN Vd.2s, Vn.2d
   5348          */
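                 /* dszBlg2 selects the row: 0, 1 and 2 give the .8b, .4h and
                    .2s destination forms respectively, by OR-ing
                    (dszBlg2 << 1) into the size bits of the 8-bit field. */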
   5349          UInt vD = qregNo(i->ARM64in.VNarrowV.dst);
   5350          UInt vN = qregNo(i->ARM64in.VNarrowV.src);
   5351          UInt dszBlg2 = i->ARM64in.VNarrowV.dszBlg2;
   5352          vassert(dszBlg2 >= 0 && dszBlg2 <= 2);
   5353          *p++ = X_3_8_5_6_5_5(X000, X01110001 | (dszBlg2 << 1),
   5354                               X00001, X001010, vN, vD);
   5355          goto done;
   5356       }
   5357       case ARM64in_VShiftImmV: {
   5358          /*
   5359             0q1 011110 immh immb 000001 n d  USHR Vd.T, Vn.T, #sh
   5360             0q0 011110 immh immb 000001 n d  SSHR Vd.T, Vn.T, #sh
   5361             where immh:immb
   5362                = case T of
   5363                     2d  | sh in 1..63 -> let xxxxxx = 64-sh in 1xxx:xxx
   5364                     4s  | sh in 1..31 -> let  xxxxx = 32-sh in 01xx:xxx
   5365                     8h  | sh in 1..15 -> let   xxxx = 16-sh in 001x:xxx
   5366                     16b | sh in 1..7  -> let    xxx =  8-sh in 0001:xxx
   5367 
   5368             0q0 011110 immh immb 010101 n d  SHL Vd.T, Vn.T, #sh
   5369             where immh:immb
   5370                = case T of
   5371                     2d  | sh in 1..63 -> let xxxxxx = sh in 1xxx:xxx
   5372                     4s  | sh in 1..31 -> let  xxxxx = sh in 01xx:xxx
   5373                     8h  | sh in 1..15 -> let   xxxx = sh in 001x:xxx
   5374                     16b | sh in 1..7  -> let    xxx = sh in 0001:xxx
   5375          */
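                 /* Two worked values of immh:immb as described above:
                    USHR Vd.4s, Vn.4s, #3 : xxxxx = 32-3 = 29 -> 0111:101
                    SHL  Vd.8h, Vn.8h, #5 : xxxx  = 5         -> 0010:101 */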
   5376          UInt vD = qregNo(i->ARM64in.VShiftImmV.dst);
   5377          UInt vN = qregNo(i->ARM64in.VShiftImmV.src);
   5378          UInt sh = i->ARM64in.VShiftImmV.amt;
   5379          ARM64VecShiftOp op = i->ARM64in.VShiftImmV.op;
   5380          Bool syned = False;
   5381          switch (op) {
   5382             /* 64x2 cases */
   5383             case ARM64vecsh_SSHR64x2: syned = True; /* fallthrough */
   5384             case ARM64vecsh_USHR64x2:
   5385                if (sh >= 1 && sh <= 63) {
   5386                   UInt xxxxxx = 64-sh;
   5387                   *p++ = X_3_6_7_6_5_5(syned ? X010 : X011, X011110,
   5388                                        X1000000 | xxxxxx, X000001, vN, vD);
   5389                   goto done;
   5390                }
   5391                break;
   5392             case ARM64vecsh_SHL64x2:
   5393                if (sh >= 1 && sh <= 63) {
   5394                   UInt xxxxxx = sh;
   5395                   *p++ = X_3_6_7_6_5_5(X010, X011110,
   5396                                        X1000000 | xxxxxx, X010101, vN, vD);
   5397                   goto done;
   5398                }
   5399                break;
   5400             /* 32x4 cases */
   5401             case ARM64vecsh_SSHR32x4: syned = True; /* fallthrough */
   5402             case ARM64vecsh_USHR32x4:
   5403                if (sh >= 1 && sh <= 31) {
   5404                   UInt xxxxx = 32-sh;
   5405                   *p++ = X_3_6_7_6_5_5(syned ? X010 : X011, X011110,
   5406                                        X0100000 | xxxxx, X000001, vN, vD);
   5407                   goto done;
   5408                }
   5409                break;
   5410             case ARM64vecsh_SHL32x4:
   5411                if (sh >= 1 && sh <= 31) {
   5412                   UInt xxxxx = sh;
   5413                   *p++ = X_3_6_7_6_5_5(X010, X011110,
   5414                                        X0100000 | xxxxx, X010101, vN, vD);
   5415                   goto done;
   5416                }
   5417                break;
   5418             /* 16x8 cases */
   5419             case ARM64vecsh_SSHR16x8: syned = True; /* fallthrough */
   5420             case ARM64vecsh_USHR16x8:
   5421                if (sh >= 1 && sh <= 15) {
   5422                   UInt xxxx = 16-sh;
   5423                   *p++ = X_3_6_7_6_5_5(syned ? X010 : X011, X011110,
   5424                                        X0010000 | xxxx, X000001, vN, vD);
   5425                   goto done;
   5426                }
   5427                break;
   5428             case ARM64vecsh_SHL16x8:
   5429                if (sh >= 1 && sh <= 15) {
   5430                   UInt xxxx = sh;
   5431                   *p++ = X_3_6_7_6_5_5(X010, X011110,
   5432                                        X0010000 | xxxx, X010101, vN, vD);
   5433                   goto done;
   5434                }
   5435                break;
   5436 
   5437 
   5438             /* 8x16 cases */
   5439             case ARM64vecsh_SSHR8x16: syned = True; /* fallthrough */
   5440             case ARM64vecsh_USHR8x16:
   5441                if (sh >= 1 && sh <= 7) {
   5442                   UInt xxx = 8-sh;
   5443                   *p++ = X_3_6_7_6_5_5(syned ? X010 : X011, X011110,
   5444                                        X0001000 | xxx, X000001, vN, vD);
   5445                   goto done;
   5446                }
   5447                break;
   5448             case ARM64vecsh_SHL8x16:
   5449                if (sh >= 1 && sh <= 7) {
   5450                   UInt xxx = sh;
   5451                   *p++ = X_3_6_7_6_5_5(X010, X011110,
   5452                                        X0001000 | xxx, X010101, vN, vD);
   5453                   goto done;
   5454                }
   5455                break;
   5456 
   5457             default:
   5458                break;
   5459          }
   5460          goto bad;
   5461       }
   5462 //ZZ       case ARMin_VAluS: {
   5463 //ZZ          UInt dN = fregNo(i->ARMin.VAluS.argL);
   5464 //ZZ          UInt dD = fregNo(i->ARMin.VAluS.dst);
   5465 //ZZ          UInt dM = fregNo(i->ARMin.VAluS.argR);
   5466 //ZZ          UInt bN = dN & 1;
   5467 //ZZ          UInt bD = dD & 1;
   5468 //ZZ          UInt bM = dM & 1;
   5469 //ZZ          UInt pqrs = X1111; /* undefined */
   5470 //ZZ          switch (i->ARMin.VAluS.op) {
   5471 //ZZ             case ARMvfp_ADD: pqrs = X0110; break;
   5472 //ZZ             case ARMvfp_SUB: pqrs = X0111; break;
   5473 //ZZ             case ARMvfp_MUL: pqrs = X0100; break;
   5474 //ZZ             case ARMvfp_DIV: pqrs = X1000; break;
   5475 //ZZ             default: goto bad;
   5476 //ZZ          }
   5477 //ZZ          vassert(pqrs != X1111);
   5478 //ZZ          UInt bP  = (pqrs >> 3) & 1;
   5479 //ZZ          UInt bQ  = (pqrs >> 2) & 1;
   5480 //ZZ          UInt bR  = (pqrs >> 1) & 1;
   5481 //ZZ          UInt bS  = (pqrs >> 0) & 1;
   5482 //ZZ          UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,bD,bQ,bR),
   5483 //ZZ                               (dN >> 1), (dD >> 1),
   5484 //ZZ                               X1010, BITS4(bN,bS,bM,0), (dM >> 1));
   5485 //ZZ          *p++ = insn;
   5486 //ZZ          goto done;
   5487 //ZZ       }
   5488 //ZZ       case ARMin_VUnaryS: {
   5489 //ZZ          UInt fD   = fregNo(i->ARMin.VUnaryS.dst);
   5490 //ZZ          UInt fM   = fregNo(i->ARMin.VUnaryS.src);
   5491 //ZZ          UInt insn = 0;
   5492 //ZZ          switch (i->ARMin.VUnaryS.op) {
   5493 //ZZ             case ARMvfpu_COPY:
   5494 //ZZ                insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
   5495 //ZZ                                (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
   5496 //ZZ                                (fM >> 1));
   5497 //ZZ                break;
   5498 //ZZ             case ARMvfpu_ABS:
   5499 //ZZ                insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
   5500 //ZZ                                (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
   5501 //ZZ                                (fM >> 1));
   5502 //ZZ                break;
   5503 //ZZ             case ARMvfpu_NEG:
   5504 //ZZ                insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
   5505 //ZZ                                (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
   5506 //ZZ                                (fM >> 1));
   5507 //ZZ                break;
   5508 //ZZ             case ARMvfpu_SQRT:
   5509 //ZZ                insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
   5510 //ZZ                                (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
   5511 //ZZ                                (fM >> 1));
   5512 //ZZ                break;
   5513 //ZZ             default:
   5514 //ZZ                goto bad;
   5515 //ZZ          }
   5516 //ZZ          *p++ = insn;
   5517 //ZZ          goto done;
   5518 //ZZ       }
   5519 //ZZ       case ARMin_VCMovD: {
   5520 //ZZ          UInt cc = (UInt)i->ARMin.VCMovD.cond;
   5521 //ZZ          UInt dD = dregNo(i->ARMin.VCMovD.dst);
   5522 //ZZ          UInt dM = dregNo(i->ARMin.VCMovD.src);
   5523 //ZZ          vassert(cc < 16 && cc != ARMcc_AL);
   5524 //ZZ          UInt insn = XXXXXXXX(cc, X1110,X1011,X0000,dD,X1011,X0100,dM);
   5525 //ZZ          *p++ = insn;
   5526 //ZZ          goto done;
   5527 //ZZ       }
   5528 //ZZ       case ARMin_VCMovS: {
   5529 //ZZ          UInt cc = (UInt)i->ARMin.VCMovS.cond;
   5530 //ZZ          UInt fD = fregNo(i->ARMin.VCMovS.dst);
   5531 //ZZ          UInt fM = fregNo(i->ARMin.VCMovS.src);
   5532 //ZZ          vassert(cc < 16 && cc != ARMcc_AL);
   5533 //ZZ          UInt insn = XXXXXXXX(cc, X1110, BITS4(1,(fD & 1),1,1),
   5534 //ZZ                               X0000,(fD >> 1),X1010,
   5535 //ZZ                               BITS4(0,1,(fM & 1),0), (fM >> 1));
   5536 //ZZ          *p++ = insn;
   5537 //ZZ          goto done;
   5538 //ZZ       }
   5539 //ZZ       case ARMin_VXferD: {
   5540 //ZZ          UInt dD  = dregNo(i->ARMin.VXferD.dD);
   5541 //ZZ          UInt rHi = iregNo(i->ARMin.VXferD.rHi);
   5542 //ZZ          UInt rLo = iregNo(i->ARMin.VXferD.rLo);
   5543 //ZZ          /* vmov dD, rLo, rHi is
   5544 //ZZ             E C 4 rHi rLo B (0,0,dD[4],1) dD[3:0]
   5545 //ZZ             vmov rLo, rHi, dD is
   5546 //ZZ             E C 5 rHi rLo B (0,0,dD[4],1) dD[3:0]
   5547 //ZZ          */
   5548 //ZZ          UInt insn
   5549 //ZZ             = XXXXXXXX(0xE, 0xC, i->ARMin.VXferD.toD ? 4 : 5,
   5550 //ZZ                        rHi, rLo, 0xB,
   5551 //ZZ                        BITS4(0,0, ((dD >> 4) & 1), 1), (dD & 0xF));
   5552 //ZZ          *p++ = insn;
   5553 //ZZ          goto done;
   5554 //ZZ       }
   5555 //ZZ       case ARMin_VXferS: {
   5556 //ZZ          UInt fD  = fregNo(i->ARMin.VXferS.fD);
   5557 //ZZ          UInt rLo = iregNo(i->ARMin.VXferS.rLo);
   5558 //ZZ          /* vmov fD, rLo is
   5559 //ZZ             E E 0 fD[4:1] rLo A (fD[0],0,0,1) 0
   5560 //ZZ             vmov rLo, fD is
   5561 //ZZ             E E 1 fD[4:1] rLo A (fD[0],0,0,1) 0
   5562 //ZZ          */
   5563 //ZZ          UInt insn
   5564 //ZZ             = XXXXXXXX(0xE, 0xE, i->ARMin.VXferS.toS ? 0 : 1,
   5565 //ZZ                        (fD >> 1) & 0xF, rLo, 0xA,
   5566 //ZZ                        BITS4((fD & 1),0,0,1), 0);
   5567 //ZZ          *p++ = insn;
   5568 //ZZ          goto done;
   5569 //ZZ       }
   5570 //ZZ       case ARMin_VCvtID: {
   5571 //ZZ          Bool iToD = i->ARMin.VCvtID.iToD;
   5572 //ZZ          Bool syned = i->ARMin.VCvtID.syned;
   5573 //ZZ          if (iToD && syned) {
   5574 //ZZ             // FSITOD: I32S-in-freg to F64-in-dreg
   5575 //ZZ             UInt regF = fregNo(i->ARMin.VCvtID.src);
   5576 //ZZ             UInt regD = dregNo(i->ARMin.VCvtID.dst);
   5577 //ZZ             UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
   5578 //ZZ                                  X1011, BITS4(1,1,(regF & 1),0),
   5579 //ZZ                                  (regF >> 1) & 0xF);
   5580 //ZZ             *p++ = insn;
   5581 //ZZ             goto done;
   5582 //ZZ          }
   5583 //ZZ          if (iToD && (!syned)) {
   5584 //ZZ             // FUITOD: I32U-in-freg to F64-in-dreg
   5585 //ZZ             UInt regF = fregNo(i->ARMin.VCvtID.src);
   5586 //ZZ             UInt regD = dregNo(i->ARMin.VCvtID.dst);
   5587 //ZZ             UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
   5588 //ZZ                                  X1011, BITS4(0,1,(regF & 1),0),
   5589 //ZZ                                  (regF >> 1) & 0xF);
   5590 //ZZ             *p++ = insn;
   5591 //ZZ             goto done;
   5592 //ZZ          }
   5593 //ZZ          if ((!iToD) && syned) {
   5594 //ZZ             // FTOSID: F64-in-dreg to I32S-in-freg
   5595 //ZZ             UInt regD = dregNo(i->ARMin.VCvtID.src);
   5596 //ZZ             UInt regF = fregNo(i->ARMin.VCvtID.dst);
   5597 //ZZ             UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
   5598 //ZZ                                  X1101, (regF >> 1) & 0xF,
   5599 //ZZ                                  X1011, X0100, regD);
   5600 //ZZ             *p++ = insn;
   5601 //ZZ             goto done;
   5602 //ZZ          }
   5603 //ZZ          if ((!iToD) && (!syned)) {
   5604 //ZZ             // FTOUID: F64-in-dreg to I32U-in-freg
   5605 //ZZ             UInt regD = dregNo(i->ARMin.VCvtID.src);
   5606 //ZZ             UInt regF = fregNo(i->ARMin.VCvtID.dst);
   5607 //ZZ             UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
   5608 //ZZ                                  X1100, (regF >> 1) & 0xF,
   5609 //ZZ                                  X1011, X0100, regD);
   5610 //ZZ             *p++ = insn;
   5611 //ZZ             goto done;
   5612 //ZZ          }
   5613 //ZZ          /*UNREACHED*/
   5614 //ZZ          vassert(0);
   5615 //ZZ       }
   5616 //ZZ       case ARMin_NLdStD: {
   5617 //ZZ          UInt regD = dregNo(i->ARMin.NLdStD.dD);
   5618 //ZZ          UInt regN, regM;
   5619 //ZZ          UInt D = regD >> 4;
   5620 //ZZ          UInt bL = i->ARMin.NLdStD.isLoad ? 1 : 0;
   5621 //ZZ          UInt insn;
   5622 //ZZ          vassert(hregClass(i->ARMin.NLdStD.dD) == HRcFlt64);
   5623 //ZZ          regD &= 0xF;
   5624 //ZZ          if (i->ARMin.NLdStD.amode->tag == ARMamN_RR) {
   5625 //ZZ             regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rN);
   5626 //ZZ             regM = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rM);
   5627 //ZZ          } else {
   5628 //ZZ             regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.R.rN);
   5629 //ZZ             regM = 15;
   5630 //ZZ          }
   5631 //ZZ          insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
   5632 //ZZ                               regN, regD, X0111, X1000, regM);
   5633 //ZZ          *p++ = insn;
   5634 //ZZ          goto done;
   5635 //ZZ       }
   5636 //ZZ       case ARMin_NUnaryS: {
   5637 //ZZ          UInt Q = i->ARMin.NUnaryS.Q ? 1 : 0;
   5638 //ZZ          UInt regD, D;
   5639 //ZZ          UInt regM, M;
   5640 //ZZ          UInt size = i->ARMin.NUnaryS.size;
   5641 //ZZ          UInt insn;
   5642 //ZZ          UInt opc, opc1, opc2;
   5643 //ZZ          switch (i->ARMin.NUnaryS.op) {
   5644 //ZZ             case ARMneon_VDUP:
   5645 //ZZ                if (i->ARMin.NUnaryS.size >= 16)
   5646 //ZZ                   goto bad;
   5647 //ZZ                if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Reg)
   5648 //ZZ                   goto bad;
   5649 //ZZ                if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
   5650 //ZZ                   goto bad;
   5651 //ZZ                regD = (hregClass(i->ARMin.NUnaryS.dst->reg) == HRcVec128)
   5652 //ZZ                         ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1)
   5653 //ZZ                         : dregNo(i->ARMin.NUnaryS.dst->reg);
   5654 //ZZ                regM = (hregClass(i->ARMin.NUnaryS.src->reg) == HRcVec128)
   5655 //ZZ                         ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1)
   5656 //ZZ                         : dregNo(i->ARMin.NUnaryS.src->reg);
   5657 //ZZ                D = regD >> 4;
   5658 //ZZ                M = regM >> 4;
   5659 //ZZ                regD &= 0xf;
   5660 //ZZ                regM &= 0xf;
   5661 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1),
   5662 //ZZ                                (i->ARMin.NUnaryS.size & 0xf), regD,
   5663 //ZZ                                X1100, BITS4(0,Q,M,0), regM);
   5664 //ZZ                *p++ = insn;
   5665 //ZZ                goto done;
   5666 //ZZ             case ARMneon_SETELEM:
   5667 //ZZ                regD = Q ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1) :
   5668 //ZZ                                 dregNo(i->ARMin.NUnaryS.dst->reg);
   5669 //ZZ                regM = iregNo(i->ARMin.NUnaryS.src->reg);
   5670 //ZZ                M = regM >> 4;
   5671 //ZZ                D = regD >> 4;
   5672 //ZZ                regM &= 0xF;
   5673 //ZZ                regD &= 0xF;
   5674 //ZZ                if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Scalar)
   5675 //ZZ                   goto bad;
   5676 //ZZ                switch (size) {
   5677 //ZZ                   case 0:
   5678 //ZZ                      if (i->ARMin.NUnaryS.dst->index > 7)
   5679 //ZZ                         goto bad;
   5680 //ZZ                      opc = X1000 | i->ARMin.NUnaryS.dst->index;
   5681 //ZZ                      break;
   5682 //ZZ                   case 1:
   5683 //ZZ                      if (i->ARMin.NUnaryS.dst->index > 3)
   5684 //ZZ                         goto bad;
   5685 //ZZ                      opc = X0001 | (i->ARMin.NUnaryS.dst->index << 1);
   5686 //ZZ                      break;
   5687 //ZZ                   case 2:
   5688 //ZZ                      if (i->ARMin.NUnaryS.dst->index > 1)
   5689 //ZZ                         goto bad;
   5690 //ZZ                      opc = X0000 | (i->ARMin.NUnaryS.dst->index << 2);
   5691 //ZZ                      break;
   5692 //ZZ                   default:
   5693 //ZZ                      goto bad;
   5694 //ZZ                }
   5695 //ZZ                opc1 = (opc >> 2) & 3;
   5696 //ZZ                opc2 = opc & 3;
   5697 //ZZ                insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),0),
   5698 //ZZ                                regD, regM, X1011,
   5699 //ZZ                                BITS4(D,(opc2 >> 1),(opc2 & 1),1), X0000);
   5700 //ZZ                *p++ = insn;
   5701 //ZZ                goto done;
   5702 //ZZ             case ARMneon_GETELEMU:
   5703 //ZZ                regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) :
   5704 //ZZ                                 dregNo(i->ARMin.NUnaryS.src->reg);
   5705 //ZZ                regD = iregNo(i->ARMin.NUnaryS.dst->reg);
   5706 //ZZ                M = regM >> 4;
   5707 //ZZ                D = regD >> 4;
   5708 //ZZ                regM &= 0xF;
   5709 //ZZ                regD &= 0xF;
   5710 //ZZ                if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
   5711 //ZZ                   goto bad;
   5712 //ZZ                switch (size) {
   5713 //ZZ                   case 0:
   5714 //ZZ                      if (Q && i->ARMin.NUnaryS.src->index > 7) {
   5715 //ZZ                         regM++;
   5716 //ZZ                         i->ARMin.NUnaryS.src->index -= 8;
   5717 //ZZ                      }
   5718 //ZZ                      if (i->ARMin.NUnaryS.src->index > 7)
   5719 //ZZ                         goto bad;
   5720 //ZZ                      opc = X1000 | i->ARMin.NUnaryS.src->index;
   5721 //ZZ                      break;
   5722 //ZZ                   case 1:
   5723 //ZZ                      if (Q && i->ARMin.NUnaryS.src->index > 3) {
   5724 //ZZ                         regM++;
   5725 //ZZ                         i->ARMin.NUnaryS.src->index -= 4;
   5726 //ZZ                      }
   5727 //ZZ                      if (i->ARMin.NUnaryS.src->index > 3)
   5728 //ZZ                         goto bad;
   5729 //ZZ                      opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
   5730 //ZZ                      break;
   5731 //ZZ                   case 2:
   5732 //ZZ                      goto bad;
   5733 //ZZ                   default:
   5734 //ZZ                      goto bad;
   5735 //ZZ                }
   5736 //ZZ                opc1 = (opc >> 2) & 3;
   5737 //ZZ                opc2 = opc & 3;
   5738 //ZZ                insn = XXXXXXXX(0xE, X1110, BITS4(1,(opc1 >> 1),(opc1 & 1),1),
   5739 //ZZ                                regM, regD, X1011,
   5740 //ZZ                                BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
   5741 //ZZ                *p++ = insn;
   5742 //ZZ                goto done;
   5743 //ZZ             case ARMneon_GETELEMS:
   5744 //ZZ                regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) :
   5745 //ZZ                                 dregNo(i->ARMin.NUnaryS.src->reg);
   5746 //ZZ                regD = iregNo(i->ARMin.NUnaryS.dst->reg);
   5747 //ZZ                M = regM >> 4;
   5748 //ZZ                D = regD >> 4;
   5749 //ZZ                regM &= 0xF;
   5750 //ZZ                regD &= 0xF;
   5751 //ZZ                if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
   5752 //ZZ                   goto bad;
   5753 //ZZ                switch (size) {
   5754 //ZZ                   case 0:
   5755 //ZZ                      if (Q && i->ARMin.NUnaryS.src->index > 7) {
   5756 //ZZ                         regM++;
   5757 //ZZ                         i->ARMin.NUnaryS.src->index -= 8;
   5758 //ZZ                      }
   5759 //ZZ                      if (i->ARMin.NUnaryS.src->index > 7)
   5760 //ZZ                         goto bad;
   5761 //ZZ                      opc = X1000 | i->ARMin.NUnaryS.src->index;
   5762 //ZZ                      break;
   5763 //ZZ                   case 1:
   5764 //ZZ                      if (Q && i->ARMin.NUnaryS.src->index > 3) {
   5765 //ZZ                         regM++;
   5766 //ZZ                         i->ARMin.NUnaryS.src->index -= 4;
   5767 //ZZ                      }
   5768 //ZZ                      if (i->ARMin.NUnaryS.src->index > 3)
   5769 //ZZ                         goto bad;
   5770 //ZZ                      opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
   5771 //ZZ                      break;
   5772 //ZZ                   case 2:
   5773 //ZZ                      if (Q && i->ARMin.NUnaryS.src->index > 1) {
   5774 //ZZ                         regM++;
   5775 //ZZ                         i->ARMin.NUnaryS.src->index -= 2;
   5776 //ZZ                      }
   5777 //ZZ                      if (i->ARMin.NUnaryS.src->index > 1)
   5778 //ZZ                         goto bad;
   5779 //ZZ                      opc = X0000 | (i->ARMin.NUnaryS.src->index << 2);
   5780 //ZZ                      break;
   5781 //ZZ                   default:
   5782 //ZZ                      goto bad;
   5783 //ZZ                }
   5784 //ZZ                opc1 = (opc >> 2) & 3;
   5785 //ZZ                opc2 = opc & 3;
   5786 //ZZ                insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),1),
   5787 //ZZ                                regM, regD, X1011,
   5788 //ZZ                                BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
   5789 //ZZ                *p++ = insn;
   5790 //ZZ                goto done;
   5791 //ZZ             default:
   5792 //ZZ                goto bad;
   5793 //ZZ          }
   5794 //ZZ       }
   5795 //ZZ       case ARMin_NUnary: {
   5796 //ZZ          UInt Q = i->ARMin.NUnary.Q ? 1 : 0;
   5797 //ZZ          UInt regD = (hregClass(i->ARMin.NUnary.dst) == HRcVec128)
   5798 //ZZ                        ? (qregNo(i->ARMin.NUnary.dst) << 1)
   5799 //ZZ                        : dregNo(i->ARMin.NUnary.dst);
   5800 //ZZ          UInt regM, M;
   5801 //ZZ          UInt D = regD >> 4;
   5802 //ZZ          UInt sz1 = i->ARMin.NUnary.size >> 1;
   5803 //ZZ          UInt sz2 = i->ARMin.NUnary.size & 1;
   5804 //ZZ          UInt sz = i->ARMin.NUnary.size;
   5805 //ZZ          UInt insn;
   5806 //ZZ          UInt F = 0; /* TODO: floating point EQZ ??? */
   5807 //ZZ          if (i->ARMin.NUnary.op != ARMneon_DUP) {
   5808 //ZZ             regM = (hregClass(i->ARMin.NUnary.src) == HRcVec128)
   5809 //ZZ                      ? (qregNo(i->ARMin.NUnary.src) << 1)
   5810 //ZZ                      : dregNo(i->ARMin.NUnary.src);
   5811 //ZZ             M = regM >> 4;
   5812 //ZZ          } else {
   5813 //ZZ             regM = iregNo(i->ARMin.NUnary.src);
   5814 //ZZ             M = regM >> 4;
   5815 //ZZ          }
   5816 //ZZ          regD &= 0xF;
   5817 //ZZ          regM &= 0xF;
   5818 //ZZ          switch (i->ARMin.NUnary.op) {
   5819 //ZZ             case ARMneon_COPY: /* VMOV reg, reg */
   5820 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regM, regD, X0001,
   5821 //ZZ                                BITS4(M,Q,M,1), regM);
   5822 //ZZ                break;
   5823 //ZZ             case ARMneon_COPYN: /* VMOVN regD, regQ */
   5824 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
   5825 //ZZ                                regD, X0010, BITS4(0,0,M,0), regM);
   5826 //ZZ                break;
   5827 //ZZ             case ARMneon_COPYQNSS: /* VQMOVN regD, regQ */
   5828 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
   5829 //ZZ                                regD, X0010, BITS4(1,0,M,0), regM);
   5830 //ZZ                break;
   5831 //ZZ             case ARMneon_COPYQNUS: /* VQMOVUN regD, regQ */
   5832 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
   5833 //ZZ                                regD, X0010, BITS4(0,1,M,0), regM);
   5834 //ZZ                break;
   5835 //ZZ             case ARMneon_COPYQNUU: /* VQMOVN regD, regQ */
   5836 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
   5837 //ZZ                                regD, X0010, BITS4(1,1,M,0), regM);
   5838 //ZZ                break;
   5839 //ZZ             case ARMneon_COPYLS: /* VMOVL regQ, regD */
   5840 //ZZ                if (sz >= 3)
   5841 //ZZ                   goto bad;
   5842 //ZZ                insn = XXXXXXXX(0xF, X0010,
   5843 //ZZ                                BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
   5844 //ZZ                                BITS4((sz == 0) ? 1 : 0,0,0,0),
   5845 //ZZ                                regD, X1010, BITS4(0,0,M,1), regM);
   5846 //ZZ                break;
   5847 //ZZ             case ARMneon_COPYLU: /* VMOVL regQ, regD */
   5848 //ZZ                if (sz >= 3)
   5849 //ZZ                   goto bad;
   5850 //ZZ                insn = XXXXXXXX(0xF, X0011,
   5851 //ZZ                                BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
   5852 //ZZ                                BITS4((sz == 0) ? 1 : 0,0,0,0),
   5853 //ZZ                                regD, X1010, BITS4(0,0,M,1), regM);
   5854 //ZZ                break;
   5855 //ZZ             case ARMneon_NOT: /* VMVN reg, reg*/
   5856 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
   5857 //ZZ                                BITS4(1,Q,M,0), regM);
   5858 //ZZ                break;
   5859 //ZZ             case ARMneon_EQZ:
   5860 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
   5861 //ZZ                                regD, BITS4(0,F,0,1), BITS4(0,Q,M,0), regM);
   5862 //ZZ                break;
   5863 //ZZ             case ARMneon_CNT:
   5864 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
   5865 //ZZ                                BITS4(0,Q,M,0), regM);
   5866 //ZZ                break;
   5867 //ZZ             case ARMneon_CLZ:
   5868 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
   5869 //ZZ                                regD, X0100, BITS4(1,Q,M,0), regM);
   5870 //ZZ                break;
   5871 //ZZ             case ARMneon_CLS:
   5872 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
   5873 //ZZ                                regD, X0100, BITS4(0,Q,M,0), regM);
   5874 //ZZ                break;
   5875 //ZZ             case ARMneon_ABS:
   5876 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
   5877 //ZZ                                regD, X0011, BITS4(0,Q,M,0), regM);
   5878 //ZZ                break;
   5879 //ZZ             case ARMneon_DUP:
   5880 //ZZ                sz1 = i->ARMin.NUnary.size == 0 ? 1 : 0;
   5881 //ZZ                sz2 = i->ARMin.NUnary.size == 1 ? 1 : 0;
   5882 //ZZ                vassert(sz1 + sz2 < 2);
   5883 //ZZ                insn = XXXXXXXX(0xE, X1110, BITS4(1, sz1, Q, 0), regD, regM,
   5884 //ZZ                                X1011, BITS4(D,0,sz2,1), X0000);
   5885 //ZZ                break;
   5886 //ZZ             case ARMneon_REV16:
   5887 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
   5888 //ZZ                                regD, BITS4(0,0,0,1), BITS4(0,Q,M,0), regM);
   5889 //ZZ                break;
   5890 //ZZ             case ARMneon_REV32:
   5891 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
   5892 //ZZ                                regD, BITS4(0,0,0,0), BITS4(1,Q,M,0), regM);
   5893 //ZZ                break;
   5894 //ZZ             case ARMneon_REV64:
   5895 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
   5896 //ZZ                                regD, BITS4(0,0,0,0), BITS4(0,Q,M,0), regM);
   5897 //ZZ                break;
   5898 //ZZ             case ARMneon_PADDLU:
   5899 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
   5900 //ZZ                                regD, X0010, BITS4(1,Q,M,0), regM);
   5901 //ZZ                break;
   5902 //ZZ             case ARMneon_PADDLS:
   5903 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
   5904 //ZZ                                regD, X0010, BITS4(0,Q,M,0), regM);
   5905 //ZZ                break;
   5906 //ZZ             case ARMneon_VQSHLNUU:
   5907 //ZZ                insn = XXXXXXXX(0xF, X0011,
   5908 //ZZ                                (1 << 3) | (D << 2) | ((sz >> 4) & 3),
   5909 //ZZ                                sz & 0xf, regD, X0111,
   5910 //ZZ                                BITS4(sz >> 6,Q,M,1), regM);
   5911 //ZZ                break;
   5912 //ZZ             case ARMneon_VQSHLNSS:
   5913 //ZZ                insn = XXXXXXXX(0xF, X0010,
   5914 //ZZ                                (1 << 3) | (D << 2) | ((sz >> 4) & 3),
   5915 //ZZ                                sz & 0xf, regD, X0111,
   5916 //ZZ                                BITS4(sz >> 6,Q,M,1), regM);
   5917 //ZZ                break;
   5918 //ZZ             case ARMneon_VQSHLNUS:
   5919 //ZZ                insn = XXXXXXXX(0xF, X0011,
   5920 //ZZ                                (1 << 3) | (D << 2) | ((sz >> 4) & 3),
   5921 //ZZ                                sz & 0xf, regD, X0110,
   5922 //ZZ                                BITS4(sz >> 6,Q,M,1), regM);
   5923 //ZZ                break;
   5924 //ZZ             case ARMneon_VCVTFtoS:
   5925 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
   5926 //ZZ                                BITS4(0,Q,M,0), regM);
   5927 //ZZ                break;
   5928 //ZZ             case ARMneon_VCVTFtoU:
   5929 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
   5930 //ZZ                                BITS4(1,Q,M,0), regM);
   5931 //ZZ                break;
   5932 //ZZ             case ARMneon_VCVTStoF:
   5933 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
   5934 //ZZ                                BITS4(0,Q,M,0), regM);
   5935 //ZZ                break;
   5936 //ZZ             case ARMneon_VCVTUtoF:
   5937 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
   5938 //ZZ                                BITS4(1,Q,M,0), regM);
   5939 //ZZ                break;
   5940 //ZZ             case ARMneon_VCVTFtoFixedU:
   5941 //ZZ                sz1 = (sz >> 5) & 1;
   5942 //ZZ                sz2 = (sz >> 4) & 1;
   5943 //ZZ                sz &= 0xf;
   5944 //ZZ                insn = XXXXXXXX(0xF, X0011,
   5945 //ZZ                                BITS4(1,D,sz1,sz2), sz, regD, X1111,
   5946 //ZZ                                BITS4(0,Q,M,1), regM);
   5947 //ZZ                break;
   5948 //ZZ             case ARMneon_VCVTFtoFixedS:
   5949 //ZZ                sz1 = (sz >> 5) & 1;
   5950 //ZZ                sz2 = (sz >> 4) & 1;
   5951 //ZZ                sz &= 0xf;
   5952 //ZZ                insn = XXXXXXXX(0xF, X0010,
   5953 //ZZ                                BITS4(1,D,sz1,sz2), sz, regD, X1111,
   5954 //ZZ                                BITS4(0,Q,M,1), regM);
   5955 //ZZ                break;
   5956 //ZZ             case ARMneon_VCVTFixedUtoF:
   5957 //ZZ                sz1 = (sz >> 5) & 1;
   5958 //ZZ                sz2 = (sz >> 4) & 1;
   5959 //ZZ                sz &= 0xf;
   5960 //ZZ                insn = XXXXXXXX(0xF, X0011,
   5961 //ZZ                                BITS4(1,D,sz1,sz2), sz, regD, X1110,
   5962 //ZZ                                BITS4(0,Q,M,1), regM);
   5963 //ZZ                break;
   5964 //ZZ             case ARMneon_VCVTFixedStoF:
   5965 //ZZ                sz1 = (sz >> 5) & 1;
   5966 //ZZ                sz2 = (sz >> 4) & 1;
   5967 //ZZ                sz &= 0xf;
   5968 //ZZ                insn = XXXXXXXX(0xF, X0010,
   5969 //ZZ                                BITS4(1,D,sz1,sz2), sz, regD, X1110,
   5970 //ZZ                                BITS4(0,Q,M,1), regM);
   5971 //ZZ                break;
   5972 //ZZ             case ARMneon_VCVTF32toF16:
   5973 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0110,
   5974 //ZZ                                BITS4(0,0,M,0), regM);
   5975 //ZZ                break;
   5976 //ZZ             case ARMneon_VCVTF16toF32:
   5977 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0111,
   5978 //ZZ                                BITS4(0,0,M,0), regM);
   5979 //ZZ                break;
   5980 //ZZ             case ARMneon_VRECIP:
   5981 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
   5982 //ZZ                                BITS4(0,Q,M,0), regM);
   5983 //ZZ                break;
   5984 //ZZ             case ARMneon_VRECIPF:
   5985 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
   5986 //ZZ                                BITS4(0,Q,M,0), regM);
   5987 //ZZ                break;
   5988 //ZZ             case ARMneon_VABSFP:
   5989 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
   5990 //ZZ                                BITS4(0,Q,M,0), regM);
   5991 //ZZ                break;
   5992 //ZZ             case ARMneon_VRSQRTEFP:
   5993 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
   5994 //ZZ                                BITS4(1,Q,M,0), regM);
   5995 //ZZ                break;
   5996 //ZZ             case ARMneon_VRSQRTE:
   5997 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
   5998 //ZZ                                BITS4(1,Q,M,0), regM);
   5999 //ZZ                break;
   6000 //ZZ             case ARMneon_VNEGF:
   6001 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
   6002 //ZZ                                BITS4(1,Q,M,0), regM);
   6003 //ZZ                break;
   6004 //ZZ
   6005 //ZZ             default:
   6006 //ZZ                goto bad;
   6007 //ZZ          }
   6008 //ZZ          *p++ = insn;
   6009 //ZZ          goto done;
   6010 //ZZ       }
   6011 //ZZ       case ARMin_NDual: {
   6012 //ZZ          UInt Q = i->ARMin.NDual.Q ? 1 : 0;
   6013 //ZZ          UInt regD = (hregClass(i->ARMin.NDual.arg1) == HRcVec128)
   6014 //ZZ                        ? (qregNo(i->ARMin.NDual.arg1) << 1)
   6015 //ZZ                        : dregNo(i->ARMin.NDual.arg1);
   6016 //ZZ          UInt regM = (hregClass(i->ARMin.NDual.arg2) == HRcVec128)
   6017 //ZZ                        ? (qregNo(i->ARMin.NDual.arg2) << 1)
   6018 //ZZ                        : dregNo(i->ARMin.NDual.arg2);
   6019 //ZZ          UInt D = regD >> 4;
   6020 //ZZ          UInt M = regM >> 4;
   6021 //ZZ          UInt sz1 = i->ARMin.NDual.size >> 1;
   6022 //ZZ          UInt sz2 = i->ARMin.NDual.size & 1;
   6023 //ZZ          UInt insn;
   6024 //ZZ          regD &= 0xF;
   6025 //ZZ          regM &= 0xF;
   6026 //ZZ          switch (i->ARMin.NDual.op) {
   6027 //ZZ             case ARMneon_TRN: /* VTRN reg, reg */
   6028 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
   6029 //ZZ                                regD, X0000, BITS4(1,Q,M,0), regM);
   6030 //ZZ                break;
   6031 //ZZ             case ARMneon_ZIP: /* VZIP reg, reg */
   6032 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
   6033 //ZZ                                regD, X0001, BITS4(1,Q,M,0), regM);
   6034 //ZZ                break;
   6035 //ZZ             case ARMneon_UZP: /* VUZP reg, reg */
   6036 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
   6037 //ZZ                                regD, X0001, BITS4(0,Q,M,0), regM);
   6038 //ZZ                break;
   6039 //ZZ             default:
   6040 //ZZ                goto bad;
   6041 //ZZ          }
   6042 //ZZ          *p++ = insn;
   6043 //ZZ          goto done;
   6044 //ZZ       }
   6045 //ZZ       case ARMin_NBinary: {
   6046 //ZZ          UInt Q = i->ARMin.NBinary.Q ? 1 : 0;
   6047 //ZZ          UInt regD = (hregClass(i->ARMin.NBinary.dst) == HRcVec128)
   6048 //ZZ                        ? (qregNo(i->ARMin.NBinary.dst) << 1)
   6049 //ZZ                        : dregNo(i->ARMin.NBinary.dst);
   6050 //ZZ          UInt regN = (hregClass(i->ARMin.NBinary.argL) == HRcVec128)
   6051 //ZZ                        ? (qregNo(i->ARMin.NBinary.argL) << 1)
   6052 //ZZ                        : dregNo(i->ARMin.NBinary.argL);
   6053 //ZZ          UInt regM = (hregClass(i->ARMin.NBinary.argR) == HRcVec128)
   6054 //ZZ                        ? (qregNo(i->ARMin.NBinary.argR) << 1)
   6055 //ZZ                        : dregNo(i->ARMin.NBinary.argR);
   6056 //ZZ          UInt sz1 = i->ARMin.NBinary.size >> 1;
   6057 //ZZ          UInt sz2 = i->ARMin.NBinary.size & 1;
   6058 //ZZ          UInt D = regD >> 4;
   6059 //ZZ          UInt N = regN >> 4;
   6060 //ZZ          UInt M = regM >> 4;
   6061 //ZZ          UInt insn;
   6062 //ZZ          regD &= 0xF;
   6063 //ZZ          regM &= 0xF;
   6064 //ZZ          regN &= 0xF;
   6065 //ZZ          switch (i->ARMin.NBinary.op) {
   6066 //ZZ             case ARMneon_VAND: /* VAND reg, reg, reg */
   6067 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X0001,
   6068 //ZZ                                BITS4(N,Q,M,1), regM);
   6069 //ZZ                break;
   6070 //ZZ             case ARMneon_VORR: /* VORR reg, reg, reg*/
   6071 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X0001,
   6072 //ZZ                                BITS4(N,Q,M,1), regM);
   6073 //ZZ                break;
   6074 //ZZ             case ARMneon_VXOR: /* VEOR reg, reg, reg */
   6075 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X0001,
   6076 //ZZ                                BITS4(N,Q,M,1), regM);
   6077 //ZZ                break;
   6078 //ZZ             case ARMneon_VADD: /* VADD reg, reg, reg */
   6079 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   6080 //ZZ                                X1000, BITS4(N,Q,M,0), regM);
   6081 //ZZ                break;
   6082 //ZZ             case ARMneon_VSUB: /* VSUB reg, reg, reg */
   6083 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   6084 //ZZ                                X1000, BITS4(N,Q,M,0), regM);
   6085 //ZZ                break;
   6086 //ZZ             case ARMneon_VMINU: /* VMIN.Uxx reg, reg, reg */
   6087 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   6088 //ZZ                                X0110, BITS4(N,Q,M,1), regM);
   6089 //ZZ                break;
   6090 //ZZ             case ARMneon_VMINS: /* VMIN.Sxx reg, reg, reg */
   6091 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   6092 //ZZ                                X0110, BITS4(N,Q,M,1), regM);
   6093 //ZZ                break;
   6094 //ZZ             case ARMneon_VMAXU: /* VMAX.Uxx reg, reg, reg */
   6095 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   6096 //ZZ                                X0110, BITS4(N,Q,M,0), regM);
   6097 //ZZ                break;
   6098 //ZZ             case ARMneon_VMAXS: /* VMAX.Sxx reg, reg, reg */
   6099 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   6100 //ZZ                                X0110, BITS4(N,Q,M,0), regM);
   6101 //ZZ                break;
   6102 //ZZ             case ARMneon_VRHADDS: /* VRHADD.Sxx reg, reg, reg */
   6103 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   6104 //ZZ                                X0001, BITS4(N,Q,M,0), regM);
   6105 //ZZ                break;
   6106 //ZZ             case ARMneon_VRHADDU: /* VRHADD.Uxx reg, reg, reg */
   6107 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   6108 //ZZ                                X0001, BITS4(N,Q,M,0), regM);
   6109 //ZZ                break;
   6110 //ZZ             case ARMneon_VQADDU: /* VQADD unsigned reg, reg, reg */
   6111 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   6112 //ZZ                                X0000, BITS4(N,Q,M,1), regM);
   6113 //ZZ                break;
   6114 //ZZ             case ARMneon_VQADDS: /* VQADD signed reg, reg, reg */
   6115 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   6116 //ZZ                                X0000, BITS4(N,Q,M,1), regM);
   6117 //ZZ                break;
   6118 //ZZ             case ARMneon_VQSUBU: /* VQSUB unsigned reg, reg, reg */
   6119 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   6120 //ZZ                                X0010, BITS4(N,Q,M,1), regM);
   6121 //ZZ                break;
   6122 //ZZ             case ARMneon_VQSUBS: /* VQSUB signed reg, reg, reg */
   6123 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   6124 //ZZ                                X0010, BITS4(N,Q,M,1), regM);
   6125 //ZZ                break;
   6126 //ZZ             case ARMneon_VCGTU: /* VCGT unsigned reg, reg, reg */
   6127 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   6128 //ZZ                                X0011, BITS4(N,Q,M,0), regM);
   6129 //ZZ                break;
   6130 //ZZ             case ARMneon_VCGTS: /* VCGT signed reg, reg, reg */
   6131 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   6132 //ZZ                                X0011, BITS4(N,Q,M,0), regM);
   6133 //ZZ                break;
   6134 //ZZ             case ARMneon_VCGEU: /* VCGE unsigned reg, reg, reg */
   6135 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   6136 //ZZ                                X0011, BITS4(N,Q,M,1), regM);
   6137 //ZZ                break;
   6138 //ZZ             case ARMneon_VCGES: /* VCGE signed reg, reg, reg */
   6139 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   6140 //ZZ                                X0011, BITS4(N,Q,M,1), regM);
   6141 //ZZ                break;
   6142 //ZZ             case ARMneon_VCEQ: /* VCEQ reg, reg, reg */
   6143 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   6144 //ZZ                                X1000, BITS4(N,Q,M,1), regM);
   6145 //ZZ                break;
   6146 //ZZ             case ARMneon_VEXT: /* VEXT.8 reg, reg, #imm4*/
   6147 //ZZ                if (i->ARMin.NBinary.size >= 16)
   6148 //ZZ                   goto bad;
   6149 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(1,D,1,1), regN, regD,
   6150 //ZZ                                i->ARMin.NBinary.size & 0xf, BITS4(N,Q,M,0),
   6151 //ZZ                                regM);
   6152 //ZZ                break;
   6153 //ZZ             case ARMneon_VMUL:
   6154 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   6155 //ZZ                                X1001, BITS4(N,Q,M,1), regM);
   6156 //ZZ                break;
   6157 //ZZ             case ARMneon_VMULLU:
   6158 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,sz1,sz2), regN, regD,
   6159 //ZZ                                X1100, BITS4(N,0,M,0), regM);
   6160 //ZZ                break;
   6161 //ZZ             case ARMneon_VMULLS:
   6162 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
   6163 //ZZ                                X1100, BITS4(N,0,M,0), regM);
   6164 //ZZ                break;
   6165 //ZZ             case ARMneon_VMULP:
   6166 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   6167 //ZZ                                X1001, BITS4(N,Q,M,1), regM);
   6168 //ZZ                break;
   6169 //ZZ             case ARMneon_VMULFP:
   6170 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
   6171 //ZZ                                X1101, BITS4(N,Q,M,1), regM);
   6172 //ZZ                break;
   6173 //ZZ             case ARMneon_VMULLP:
   6174 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
   6175 //ZZ                                X1110, BITS4(N,0,M,0), regM);
   6176 //ZZ                break;
   6177 //ZZ             case ARMneon_VQDMULH:
   6178 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   6179 //ZZ                                X1011, BITS4(N,Q,M,0), regM);
   6180 //ZZ                break;
   6181 //ZZ             case ARMneon_VQRDMULH:
   6182 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   6183 //ZZ                                X1011, BITS4(N,Q,M,0), regM);
   6184 //ZZ                break;
   6185 //ZZ             case ARMneon_VQDMULL:
   6186 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
   6187 //ZZ                                X1101, BITS4(N,0,M,0), regM);
   6188 //ZZ                break;
   6189 //ZZ             case ARMneon_VTBL:
   6190 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), regN, regD,
   6191 //ZZ                                X1000, BITS4(N,0,M,0), regM);
   6192 //ZZ                break;
   6193 //ZZ             case ARMneon_VPADD:
   6194 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   6195 //ZZ                                X1011, BITS4(N,Q,M,1), regM);
   6196 //ZZ                break;
   6197 //ZZ             case ARMneon_VPADDFP:
   6198 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
   6199 //ZZ                                X1101, BITS4(N,Q,M,0), regM);
   6200 //ZZ                break;
   6201 //ZZ             case ARMneon_VPMINU:
   6202 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   6203 //ZZ                                X1010, BITS4(N,Q,M,1), regM);
   6204 //ZZ                break;
   6205 //ZZ             case ARMneon_VPMINS:
   6206 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   6207 //ZZ                                X1010, BITS4(N,Q,M,1), regM);
   6208 //ZZ                break;
   6209 //ZZ             case ARMneon_VPMAXU:
   6210 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   6211 //ZZ                                X1010, BITS4(N,Q,M,0), regM);
   6212 //ZZ                break;
   6213 //ZZ             case ARMneon_VPMAXS:
   6214 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   6215 //ZZ                                X1010, BITS4(N,Q,M,0), regM);
   6216 //ZZ                break;
   6217 //ZZ             case ARMneon_VADDFP: /* VADD reg, reg, reg */
   6218 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
   6219 //ZZ                                X1101, BITS4(N,Q,M,0), regM);
   6220 //ZZ                break;
   6221 //ZZ             case ARMneon_VSUBFP: /* VSUB reg, reg, reg */
   6222 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
   6223 //ZZ                                X1101, BITS4(N,Q,M,0), regM);
   6224 //ZZ                break;
   6225 //ZZ             case ARMneon_VABDFP: /* VABD reg, reg, reg */
   6226 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
   6227 //ZZ                                X1101, BITS4(N,Q,M,0), regM);
   6228 //ZZ                break;
   6229 //ZZ             case ARMneon_VMINF:
   6230 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
   6231 //ZZ                                X1111, BITS4(N,Q,M,0), regM);
   6232 //ZZ                break;
   6233 //ZZ             case ARMneon_VMAXF:
   6234 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
   6235 //ZZ                                X1111, BITS4(N,Q,M,0), regM);
   6236 //ZZ                break;
   6237 //ZZ             case ARMneon_VPMINF:
   6238 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
   6239 //ZZ                                X1111, BITS4(N,Q,M,0), regM);
   6240 //ZZ                break;
   6241 //ZZ             case ARMneon_VPMAXF:
   6242 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
   6243 //ZZ                                X1111, BITS4(N,Q,M,0), regM);
   6244 //ZZ                break;
   6245 //ZZ             case ARMneon_VRECPS:
   6246 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1111,
   6247 //ZZ                                BITS4(N,Q,M,1), regM);
   6248 //ZZ                break;
   6249 //ZZ             case ARMneon_VCGTF:
   6250 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD, X1110,
   6251 //ZZ                                BITS4(N,Q,M,0), regM);
   6252 //ZZ                break;
   6253 //ZZ             case ARMneon_VCGEF:
   6254 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X1110,
   6255 //ZZ                                BITS4(N,Q,M,0), regM);
   6256 //ZZ                break;
   6257 //ZZ             case ARMneon_VCEQF:
   6258 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1110,
   6259 //ZZ                                BITS4(N,Q,M,0), regM);
   6260 //ZZ                break;
   6261 //ZZ             case ARMneon_VRSQRTS:
   6262 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X1111,
   6263 //ZZ                                BITS4(N,Q,M,1), regM);
   6264 //ZZ                break;
   6265 //ZZ             default:
   6266 //ZZ                goto bad;
   6267 //ZZ          }
   6268 //ZZ          *p++ = insn;
   6269 //ZZ          goto done;
   6270 //ZZ       }
   6271 //ZZ       case ARMin_NShift: {
   6272 //ZZ          UInt Q = i->ARMin.NShift.Q ? 1 : 0;
   6273 //ZZ          UInt regD = (hregClass(i->ARMin.NShift.dst) == HRcVec128)
   6274 //ZZ                        ? (qregNo(i->ARMin.NShift.dst) << 1)
   6275 //ZZ                        : dregNo(i->ARMin.NShift.dst);
   6276 //ZZ          UInt regM = (hregClass(i->ARMin.NShift.argL) == HRcVec128)
   6277 //ZZ                        ? (qregNo(i->ARMin.NShift.argL) << 1)
   6278 //ZZ                        : dregNo(i->ARMin.NShift.argL);
   6279 //ZZ          UInt regN = (hregClass(i->ARMin.NShift.argR) == HRcVec128)
   6280 //ZZ                        ? (qregNo(i->ARMin.NShift.argR) << 1)
   6281 //ZZ                        : dregNo(i->ARMin.NShift.argR);
   6282 //ZZ          UInt sz1 = i->ARMin.NShift.size >> 1;
   6283 //ZZ          UInt sz2 = i->ARMin.NShift.size & 1;
   6284 //ZZ          UInt D = regD >> 4;
   6285 //ZZ          UInt N = regN >> 4;
   6286 //ZZ          UInt M = regM >> 4;
   6287 //ZZ          UInt insn;
   6288 //ZZ          regD &= 0xF;
   6289 //ZZ          regM &= 0xF;
   6290 //ZZ          regN &= 0xF;
   6291 //ZZ          switch (i->ARMin.NShift.op) {
   6292 //ZZ             case ARMneon_VSHL:
   6293 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   6294 //ZZ                                X0100, BITS4(N,Q,M,0), regM);
   6295 //ZZ                break;
   6296 //ZZ             case ARMneon_VSAL:
   6297 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   6298 //ZZ                                X0100, BITS4(N,Q,M,0), regM);
   6299 //ZZ                break;
   6300 //ZZ             case ARMneon_VQSHL:
   6301 //ZZ                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   6302 //ZZ                                X0100, BITS4(N,Q,M,1), regM);
   6303 //ZZ                break;
   6304 //ZZ             case ARMneon_VQSAL:
   6305 //ZZ                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   6306 //ZZ                                X0100, BITS4(N,Q,M,1), regM);
   6307 //ZZ                break;
   6308 //ZZ             default:
   6309 //ZZ                goto bad;
   6310 //ZZ          }
   6311 //ZZ          *p++ = insn;
   6312 //ZZ          goto done;
   6313 //ZZ       }
   6314 //ZZ       case ARMin_NShl64: {
   6315 //ZZ          HReg regDreg = i->ARMin.NShl64.dst;
   6316 //ZZ          HReg regMreg = i->ARMin.NShl64.src;
   6317 //ZZ          UInt amt     = i->ARMin.NShl64.amt;
   6318 //ZZ          vassert(amt >= 1 && amt <= 63);
   6319 //ZZ          vassert(hregClass(regDreg) == HRcFlt64);
   6320 //ZZ          vassert(hregClass(regMreg) == HRcFlt64);
   6321 //ZZ          UInt regD = dregNo(regDreg);
   6322 //ZZ          UInt regM = dregNo(regMreg);
   6323 //ZZ          UInt D    = (regD >> 4) & 1;
   6324 //ZZ          UInt Vd   = regD & 0xF;
   6325 //ZZ          UInt L    = 1;
   6326 //ZZ          UInt Q    = 0; /* always 64-bit */
   6327 //ZZ          UInt M    = (regM >> 4) & 1;
   6328 //ZZ          UInt Vm   = regM & 0xF;
   6329 //ZZ          UInt insn = XXXXXXXX(X1111,X0010, BITS4(1,D,(amt>>5)&1,(amt>>4)&1),
   6330 //ZZ                               amt & 0xF, Vd, X0101, BITS4(L,Q,M,1), Vm);
   6331 //ZZ          *p++ = insn;
   6332 //ZZ          goto done;
   6333 //ZZ       }
   6334       case ARM64in_VImmQ: {
   6335          UInt   rQ  = qregNo(i->ARM64in.VImmQ.rQ);
   6336          UShort imm = i->ARM64in.VImmQ.imm;
   6337          if (imm == 0x0000) {
   6338             /* movi rQ.4s, #0x0 == 0x4F 0x00 0x04 000 rQ */
   6339             vassert(rQ < 32);
   6340             *p++ = 0x4F000400 | rQ;
   6341             goto done;
   6342          }
   6343          if (imm == 0x0001) {
   6344             /* movi rQ, #0xFF == 0x2F 0x00 0xE4 001 rQ */
   6345             vassert(rQ < 32);
   6346             *p++ = 0x2F00E420 | rQ;
   6347             goto done;
   6348          }
   6349          if (imm == 0x0003) {
   6350             /* movi rQ, #0xFFFF == 0x2F 0x00 0xE4 011 rQ */
   6351             vassert(rQ < 32);
   6352             *p++ = 0x2F00E460 | rQ;
   6353             goto done;
   6354          }
   6355          if (imm == 0x000F) {
   6356             /* movi rQ, #0xFFFFFFFF == 0x2F 0x00 0xE5 111 rQ */
   6357             vassert(rQ < 32);
   6358             *p++ = 0x2F00E5E0 | rQ;
   6359             goto done;
   6360          }
   6361          if (imm == 0x00FF) {
   6362             /* movi rQ, #0xFFFFFFFFFFFFFFFF == 0x2F 0x07 0xE7 111 rQ */
   6363             vassert(rQ < 32);
   6364             *p++ = 0x2F07E7E0 | rQ;
   6365             goto done;
   6366          }
   6367          goto bad; /* no other handled cases right now */
   6368       }
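             /* Editorial aside (illustrative only, nothing emitted): the
                16-bit imm here appears to act as a byte mask on the 128-bit
                result, one bit per byte, which is consistent with the cases
                handled above:
                   imm 0x0001  ->  Q = 0x...00FF          (low byte set)
                   imm 0x000F  ->  Q = 0x...FFFFFFFF      (low 4 bytes set)
                   imm 0x00FF  ->  low 64 bits all ones, upper 64 bits zero
                Any other mask currently falls through to the 'bad' label. */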
   6369 
   6370       case ARM64in_VDfromX: {
   6371          /* INS Vd.D[0], rX
   6372             0100 1110 0000 1000 0001 11 nn dd   INS Vd.D[0], Xn
   6373             This isn't wonderful, in the sense that the upper half of
   6374             the vector register stays unchanged and thus the insn is
   6375             data dependent on its output register. */
   6376          UInt dd = dregNo(i->ARM64in.VDfromX.rD);
   6377          UInt xx = iregNo(i->ARM64in.VDfromX.rX);
   6378          vassert(xx < 31);
   6379          *p++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xx,dd);
   6380          goto done;
   6381       }
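             /* (Editorial, purely illustrative: the output-register
                dependency noted above could in principle be broken by
                zeroing Vd first, e.g. with a MOVI #0, at the cost of an
                extra instruction; the emitter does not do that here.) */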
   6382 
   6383       case ARM64in_VQfromXX: {
   6384          /* What we really generate is a two insn sequence:
   6385                INS Vd.D[0], Xlo; INS Vd.D[1], Xhi
   6386             0100 1110 0000 1000 0001 11 nn dd   INS Vd.D[0], Xn
   6387             0100 1110 0001 1000 0001 11 nn dd   INS Vd.D[1], Xn
   6388          */
   6389          UInt qq  = qregNo(i->ARM64in.VQfromXX.rQ);
   6390          UInt xhi = iregNo(i->ARM64in.VQfromXX.rXhi);
   6391          UInt xlo = iregNo(i->ARM64in.VQfromXX.rXlo);
   6392          vassert(xhi < 31 && xlo < 31);
   6393          *p++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xlo,qq);
   6394          *p++ = 0x4E181C00 | X_2_6_2_12_5_5(0,0,0,0,xhi,qq);
   6395          goto done;
   6396       }
   6397 
   6398       case ARM64in_VXfromQ: {
   6399          /* 010 0111 0000 01000 001111 nn dd  UMOV Xd, Vn.D[0]
   6400             010 0111 0000 11000 001111 nn dd  UMOV Xd, Vn.D[1]
   6401          */
   6402          UInt dd     = iregNo(i->ARM64in.VXfromQ.rX);
   6403          UInt nn     = qregNo(i->ARM64in.VXfromQ.rQ);
   6404          UInt laneNo = i->ARM64in.VXfromQ.laneNo;
   6405          vassert(dd < 31);
   6406          vassert(laneNo < 2);
   6407          *p++ = X_3_8_5_6_5_5(X010, X01110000,
   6408                               laneNo == 1 ? X11000 : X01000, X001111, nn, dd);
   6409          goto done;
   6410       }
   6411 
   6412       case ARM64in_VMov: {
   6413          /* 000 11110 00 10000 00 10000 n d   FMOV Sd, Sn
   6414             000 11110 01 10000 00 10000 n d   FMOV Dd, Dn
   6415             010 01110 10 1 n    0 00111 n d   MOV Vd.16b, Vn.16b
   6416          */
   6417         HReg rD = i->ARM64in.VMov.dst;
   6418         HReg rN = i->ARM64in.VMov.src;
   6419         switch (i->ARM64in.VMov.szB) {
   6420            case 8: {
   6421               UInt dd = dregNo(rD);
   6422               UInt nn = dregNo(rN);
   6423               *p++ = X_3_8_5_6_5_5(X000, X11110011, X00000, X010000, nn, dd);
   6424               goto done;
   6425            }
   6426            default:
   6427               break;
   6428         }
   6429         goto bad;
   6430       }
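             /* Editorial note: of the three encodings listed in the comment
                above, only the 8-byte (FMOV Dd, Dn) form is emitted at
                present; the 4- and 16-byte sizes fall through to 'bad'. */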
   6431 //ZZ       case ARMin_NeonImm: {
   6432 //ZZ          UInt Q = (hregClass(i->ARMin.NeonImm.dst) == HRcVec128) ? 1 : 0;
   6433 //ZZ          UInt regD = Q ? (qregNo(i->ARMin.NeonImm.dst) << 1) :
   6434 //ZZ                           dregNo(i->ARMin.NeonImm.dst);
   6435 //ZZ          UInt D = regD >> 4;
   6436 //ZZ          UInt imm = i->ARMin.NeonImm.imm->imm8;
   6437 //ZZ          UInt tp = i->ARMin.NeonImm.imm->type;
   6438 //ZZ          UInt j = imm >> 7;
   6439 //ZZ          UInt imm3 = (imm >> 4) & 0x7;
   6440 //ZZ          UInt imm4 = imm & 0xF;
   6441 //ZZ          UInt cmode, op;
   6442 //ZZ          UInt insn;
   6443 //ZZ          regD &= 0xF;
   6444 //ZZ          if (tp == 9)
   6445 //ZZ             op = 1;
   6446 //ZZ          else
   6447 //ZZ             op = 0;
   6448 //ZZ          switch (tp) {
   6449 //ZZ             case 0:
   6450 //ZZ             case 1:
   6451 //ZZ             case 2:
   6452 //ZZ             case 3:
   6453 //ZZ             case 4:
   6454 //ZZ             case 5:
   6455 //ZZ                cmode = tp << 1;
   6456 //ZZ                break;
   6457 //ZZ             case 9:
   6458 //ZZ             case 6:
   6459 //ZZ                cmode = 14;
   6460 //ZZ                break;
   6461 //ZZ             case 7:
   6462 //ZZ                cmode = 12;
   6463 //ZZ                break;
   6464 //ZZ             case 8:
   6465 //ZZ                cmode = 13;
   6466 //ZZ                break;
   6467 //ZZ             case 10:
   6468 //ZZ                cmode = 15;
   6469 //ZZ                break;
   6470 //ZZ             default:
   6471 //ZZ                vpanic("ARMin_NeonImm");
   6472 //ZZ
   6473 //ZZ          }
   6474 //ZZ          insn = XXXXXXXX(0xF, BITS4(0,0,1,j), BITS4(1,D,0,0), imm3, regD,
   6475 //ZZ                          cmode, BITS4(0,Q,op,1), imm4);
   6476 //ZZ          *p++ = insn;
   6477 //ZZ          goto done;
   6478 //ZZ       }
   6479 //ZZ       case ARMin_NCMovQ: {
   6480 //ZZ          UInt cc = (UInt)i->ARMin.NCMovQ.cond;
   6481 //ZZ          UInt qM = qregNo(i->ARMin.NCMovQ.src) << 1;
   6482 //ZZ          UInt qD = qregNo(i->ARMin.NCMovQ.dst) << 1;
   6483 //ZZ          UInt vM = qM & 0xF;
   6484 //ZZ          UInt vD = qD & 0xF;
   6485 //ZZ          UInt M  = (qM >> 4) & 1;
   6486 //ZZ          UInt D  = (qD >> 4) & 1;
   6487 //ZZ          vassert(cc < 16 && cc != ARMcc_AL && cc != ARMcc_NV);
   6488 //ZZ          /* b!cc here+8: !cc A00 0000 */
   6489 //ZZ          UInt insn = XXXXXXXX(cc ^ 1, 0xA, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0);
   6490 //ZZ          *p++ = insn;
   6491 //ZZ          /* vmov qD, qM */
   6492 //ZZ          insn = XXXXXXXX(0xF, 0x2, BITS4(0,D,1,0),
   6493 //ZZ                          vM, vD, BITS4(0,0,0,1), BITS4(M,1,M,1), vM);
   6494 //ZZ          *p++ = insn;
   6495 //ZZ          goto done;
   6496 //ZZ       }
   6497 //ZZ       case ARMin_Add32: {
   6498 //ZZ          UInt regD = iregNo(i->ARMin.Add32.rD);
   6499 //ZZ          UInt regN = iregNo(i->ARMin.Add32.rN);
   6500 //ZZ          UInt imm32 = i->ARMin.Add32.imm32;
   6501 //ZZ          vassert(regD != regN);
   6502 //ZZ          /* MOV regD, imm32 */
   6503 //ZZ          p = imm32_to_iregNo((UInt *)p, regD, imm32);
   6504 //ZZ          /* ADD regD, regN, regD */
   6505 //ZZ          UInt insn = XXXXXXXX(0xE, 0, X1000, regN, regD, 0, 0, regD);
   6506 //ZZ          *p++ = insn;
   6507 //ZZ          goto done;
   6508 //ZZ       }
   6509 
   6510       case ARM64in_EvCheck: {
   6511          /* The sequence is fixed (canned) except for the two amodes
   6512             supplied by the insn.  These don't change the length, though.
   6513             We generate:
   6514                ldr  w9, [x21 + #8]   8 == offsetof(host_EvC_COUNTER)
   6515                subs w9, w9, #1
   6516                str  w9, [x21 + #8]   8 == offsetof(host_EvC_COUNTER)
   6517                bpl  nofail
   6518                ldr  x9, [x21 + #0]   0 == offsetof(host_EvC_FAILADDR)
   6519                br   x9
   6520               nofail:
   6521          */
   6522          UInt* p0 = p;
   6523          p = do_load_or_store32(p, True/*isLoad*/, /*w*/9,
   6524                                 i->ARM64in.EvCheck.amCounter);
   6525          *p++ = 0x71000529; /* subs w9, w9, #1 */
   6526          p = do_load_or_store32(p, False/*!isLoad*/, /*w*/9,
   6527                                 i->ARM64in.EvCheck.amCounter);
   6528          *p++ = 0x54000065; /* bpl nofail */
   6529          p = do_load_or_store64(p, True/*isLoad*/, /*x*/9,
   6530                                 i->ARM64in.EvCheck.amFailAddr);
   6531          *p++ = 0xD61F0120; /* br x9 */
   6532          /* nofail: */
   6533 
   6534          /* Crosscheck */
   6535          vassert(evCheckSzB_ARM64() == (UChar*)p - (UChar*)p0);
   6536          goto done;
   6537       }
   6538 
   6539 //ZZ       case ARMin_ProfInc: {
   6540 //ZZ          /* We generate:
   6541 //ZZ               (ctrP is unknown now, so use 0x65556555 in the
   6542 //ZZ               expectation that a later call to LibVEX_patchProfCtr
   6543 //ZZ               will be used to fill in the immediate fields once the
   6544 //ZZ               right value is known.)
   6545 //ZZ             movw r12, lo16(0x65556555)
   6546 //ZZ             movt r12, lo16(0x65556555)
   6547 //ZZ             ldr  r11, [r12]
   6548 //ZZ             adds r11, r11, #1
   6549 //ZZ             str  r11, [r12]
   6550 //ZZ             ldr  r11, [r12+4]
   6551 //ZZ             adc  r11, r11, #0
   6552 //ZZ             str  r11, [r12+4]
   6553 //ZZ          */
   6554 //ZZ          p = imm32_to_iregNo_EXACTLY2(p, /*r*/12, 0x65556555);
   6555 //ZZ          *p++ = 0xE59CB000;
   6556 //ZZ          *p++ = 0xE29BB001;
   6557 //ZZ          *p++ = 0xE58CB000;
   6558 //ZZ          *p++ = 0xE59CB004;
   6559 //ZZ          *p++ = 0xE2ABB000;
   6560 //ZZ          *p++ = 0xE58CB004;
   6561 //ZZ          /* Tell the caller .. */
   6562 //ZZ          vassert(!(*is_profInc));
   6563 //ZZ          *is_profInc = True;
   6564 //ZZ          goto done;
   6565 //ZZ       }
   6566 
   6567       /* ... */
   6568       default:
   6569          goto bad;
   6570     }
   6571 
   6572   bad:
   6573    ppARM64Instr(i);
   6574    vpanic("emit_ARM64Instr");
   6575    /*NOTREACHED*/
   6576 
   6577   done:
   6578    vassert(((UChar*)p) - &buf[0] <= 36);
   6579    return ((UChar*)p) - &buf[0];
   6580 }
   6581 
   6582 
   6583 /* How big is an event check?  See case for ARM64in_EvCheck in
   6584    emit_ARM64Instr just above.  That crosschecks what this returns, so
   6585    we can tell if we're inconsistent. */
   6586 Int evCheckSzB_ARM64 ( void )
   6587 {
   6588    return 24;
   6589 }
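/* Editorial cross-check (assuming, as the "fixed length" comment in the
   ARM64in_EvCheck case implies, that each amode load/store there is a
   single 4-byte instruction): the sequence is ldr, subs, str, b.pl, ldr,
   br, i.e. 6 instructions * 4 bytes = 24 bytes, matching the value
   returned above and the vassert in emit_ARM64Instr. */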
   6590 
   6591 
   6592 /* NB: what goes on here has to be very closely coordinated with the
   6593    emitInstr case for XDirect, above. */
   6594 VexInvalRange chainXDirect_ARM64 ( void* place_to_chain,
   6595                                    void* disp_cp_chain_me_EXPECTED,
   6596                                    void* place_to_jump_to )
   6597 {
   6598    /* What we're expecting to see is:
   6599         movw x9, disp_cp_chain_me_to_EXPECTED[15:0]
   6600         movk x9, disp_cp_chain_me_to_EXPECTED[31:16], lsl 16
   6601         movk x9, disp_cp_chain_me_to_EXPECTED[47:32], lsl 32
   6602         movk x9, disp_cp_chain_me_to_EXPECTED[63:48], lsl 48
   6603         blr  x9
   6604       viz
   6605         <16 bytes generated by imm64_to_iregNo_EXACTLY4>
   6606         D6 3F 01 20
   6607    */
   6608    UInt* p = (UInt*)place_to_chain;
   6609    vassert(0 == (3 & (HWord)p));
   6610    vassert(is_imm64_to_iregNo_EXACTLY4(
   6611               p, /*x*/9, Ptr_to_ULong(disp_cp_chain_me_EXPECTED)));
   6612    vassert(p[4] == 0xD63F0120);
   6613 
   6614    /* And what we want to change it to is:
   6615         movw x9, place_to_jump_to[15:0]
   6616         movk x9, place_to_jump_to[31:16], lsl 16
   6617         movk x9, place_to_jump_to[47:32], lsl 32
   6618         movk x9, place_to_jump_to[63:48], lsl 48
   6619         br   x9
   6620       viz
   6621         <16 bytes generated by imm64_to_iregNo_EXACTLY4>
   6622         D6 1F 01 20
   6623 
   6624       The replacement has the same length as the original.
   6625    */
   6626    (void)imm64_to_iregNo_EXACTLY4(
   6627             p, /*x*/9, Ptr_to_ULong(place_to_jump_to));
   6628    p[4] = 0xD61F0120;
   6629 
   6630    VexInvalRange vir = {(HWord)p, 20};
   6631    return vir;
   6632 }
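/* Editorial note: the patch above rewrites the 16-byte immediate-load
   sequence so that x9 holds the new target, then swaps the trailing
   word; 0xD63F0120 (blr x9) and 0xD61F0120 (br x9) differ in a single
   bit.  Hence the invalidated range of 20 bytes (16 + 4).
   unchainXDirect_ARM64 below performs the inverse patch. */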
   6633 
   6634 
   6635 /* NB: what goes on here has to be very closely coordinated with the
   6636    emitInstr case for XDirect, above. */
   6637 VexInvalRange unchainXDirect_ARM64 ( void* place_to_unchain,
   6638                                      void* place_to_jump_to_EXPECTED,
   6639                                      void* disp_cp_chain_me )
   6640 {
   6641    /* What we're expecting to see is:
   6642         movw x9, place_to_jump_to_EXPECTED[15:0]
   6643         movk x9, place_to_jump_to_EXPECTED[31:16], lsl 16
   6644         movk x9, place_to_jump_to_EXPECTED[47:32], lsl 32
   6645         movk x9, place_to_jump_to_EXPECTED[63:48], lsl 48
   6646         br   x9
   6647       viz
   6648         <16 bytes generated by imm64_to_iregNo_EXACTLY4>
   6649         D6 1F 01 20
   6650    */
   6651    UInt* p = (UInt*)place_to_unchain;
   6652    vassert(0 == (3 & (HWord)p));
   6653    vassert(is_imm64_to_iregNo_EXACTLY4(
   6654               p, /*x*/9, Ptr_to_ULong(place_to_jump_to_EXPECTED)));
   6655    vassert(p[4] == 0xD61F0120);
   6656 
   6657    /* And what we want to change it to is:
   6658         movw x9, disp_cp_chain_me_to[15:0]
   6659         movk x9, disp_cp_chain_me_to[31:16], lsl 16
   6660         movk x9, disp_cp_chain_me_to[47:32], lsl 32
   6661         movk x9, disp_cp_chain_me_to[63:48], lsl 48
   6662         blr  x9
   6663       viz
   6664         <16 bytes generated by imm64_to_iregNo_EXACTLY4>
   6665         D6 3F 01 20
   6666    */
   6667    (void)imm64_to_iregNo_EXACTLY4(
   6668             p, /*x*/9, Ptr_to_ULong(disp_cp_chain_me));
   6669    p[4] = 0xD63F0120;
   6670 
   6671    VexInvalRange vir = {(HWord)p, 20};
   6672    return vir;
   6673 }
   6674 
   6675 
   6676 //ZZ /* Patch the counter address into a profile inc point, as previously
   6677 //ZZ    created by the ARMin_ProfInc case for emit_ARMInstr. */
   6678 //ZZ VexInvalRange patchProfInc_ARM ( void*  place_to_patch,
   6679 //ZZ                                  ULong* location_of_counter )
   6680 //ZZ {
   6681 //ZZ    vassert(sizeof(ULong*) == 4);
   6682 //ZZ    UInt* p = (UInt*)place_to_patch;
   6683 //ZZ    vassert(0 == (3 & (HWord)p));
   6684 //ZZ    vassert(is_imm32_to_iregNo_EXACTLY2(p, /*r*/12, 0x65556555));
   6685 //ZZ    vassert(p[2] == 0xE59CB000);
   6686 //ZZ    vassert(p[3] == 0xE29BB001);
   6687 //ZZ    vassert(p[4] == 0xE58CB000);
   6688 //ZZ    vassert(p[5] == 0xE59CB004);
   6689 //ZZ    vassert(p[6] == 0xE2ABB000);
   6690 //ZZ    vassert(p[7] == 0xE58CB004);
   6691 //ZZ    imm32_to_iregNo_EXACTLY2(p, /*r*/12,
   6692 //ZZ                             (UInt)Ptr_to_ULong(location_of_counter));
   6693 //ZZ    VexInvalRange vir = {(HWord)p, 8};
   6694 //ZZ    return vir;
   6695 //ZZ }
   6696 //ZZ
   6697 //ZZ
   6698 //ZZ #undef BITS4
   6699 //ZZ #undef X0000
   6700 //ZZ #undef X0001
   6701 //ZZ #undef X0010
   6702 //ZZ #undef X0011
   6703 //ZZ #undef X0100
   6704 //ZZ #undef X0101
   6705 //ZZ #undef X0110
   6706 //ZZ #undef X0111
   6707 //ZZ #undef X1000
   6708 //ZZ #undef X1001
   6709 //ZZ #undef X1010
   6710 //ZZ #undef X1011
   6711 //ZZ #undef X1100
   6712 //ZZ #undef X1101
   6713 //ZZ #undef X1110
   6714 //ZZ #undef X1111
   6715 //ZZ #undef XXXXX___
   6716 //ZZ #undef XXXXXX__
   6717 //ZZ #undef XXX___XX
   6718 //ZZ #undef XXXXX__X
   6719 //ZZ #undef XXXXXXXX
   6720 //ZZ #undef XX______
   6721 
   6722 /*---------------------------------------------------------------*/
   6723 /*--- end                                   host_arm64_defs.c ---*/
   6724 /*---------------------------------------------------------------*/
   6725