Home | History | Annotate | Download | only in priv
      1 
      2 /*---------------------------------------------------------------*/
      3 /*--- begin                                   host_arm_defs.c ---*/
      4 /*---------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2011 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    NEON support is
     14    Copyright (C) 2010-2011 Samsung Electronics
     15    contributed by Dmitry Zhurikhin <zhur (at) ispras.ru>
     16               and Kirill Batuzov <batuzovk (at) ispras.ru>
     17 
     18    This program is free software; you can redistribute it and/or
     19    modify it under the terms of the GNU General Public License as
     20    published by the Free Software Foundation; either version 2 of the
     21    License, or (at your option) any later version.
     22 
     23    This program is distributed in the hope that it will be useful, but
     24    WITHOUT ANY WARRANTY; without even the implied warranty of
     25    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     26    General Public License for more details.
     27 
     28    You should have received a copy of the GNU General Public License
     29    along with this program; if not, write to the Free Software
     30    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     31    02110-1301, USA.
     32 
     33    The GNU General Public License is contained in the file COPYING.
     34 */
     35 
     36 #include "libvex_basictypes.h"
     37 #include "libvex.h"
     38 #include "libvex_trc_values.h"
     39 
     40 #include "main_util.h"
     41 #include "host_generic_regs.h"
     42 #include "host_arm_defs.h"
     43 
/* ARM hardware-capability bits; zero-initialised here.  NOTE(review):
   presumably filled in elsewhere (e.g. by the front end) — confirm. */
UInt arm_hwcaps = 0;
     45 
     46 
     47 /* --------- Registers. --------- */
     48 
     49 /* The usual HReg abstraction.
     50    There are 16 general purpose regs.
     51 */
     52 
     53 void ppHRegARM ( HReg reg )  {
     54    Int r;
     55    /* Be generic for all virtual regs. */
     56    if (hregIsVirtual(reg)) {
     57       ppHReg(reg);
     58       return;
     59    }
     60    /* But specific for real regs. */
     61    switch (hregClass(reg)) {
     62       case HRcInt32:
     63          r = hregNumber(reg);
     64          vassert(r >= 0 && r < 16);
     65          vex_printf("r%d", r);
     66          return;
     67       case HRcFlt64:
     68          r = hregNumber(reg);
     69          vassert(r >= 0 && r < 32);
     70          vex_printf("d%d", r);
     71          return;
     72       case HRcFlt32:
     73          r = hregNumber(reg);
     74          vassert(r >= 0 && r < 32);
     75          vex_printf("s%d", r);
     76          return;
     77       case HRcVec128:
     78          r = hregNumber(reg);
     79          vassert(r >= 0 && r < 16);
     80          vex_printf("q%d", r);
     81          return;
     82       default:
     83          vpanic("ppHRegARM");
     84    }
     85 }
     86 
/* Constructors for the real (non-virtual) ARM host registers this
   backend refers to: integer r0-r15, VFP doubles d8-d12, VFP
   singles s26-s30, and NEON quads q8-q15. */
HReg hregARM_R0  ( void ) { return mkHReg(0,  HRcInt32, False); }
HReg hregARM_R1  ( void ) { return mkHReg(1,  HRcInt32, False); }
HReg hregARM_R2  ( void ) { return mkHReg(2,  HRcInt32, False); }
HReg hregARM_R3  ( void ) { return mkHReg(3,  HRcInt32, False); }
HReg hregARM_R4  ( void ) { return mkHReg(4,  HRcInt32, False); }
HReg hregARM_R5  ( void ) { return mkHReg(5,  HRcInt32, False); }
HReg hregARM_R6  ( void ) { return mkHReg(6,  HRcInt32, False); }
HReg hregARM_R7  ( void ) { return mkHReg(7,  HRcInt32, False); }
HReg hregARM_R8  ( void ) { return mkHReg(8,  HRcInt32, False); }
HReg hregARM_R9  ( void ) { return mkHReg(9,  HRcInt32, False); }
HReg hregARM_R10 ( void ) { return mkHReg(10, HRcInt32, False); }
HReg hregARM_R11 ( void ) { return mkHReg(11, HRcInt32, False); }
HReg hregARM_R12 ( void ) { return mkHReg(12, HRcInt32, False); }
HReg hregARM_R13 ( void ) { return mkHReg(13, HRcInt32, False); }
HReg hregARM_R14 ( void ) { return mkHReg(14, HRcInt32, False); }
HReg hregARM_R15 ( void ) { return mkHReg(15, HRcInt32, False); }
HReg hregARM_D8  ( void ) { return mkHReg(8,  HRcFlt64, False); }
HReg hregARM_D9  ( void ) { return mkHReg(9,  HRcFlt64, False); }
HReg hregARM_D10 ( void ) { return mkHReg(10, HRcFlt64, False); }
HReg hregARM_D11 ( void ) { return mkHReg(11, HRcFlt64, False); }
HReg hregARM_D12 ( void ) { return mkHReg(12, HRcFlt64, False); }
HReg hregARM_S26 ( void ) { return mkHReg(26, HRcFlt32, False); }
HReg hregARM_S27 ( void ) { return mkHReg(27, HRcFlt32, False); }
HReg hregARM_S28 ( void ) { return mkHReg(28, HRcFlt32, False); }
HReg hregARM_S29 ( void ) { return mkHReg(29, HRcFlt32, False); }
HReg hregARM_S30 ( void ) { return mkHReg(30, HRcFlt32, False); }
HReg hregARM_Q8  ( void ) { return mkHReg(8,  HRcVec128, False); }
HReg hregARM_Q9  ( void ) { return mkHReg(9,  HRcVec128, False); }
HReg hregARM_Q10 ( void ) { return mkHReg(10, HRcVec128, False); }
HReg hregARM_Q11 ( void ) { return mkHReg(11, HRcVec128, False); }
HReg hregARM_Q12 ( void ) { return mkHReg(12, HRcVec128, False); }
HReg hregARM_Q13 ( void ) { return mkHReg(13, HRcVec128, False); }
HReg hregARM_Q14 ( void ) { return mkHReg(14, HRcVec128, False); }
HReg hregARM_Q15 ( void ) { return mkHReg(15, HRcVec128, False); }
    121 
/* Return, in (*nregs, *arr), the set of host registers available to
   the register allocator.  The order of the entries matters: the
   allocator prefers earlier entries, so callee-saved registers are
   listed first.  The total (26) must match the number of array fills
   below; this is checked by the final vassert. */
void getAllocableRegs_ARM ( Int* nregs, HReg** arr )
{
   Int i = 0;
   *nregs = 26;
   *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
   // callee saves ones are listed first, since we prefer them
   // if they're available
   (*arr)[i++] = hregARM_R4();
   (*arr)[i++] = hregARM_R5();
   (*arr)[i++] = hregARM_R6();
   (*arr)[i++] = hregARM_R7();
   (*arr)[i++] = hregARM_R10();
   (*arr)[i++] = hregARM_R11();
   // otherwise we'll have to slum it out with caller-saves ones
   (*arr)[i++] = hregARM_R0();
   (*arr)[i++] = hregARM_R1();
   (*arr)[i++] = hregARM_R2();
   (*arr)[i++] = hregARM_R3();
   (*arr)[i++] = hregARM_R9();
   // FP registers.  Note: these are all callee-save.  Yay!
   // Hence we don't need to mention them as trashed in
   // getHRegUsage for ARMInstr_Call.
   (*arr)[i++] = hregARM_D8();
   (*arr)[i++] = hregARM_D9();
   (*arr)[i++] = hregARM_D10();
   (*arr)[i++] = hregARM_D11();
   (*arr)[i++] = hregARM_D12();
   (*arr)[i++] = hregARM_S26();
   (*arr)[i++] = hregARM_S27();
   (*arr)[i++] = hregARM_S28();
   (*arr)[i++] = hregARM_S29();
   (*arr)[i++] = hregARM_S30();

   (*arr)[i++] = hregARM_Q8();
   (*arr)[i++] = hregARM_Q9();
   (*arr)[i++] = hregARM_Q10();
   (*arr)[i++] = hregARM_Q11();
   (*arr)[i++] = hregARM_Q12();

   // Q13-Q15 deliberately excluded; if re-enabled, *nregs above
   // must be bumped to match.
   //(*arr)[i++] = hregARM_Q13();
   //(*arr)[i++] = hregARM_Q14();
   //(*arr)[i++] = hregARM_Q15();

   // unavail: r8 as GSP
   // r12 is used as a spill/reload temporary
   // r13 as SP
   // r14 as LR
   // r15 as PC
   //
   // All in all, we have 11 allocatable integer registers:
   // 0 1 2 3 4 5 6 7 9 10 11, with r8 dedicated as GSP
   // and r12 dedicated as a spill temporary.
   // 13 14 and 15 are not under the allocator's control.
   //
   // Hence for the allocatable registers we have:
   //
   // callee-saved: 4 5 6 7 (8) 9 10 11
   // caller-saved: 0 1 2 3
   // Note 9 is ambiguous: the base EABI does not give an e/r-saved
   // designation for it, but the Linux instantiation of the ABI
   // specifies it as callee-saved.
   //
   // If the set of available registers changes or if the e/r status
   // changes, be sure to re-check/sync the definition of
   // getHRegUsage for ARMInstr_Call too.
   vassert(i == *nregs);
}
    189 
    190 
    191 
    192 /* --------- Condition codes, ARM encoding. --------- */
    193 
/* Return the standard ARM assembler mnemonic for a condition code. */
HChar* showARMCondCode ( ARMCondCode cond ) {
   switch (cond) {
       case ARMcc_EQ:  return "eq";
       case ARMcc_NE:  return "ne";
       case ARMcc_HS:  return "hs";
       case ARMcc_LO:  return "lo";
       case ARMcc_MI:  return "mi";
       case ARMcc_PL:  return "pl";
       case ARMcc_VS:  return "vs";
       case ARMcc_VC:  return "vc";
       case ARMcc_HI:  return "hi";
       case ARMcc_LS:  return "ls";
       case ARMcc_GE:  return "ge";
       case ARMcc_LT:  return "lt";
       case ARMcc_GT:  return "gt";
       case ARMcc_LE:  return "le";
       case ARMcc_AL:  return "al"; // default
       case ARMcc_NV:  return "nv";
       default: vpanic("showARMCondCode");
   }
}
    215 
    216 
    217 /* --------- Mem AModes: Addressing Mode 1 --------- */
    218 
    219 ARMAMode1* ARMAMode1_RI  ( HReg reg, Int simm13 ) {
    220    ARMAMode1* am        = LibVEX_Alloc(sizeof(ARMAMode1));
    221    am->tag              = ARMam1_RI;
    222    am->ARMam1.RI.reg    = reg;
    223    am->ARMam1.RI.simm13 = simm13;
    224    vassert(-4095 <= simm13 && simm13 <= 4095);
    225    return am;
    226 }
    227 ARMAMode1* ARMAMode1_RRS ( HReg base, HReg index, UInt shift ) {
    228    ARMAMode1* am        = LibVEX_Alloc(sizeof(ARMAMode1));
    229    am->tag              = ARMam1_RRS;
    230    am->ARMam1.RRS.base  = base;
    231    am->ARMam1.RRS.index = index;
    232    am->ARMam1.RRS.shift = shift;
    233    vassert(0 <= shift && shift <= 3);
    234    return am;
    235 }
    236 
/* Print an AMode1 as either "imm(reg)" or "(base,index,shift)". */
void ppARMAMode1 ( ARMAMode1* am ) {
   switch (am->tag) {
      case ARMam1_RI:
         vex_printf("%d(", am->ARMam1.RI.simm13);
         ppHRegARM(am->ARMam1.RI.reg);
         vex_printf(")");
         break;
      case ARMam1_RRS:
         vex_printf("(");
         ppHRegARM(am->ARMam1.RRS.base);
         vex_printf(",");
         ppHRegARM(am->ARMam1.RRS.index);
         vex_printf(",%u)", am->ARMam1.RRS.shift);
         break;
      default:
         vassert(0);
   }
}
    255 
    256 static void addRegUsage_ARMAMode1 ( HRegUsage* u, ARMAMode1* am ) {
    257    switch (am->tag) {
    258       case ARMam1_RI:
    259          addHRegUse(u, HRmRead, am->ARMam1.RI.reg);
    260          return;
    261       case ARMam1_RRS:
    262          //    addHRegUse(u, HRmRead, am->ARMam1.RRS.base);
    263          //    addHRegUse(u, HRmRead, am->ARMam1.RRS.index);
    264          //   return;
    265       default:
    266          vpanic("addRegUsage_ARMAmode1");
    267    }
    268 }
    269 
    270 static void mapRegs_ARMAMode1 ( HRegRemap* m, ARMAMode1* am ) {
    271    switch (am->tag) {
    272       case ARMam1_RI:
    273          am->ARMam1.RI.reg = lookupHRegRemap(m, am->ARMam1.RI.reg);
    274          return;
    275       case ARMam1_RRS:
    276          //am->ARMam1.RR.base =lookupHRegRemap(m, am->ARMam1.RR.base);
    277          //am->ARMam1.RR.index = lookupHRegRemap(m, am->ARMam1.RR.index);
    278          //return;
    279       default:
    280          vpanic("mapRegs_ARMAmode1");
    281    }
    282 }
    283 
    284 
    285 /* --------- Mem AModes: Addressing Mode 2 --------- */
    286 
    287 ARMAMode2* ARMAMode2_RI ( HReg reg, Int simm9 ) {
    288    ARMAMode2* am       = LibVEX_Alloc(sizeof(ARMAMode2));
    289    am->tag             = ARMam2_RI;
    290    am->ARMam2.RI.reg   = reg;
    291    am->ARMam2.RI.simm9 = simm9;
    292    vassert(-255 <= simm9 && simm9 <= 255);
    293    return am;
    294 }
    295 ARMAMode2* ARMAMode2_RR ( HReg base, HReg index ) {
    296    ARMAMode2* am       = LibVEX_Alloc(sizeof(ARMAMode2));
    297    am->tag             = ARMam2_RR;
    298    am->ARMam2.RR.base  = base;
    299    am->ARMam2.RR.index = index;
    300    return am;
    301 }
    302 
/* Print an AMode2 as either "imm(reg)" or "(base,index)". */
void ppARMAMode2 ( ARMAMode2* am ) {
   switch (am->tag) {
      case ARMam2_RI:
         vex_printf("%d(", am->ARMam2.RI.simm9);
         ppHRegARM(am->ARMam2.RI.reg);
         vex_printf(")");
         break;
      case ARMam2_RR:
         vex_printf("(");
         ppHRegARM(am->ARMam2.RR.base);
         vex_printf(",");
         ppHRegARM(am->ARMam2.RR.index);
         vex_printf(")");
         break;
      default:
         vassert(0);
   }
}
    321 
    322 static void addRegUsage_ARMAMode2 ( HRegUsage* u, ARMAMode2* am ) {
    323    switch (am->tag) {
    324       case ARMam2_RI:
    325          addHRegUse(u, HRmRead, am->ARMam2.RI.reg);
    326          return;
    327       case ARMam2_RR:
    328          //    addHRegUse(u, HRmRead, am->ARMam2.RR.base);
    329          //    addHRegUse(u, HRmRead, am->ARMam2.RR.index);
    330          //   return;
    331       default:
    332          vpanic("addRegUsage_ARMAmode2");
    333    }
    334 }
    335 
    336 static void mapRegs_ARMAMode2 ( HRegRemap* m, ARMAMode2* am ) {
    337    switch (am->tag) {
    338       case ARMam2_RI:
    339          am->ARMam2.RI.reg = lookupHRegRemap(m, am->ARMam2.RI.reg);
    340          return;
    341       case ARMam2_RR:
    342          //am->ARMam2.RR.base =lookupHRegRemap(m, am->ARMam2.RR.base);
    343          //am->ARMam2.RR.index = lookupHRegRemap(m, am->ARMam2.RR.index);
    344          //return;
    345       default:
    346          vpanic("mapRegs_ARMAmode2");
    347    }
    348 }
    349 
    350 
    351 /* --------- Mem AModes: Addressing Mode VFP --------- */
    352 
    353 ARMAModeV* mkARMAModeV ( HReg reg, Int simm11 ) {
    354    ARMAModeV* am = LibVEX_Alloc(sizeof(ARMAModeV));
    355    vassert(simm11 >= -1020 && simm11 <= 1020);
    356    vassert(0 == (simm11 & 3));
    357    am->reg    = reg;
    358    am->simm11 = simm11;
    359    return am;
    360 }
    361 
/* Print a VFP amode as "imm(reg)". */
void ppARMAModeV ( ARMAModeV* am ) {
   vex_printf("%d(", am->simm11);
   ppHRegARM(am->reg);
   vex_printf(")");
}

/* Record the (single) register read by a VFP amode. */
static void addRegUsage_ARMAModeV ( HRegUsage* u, ARMAModeV* am ) {
   addHRegUse(u, HRmRead, am->reg);
}

/* Apply a vreg->rreg mapping to a VFP amode. */
static void mapRegs_ARMAModeV ( HRegRemap* m, ARMAModeV* am ) {
   am->reg = lookupHRegRemap(m, am->reg);
}
    375 
    376 
    377 /* --------- Mem AModes: Addressing Mode Neon ------- */
    378 
    379 ARMAModeN *mkARMAModeN_RR ( HReg rN, HReg rM ) {
    380    ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN));
    381    am->tag = ARMamN_RR;
    382    am->ARMamN.RR.rN = rN;
    383    am->ARMamN.RR.rM = rM;
    384    return am;
    385 }
    386 
    387 ARMAModeN *mkARMAModeN_R ( HReg rN ) {
    388    ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN));
    389    am->tag = ARMamN_R;
    390    am->ARMamN.R.rN = rN;
    391    return am;
    392 }
    393 
/* Record the registers read by a NEON amode (one for the R form,
   two for the RR form). */
static void addRegUsage_ARMAModeN ( HRegUsage* u, ARMAModeN* am ) {
   if (am->tag == ARMamN_R) {
      addHRegUse(u, HRmRead, am->ARMamN.R.rN);
   } else {
      addHRegUse(u, HRmRead, am->ARMamN.RR.rN);
      addHRegUse(u, HRmRead, am->ARMamN.RR.rM);
   }
}

/* Apply a vreg->rreg mapping to a NEON amode. */
static void mapRegs_ARMAModeN ( HRegRemap* m, ARMAModeN* am ) {
   if (am->tag == ARMamN_R) {
      am->ARMamN.R.rN = lookupHRegRemap(m, am->ARMamN.R.rN);
   } else {
      am->ARMamN.RR.rN = lookupHRegRemap(m, am->ARMamN.RR.rN);
      am->ARMamN.RR.rM = lookupHRegRemap(m, am->ARMamN.RR.rM);
   }
}
    411 
/* Print a NEON amode as "[rN]", with a trailing ", rM" for the
   two-register form. */
void ppARMAModeN ( ARMAModeN* am ) {
   vex_printf("[");
   if (am->tag == ARMamN_R) {
      ppHRegARM(am->ARMamN.R.rN);
   } else {
      ppHRegARM(am->ARMamN.RR.rN);
   }
   vex_printf("]");
   if (am->tag == ARMamN_RR) {
      vex_printf(", ");
      ppHRegARM(am->ARMamN.RR.rM);
   }
}
    425 
    426 
    427 /* --------- Reg or imm-8x4 operands --------- */
    428 
    429 static UInt ROR32 ( UInt x, UInt sh ) {
    430    vassert(sh >= 0 && sh < 32);
    431    if (sh == 0)
    432       return x;
    433    else
    434       return (x << (32-sh)) | (x >> sh);
    435 }
    436 
    437 ARMRI84* ARMRI84_I84 ( UShort imm8, UShort imm4 ) {
    438    ARMRI84* ri84          = LibVEX_Alloc(sizeof(ARMRI84));
    439    ri84->tag              = ARMri84_I84;
    440    ri84->ARMri84.I84.imm8 = imm8;
    441    ri84->ARMri84.I84.imm4 = imm4;
    442    vassert(imm8 >= 0 && imm8 <= 255);
    443    vassert(imm4 >= 0 && imm4 <= 15);
    444    return ri84;
    445 }
    446 ARMRI84* ARMRI84_R ( HReg reg ) {
    447    ARMRI84* ri84       = LibVEX_Alloc(sizeof(ARMRI84));
    448    ri84->tag           = ARMri84_R;
    449    ri84->ARMri84.R.reg = reg;
    450    return ri84;
    451 }
    452 
/* Print a reg-or-imm8x4 operand; the immediate form is shown as the
   decoded 32-bit value (imm8 rotated right by 2*imm4). */
void ppARMRI84 ( ARMRI84* ri84 ) {
   switch (ri84->tag) {
      case ARMri84_I84:
         vex_printf("0x%x", ROR32(ri84->ARMri84.I84.imm8,
                                  2 * ri84->ARMri84.I84.imm4));
         break;
      case ARMri84_R:
         ppHRegARM(ri84->ARMri84.R.reg);
         break;
      default:
         vassert(0);
   }
}
    466 
/* Record registers read by a reg-or-imm8x4 operand (none for the
   immediate form). */
static void addRegUsage_ARMRI84 ( HRegUsage* u, ARMRI84* ri84 ) {
   switch (ri84->tag) {
      case ARMri84_I84:
         return;
      case ARMri84_R:
         addHRegUse(u, HRmRead, ri84->ARMri84.R.reg);
         return;
      default:
         vpanic("addRegUsage_ARMRI84");
   }
}

/* Apply a vreg->rreg mapping to a reg-or-imm8x4 operand. */
static void mapRegs_ARMRI84 ( HRegRemap* m, ARMRI84* ri84 ) {
   switch (ri84->tag) {
      case ARMri84_I84:
         return;
      case ARMri84_R:
         ri84->ARMri84.R.reg = lookupHRegRemap(m, ri84->ARMri84.R.reg);
         return;
      default:
         vpanic("mapRegs_ARMRI84");
   }
}
    490 
    491 
    492 /* --------- Reg or imm5 operands --------- */
    493 
    494 ARMRI5* ARMRI5_I5 ( UInt imm5 ) {
    495    ARMRI5* ri5         = LibVEX_Alloc(sizeof(ARMRI5));
    496    ri5->tag            = ARMri5_I5;
    497    ri5->ARMri5.I5.imm5 = imm5;
    498    vassert(imm5 > 0 && imm5 <= 31); // zero is not allowed
    499    return ri5;
    500 }
    501 ARMRI5* ARMRI5_R ( HReg reg ) {
    502    ARMRI5* ri5       = LibVEX_Alloc(sizeof(ARMRI5));
    503    ri5->tag          = ARMri5_R;
    504    ri5->ARMri5.R.reg = reg;
    505    return ri5;
    506 }
    507 
/* Print a reg-or-imm5 operand. */
void ppARMRI5 ( ARMRI5* ri5 ) {
   switch (ri5->tag) {
      case ARMri5_I5:
         vex_printf("%u", ri5->ARMri5.I5.imm5);
         break;
      case ARMri5_R:
         ppHRegARM(ri5->ARMri5.R.reg);
         break;
      default:
         vassert(0);
   }
}
    520 
/* Record registers read by a reg-or-imm5 operand (none for the
   immediate form). */
static void addRegUsage_ARMRI5 ( HRegUsage* u, ARMRI5* ri5 ) {
   switch (ri5->tag) {
      case ARMri5_I5:
         return;
      case ARMri5_R:
         addHRegUse(u, HRmRead, ri5->ARMri5.R.reg);
         return;
      default:
         vpanic("addRegUsage_ARMRI5");
   }
}

/* Apply a vreg->rreg mapping to a reg-or-imm5 operand. */
static void mapRegs_ARMRI5 ( HRegRemap* m, ARMRI5* ri5 ) {
   switch (ri5->tag) {
      case ARMri5_I5:
         return;
      case ARMri5_R:
         ri5->ARMri5.R.reg = lookupHRegRemap(m, ri5->ARMri5.R.reg);
         return;
      default:
         vpanic("mapRegs_ARMRI5");
   }
}
    544 
    545 /* -------- Neon Immediate operatnd --------- */
    546 
    547 ARMNImm* ARMNImm_TI ( UInt type, UInt imm8 ) {
    548    ARMNImm* i = LibVEX_Alloc(sizeof(ARMNImm));
    549    i->type = type;
    550    i->imm8 = imm8;
    551    return i;
    552 }
    553 
/* Expand a NEON (type, imm8) immediate into the 64-bit value it
   denotes.  Many of the case labels deliberately fall through, so
   that each level adds one more shift/replication step. */
ULong ARMNImm_to_Imm64 ( ARMNImm* imm ) {
   int i, j;
   ULong y, x = imm->imm8;
   switch (imm->type) {
      /* Types 0..3: imm8 placed in byte lane 0/1/2/3 of a 32-bit
         word, replicated into both halves. */
      case 3:
         x = x << 8;
         /* fallthrough */
      case 2:
         x = x << 8;
         /* fallthrough */
      case 1:
         x = x << 8;
         /* fallthrough */
      case 0:
         return (x << 32) | x;
      /* Types 4..6: a 16-bit pattern replicated four times.
         Type 4: 00:imm8; type 5: imm8:00; type 6: imm8:imm8. */
      case 5:
      case 6:
         if (imm->type == 5)
            x = x << 8;
         else
            x = (x << 8) | x;
         /* fallthrough */
      case 4:
         x = (x << 16) | x;
         return (x << 32) | x;
      /* Types 7/8: imm8 followed by one or two bytes of 0xFF,
         replicated into both halves. */
      case 8:
         x = (x << 8) | 0xFF;
         /* fallthrough */
      case 7:
         x = (x << 8) | 0xFF;
         return (x << 32) | x;
      /* Type 9: each bit of imm8 expands to a whole byte (bit 7
         becomes the most significant byte). */
      case 9:
         x = 0;
         for (i = 7; i >= 0; i--) {
            y = ((ULong)imm->imm8 >> i) & 1;
            for (j = 0; j < 8; j++) {
               x = (x << 1) | y;
            }
         }
         return x;
      /* Type 10: expansion of an 8-bit float-style immediate
         (sign, modified exponent, 4 fraction bits) into a 32-bit
         pattern, replicated into both halves. */
      case 10:
         x |= (x & 0x80) << 5;
         x |= (~x & 0x40) << 5;
         x &= 0x187F; /* 0001 1000 0111 1111 */
         x |= (x & 0x40) << 4;
         x |= (x & 0x40) << 3;
         x |= (x & 0x40) << 2;
         x |= (x & 0x40) << 1;
         x = x << 19;
         x = (x << 32) | x;
         return x;
      default:
         vpanic("ARMNImm_to_Imm64");
   }
}
    604 
/* Try to express the 64-bit value x as a NEON (type, imm8)
   immediate.  Returns NULL if no encoding exists.  Candidate types
   10 and 9 are verified by round-tripping through
   ARMNImm_to_Imm64. */
ARMNImm* Imm64_to_ARMNImm ( ULong x ) {
   ARMNImm tmp;
   if ((x & 0xFFFFFFFF) == (x >> 32)) {
      /* Both 32-bit halves equal: candidates are types 0-8 and 10. */
      if ((x & 0xFFFFFF00) == 0)
         return ARMNImm_TI(0, x & 0xFF);
      if ((x & 0xFFFF00FF) == 0)
         return ARMNImm_TI(1, (x >> 8) & 0xFF);
      if ((x & 0xFF00FFFF) == 0)
         return ARMNImm_TI(2, (x >> 16) & 0xFF);
      if ((x & 0x00FFFFFF) == 0)
         return ARMNImm_TI(3, (x >> 24) & 0xFF);
      if ((x & 0xFFFF00FF) == 0xFF)
         return ARMNImm_TI(7, (x >> 8) & 0xFF);
      if ((x & 0xFF00FFFF) == 0xFFFF)
         return ARMNImm_TI(8, (x >> 16) & 0xFF);
      if ((x & 0xFFFF) == ((x >> 16) & 0xFFFF)) {
         /* 16-bit pattern replicated: types 4-6. */
         if ((x & 0xFF00) == 0)
            return ARMNImm_TI(4, x & 0xFF);
         if ((x & 0x00FF) == 0)
            return ARMNImm_TI(5, (x >> 8) & 0xFF);
         if ((x & 0xFF) == ((x >> 8) & 0xFF))
            return ARMNImm_TI(6, x & 0xFF);
      }
      if ((x & 0x7FFFF) == 0) {
         /* Possibly a type-10 (float-style) immediate; verify by
            expanding and comparing. */
         tmp.type = 10;
         tmp.imm8 = ((x >> 19) & 0x7F) | ((x >> 24) & 0x80);
         if (ARMNImm_to_Imm64(&tmp) == x)
            return ARMNImm_TI(tmp.type, tmp.imm8);
      }
   } else {
      /* This can only be type 9: collapse each byte to one bit and
         verify by re-expanding. */
      tmp.imm8 = (((x >> 56) & 1) << 7)
               | (((x >> 48) & 1) << 6)
               | (((x >> 40) & 1) << 5)
               | (((x >> 32) & 1) << 4)
               | (((x >> 24) & 1) << 3)
               | (((x >> 16) & 1) << 2)
               | (((x >>  8) & 1) << 1)
               | (((x >>  0) & 1) << 0);
      tmp.type = 9;
      if (ARMNImm_to_Imm64 (&tmp) == x)
         return ARMNImm_TI(tmp.type, tmp.imm8);
   }
   return NULL;
}
    650 
/* Print a NEON immediate as the full 128-bit (Q-register) value,
   i.e. the 64-bit expansion printed twice. */
void ppARMNImm (ARMNImm* i) {
   ULong x = ARMNImm_to_Imm64(i);
   vex_printf("0x%llX%llX", x, x);
}
    655 
    656 /* -- Register or scalar operand --- */
    657 
    658 ARMNRS* mkARMNRS(ARMNRS_tag tag, HReg reg, UInt index)
    659 {
    660    ARMNRS *p = LibVEX_Alloc(sizeof(ARMNRS));
    661    p->tag = tag;
    662    p->reg = reg;
    663    p->index = index;
    664    return p;
    665 }
    666 
    667 void ppARMNRS(ARMNRS *p)
    668 {
    669    ppHRegARM(p->reg);
    670    if (p->tag == ARMNRS_Scalar) {
    671       vex_printf("[%d]", p->index);
    672    }
    673 }
    674 
    675 /* --------- Instructions. --------- */
    676 
/* Return a printable mnemonic for an integer ALU op. */
HChar* showARMAluOp ( ARMAluOp op ) {
   switch (op) {
      case ARMalu_ADD:  return "add";
      case ARMalu_ADDS: return "adds";
      case ARMalu_ADC:  return "adc";
      case ARMalu_SUB:  return "sub";
      case ARMalu_SUBS: return "subs";
      case ARMalu_SBC:  return "sbc";
      case ARMalu_AND:  return "and";
      case ARMalu_BIC:  return "bic";
      case ARMalu_OR:   return "orr";
      case ARMalu_XOR:  return "xor";
      default: vpanic("showARMAluOp");
   }
}
    692 
/* Return a printable mnemonic for a shift op. */
HChar* showARMShiftOp ( ARMShiftOp op ) {
   switch (op) {
      case ARMsh_SHL: return "shl";
      case ARMsh_SHR: return "shr";
      case ARMsh_SAR: return "sar";
      default: vpanic("showARMShiftOp");
   }
}

/* Return a printable mnemonic for a unary integer op. */
HChar* showARMUnaryOp ( ARMUnaryOp op ) {
   switch (op) {
      case ARMun_NEG: return "neg";
      case ARMun_NOT: return "not";
      case ARMun_CLZ: return "clz";
      default: vpanic("showARMUnaryOp");
   }
}

/* Return a printable mnemonic for a multiply op. */
HChar* showARMMulOp ( ARMMulOp op ) {
   switch (op) {
      case ARMmul_PLAIN: return "mul";
      case ARMmul_ZX:    return "umull";
      case ARMmul_SX:    return "smull";
      default: vpanic("showARMMulOp");
   }
}

/* Return a printable mnemonic for a VFP binary op. */
HChar* showARMVfpOp ( ARMVfpOp op ) {
   switch (op) {
      case ARMvfp_ADD: return "add";
      case ARMvfp_SUB: return "sub";
      case ARMvfp_MUL: return "mul";
      case ARMvfp_DIV: return "div";
      default: vpanic("showARMVfpOp");
   }
}

/* Return a printable mnemonic for a VFP unary op. */
HChar* showARMVfpUnaryOp ( ARMVfpUnaryOp op ) {
   switch (op) {
      case ARMvfpu_COPY: return "cpy";
      case ARMvfpu_NEG:  return "neg";
      case ARMvfpu_ABS:  return "abs";
      case ARMvfpu_SQRT: return "sqrt";
      default: vpanic("showARMVfpUnaryOp");
   }
}
    739 
    740 HChar* showARMNeonBinOp ( ARMNeonBinOp op ) {
    741    switch (op) {
    742       case ARMneon_VAND: return "vand";
    743       case ARMneon_VORR: return "vorr";
    744       case ARMneon_VXOR: return "veor";
    745       case ARMneon_VADD: return "vadd";
    746       case ARMneon_VRHADDS: return "vrhadd";
    747       case ARMneon_VRHADDU: return "vrhadd";
    748       case ARMneon_VADDFP: return "vadd";
    749       case ARMneon_VPADDFP: return "vpadd";
    750       case ARMneon_VABDFP: return "vabd";
    751       case ARMneon_VSUB: return "vsub";
    752       case ARMneon_VSUBFP: return "vsub";
    753       case ARMneon_VMINU: return "vmin";
    754       case ARMneon_VMINS: return "vmin";
    755       case ARMneon_VMINF: return "vmin";
    756       case ARMneon_VMAXU: return "vmax";
    757       case ARMneon_VMAXS: return "vmax";
    758       case ARMneon_VMAXF: return "vmax";
    759       case ARMneon_VQADDU: return "vqadd";
    760       case ARMneon_VQADDS: return "vqadd";
    761       case ARMneon_VQSUBU: return "vqsub";
    762       case ARMneon_VQSUBS: return "vqsub";
    763       case ARMneon_VCGTU:  return "vcgt";
    764       case ARMneon_VCGTS:  return "vcgt";
    765       case ARMneon_VCGTF:  return "vcgt";
    766       case ARMneon_VCGEF:  return "vcgt";
    767       case ARMneon_VCGEU:  return "vcge";
    768       case ARMneon_VCGES:  return "vcge";
    769       case ARMneon_VCEQ:  return "vceq";
    770       case ARMneon_VCEQF:  return "vceq";
    771       case ARMneon_VPADD:   return "vpadd";
    772       case ARMneon_VPMINU:   return "vpmin";
    773       case ARMneon_VPMINS:   return "vpmin";
    774       case ARMneon_VPMINF:   return "vpmin";
    775       case ARMneon_VPMAXU:   return "vpmax";
    776       case ARMneon_VPMAXS:   return "vpmax";
    777       case ARMneon_VPMAXF:   return "vpmax";
    778       case ARMneon_VEXT:   return "vext";
    779       case ARMneon_VMUL:   return "vmuli";
    780       case ARMneon_VMULLU:   return "vmull";
    781       case ARMneon_VMULLS:   return "vmull";
    782       case ARMneon_VMULP:  return "vmul";
    783       case ARMneon_VMULFP:  return "vmul";
    784       case ARMneon_VMULLP:  return "vmul";
    785       case ARMneon_VQDMULH: return "vqdmulh";
    786       case ARMneon_VQRDMULH: return "vqrdmulh";
    787       case ARMneon_VQDMULL: return "vqdmull";
    788       case ARMneon_VTBL: return "vtbl";
    789       case ARMneon_VRECPS: return "vrecps";
    790       case ARMneon_VRSQRTS: return "vrecps";
    791       /* ... */
    792       default: vpanic("showARMNeonBinOp");
    793    }
    794 }
    795 
/* Return the data-type suffix ("", ".i", ".u", ".s", ".p" or ".f")
   that accompanies a NEON binary op's mnemonic. */
HChar* showARMNeonBinOpDataType ( ARMNeonBinOp op ) {
   switch (op) {
      /* Bitwise ops: no suffix. */
      case ARMneon_VAND:
      case ARMneon_VORR:
      case ARMneon_VXOR:
         return "";
      /* Plain integer ops. */
      case ARMneon_VADD:
      case ARMneon_VSUB:
      case ARMneon_VEXT:
      case ARMneon_VMUL:
      case ARMneon_VPADD:
      case ARMneon_VTBL:
      case ARMneon_VCEQ:
         return ".i";
      /* Unsigned integer ops. */
      case ARMneon_VRHADDU:
      case ARMneon_VMINU:
      case ARMneon_VMAXU:
      case ARMneon_VQADDU:
      case ARMneon_VQSUBU:
      case ARMneon_VCGTU:
      case ARMneon_VCGEU:
      case ARMneon_VMULLU:
      case ARMneon_VPMINU:
      case ARMneon_VPMAXU:
         return ".u";
      /* Signed integer ops. */
      case ARMneon_VRHADDS:
      case ARMneon_VMINS:
      case ARMneon_VMAXS:
      case ARMneon_VQADDS:
      case ARMneon_VQSUBS:
      case ARMneon_VCGTS:
      case ARMneon_VCGES:
      case ARMneon_VQDMULL:
      case ARMneon_VMULLS:
      case ARMneon_VPMINS:
      case ARMneon_VPMAXS:
      case ARMneon_VQDMULH:
      case ARMneon_VQRDMULH:
         return ".s";
      /* Polynomial ops. */
      case ARMneon_VMULP:
      case ARMneon_VMULLP:
         return ".p";
      /* Floating-point ops. */
      case ARMneon_VADDFP:
      case ARMneon_VABDFP:
      case ARMneon_VPADDFP:
      case ARMneon_VSUBFP:
      case ARMneon_VMULFP:
      case ARMneon_VMINF:
      case ARMneon_VMAXF:
      case ARMneon_VPMINF:
      case ARMneon_VPMAXF:
      case ARMneon_VCGTF:
      case ARMneon_VCGEF:
      case ARMneon_VCEQF:
      case ARMneon_VRECPS:
      case ARMneon_VRSQRTS:
         return ".f";
      /* ... */
      default: vpanic("showARMNeonBinOpDataType");
   }
}
    857 
/* Return the instruction mnemonic for a Neon unary op.  Several ops
   share a mnemonic (e.g. all the COPY* variants print as "vmov");
   the data-type suffix that distinguishes them comes from
   showARMNeonUnOpDataType. */
HChar* showARMNeonUnOp ( ARMNeonUnOp op ) {
   switch (op) {
      case ARMneon_COPY: return "vmov";
      case ARMneon_COPYLS: return "vmov";
      case ARMneon_COPYLU: return "vmov";
      case ARMneon_COPYN: return "vmov";
      case ARMneon_COPYQNSS: return "vqmovn";
      case ARMneon_COPYQNUS: return "vqmovun";
      case ARMneon_COPYQNUU: return "vqmovn";
      case ARMneon_NOT: return "vmvn";
      case ARMneon_EQZ: return "vceq";
      case ARMneon_CNT: return "vcnt";
      case ARMneon_CLS: return "vcls";
      case ARMneon_CLZ: return "vclz";
      case ARMneon_DUP: return "vdup";
      case ARMneon_PADDLS: return "vpaddl";
      case ARMneon_PADDLU: return "vpaddl";
      case ARMneon_VQSHLNSS: return "vqshl";
      case ARMneon_VQSHLNUU: return "vqshl";
      case ARMneon_VQSHLNUS: return "vqshlu";
      case ARMneon_REV16: return "vrev16";
      case ARMneon_REV32: return "vrev32";
      case ARMneon_REV64: return "vrev64";
      case ARMneon_VCVTFtoU: return "vcvt";
      case ARMneon_VCVTFtoS: return "vcvt";
      case ARMneon_VCVTUtoF: return "vcvt";
      case ARMneon_VCVTStoF: return "vcvt";
      case ARMneon_VCVTFtoFixedU: return "vcvt";
      case ARMneon_VCVTFtoFixedS: return "vcvt";
      case ARMneon_VCVTFixedUtoF: return "vcvt";
      case ARMneon_VCVTFixedStoF: return "vcvt";
      case ARMneon_VCVTF32toF16: return "vcvt";
      case ARMneon_VCVTF16toF32: return "vcvt";
      case ARMneon_VRECIP: return "vrecip";
      case ARMneon_VRECIPF: return "vrecipf";
      case ARMneon_VNEGF: return "vneg";
      case ARMneon_ABS: return "vabs";
      case ARMneon_VABSFP: return "vabsfp";
      case ARMneon_VRSQRTEFP: return "vrsqrtefp";
      case ARMneon_VRSQRTE: return "vrsqrte";
      /* ... */
      default: vpanic("showARMNeonUnOp");
   }
}
    902 
/* Return the data-type suffix for a Neon unary op.  The VCVT cases
   carry both destination and source types (e.g. ".u32.f32"); the
   rest carry just a type class, with the element size printed
   separately by showARMNeonDataSize. */
HChar* showARMNeonUnOpDataType ( ARMNeonUnOp op ) {
   switch (op) {
      case ARMneon_COPY:
      case ARMneon_NOT:
         return "";
      case ARMneon_COPYN:
      case ARMneon_EQZ:
      case ARMneon_CNT:
      case ARMneon_DUP:
      case ARMneon_REV16:
      case ARMneon_REV32:
      case ARMneon_REV64:
         return ".i";
      case ARMneon_COPYLU:
      case ARMneon_PADDLU:
      case ARMneon_COPYQNUU:
      case ARMneon_VQSHLNUU:
      case ARMneon_VRECIP:
      case ARMneon_VRSQRTE:
         return ".u";
      case ARMneon_CLS:
      case ARMneon_CLZ:
      case ARMneon_COPYLS:
      case ARMneon_PADDLS:
      case ARMneon_COPYQNSS:
      case ARMneon_COPYQNUS:
      case ARMneon_VQSHLNSS:
      case ARMneon_VQSHLNUS:
      case ARMneon_ABS:
         return ".s";
      case ARMneon_VRECIPF:
      case ARMneon_VNEGF:
      case ARMneon_VABSFP:
      case ARMneon_VRSQRTEFP:
         return ".f";
      case ARMneon_VCVTFtoU: return ".u32.f32";
      case ARMneon_VCVTFtoS: return ".s32.f32";
      case ARMneon_VCVTUtoF: return ".f32.u32";
      case ARMneon_VCVTStoF: return ".f32.s32";
      case ARMneon_VCVTF16toF32: return ".f32.f16";
      case ARMneon_VCVTF32toF16: return ".f16.f32";
      case ARMneon_VCVTFtoFixedU: return ".u32.f32";
      case ARMneon_VCVTFtoFixedS: return ".s32.f32";
      case ARMneon_VCVTFixedUtoF: return ".f32.u32";
      case ARMneon_VCVTFixedStoF: return ".f32.s32";
      /* ... */
      default: vpanic("showARMNeonUnOpDataType");
   }
}
    952 
    953 HChar* showARMNeonUnOpS ( ARMNeonUnOpS op ) {
    954    switch (op) {
    955       case ARMneon_SETELEM: return "vmov";
    956       case ARMneon_GETELEMU: return "vmov";
    957       case ARMneon_GETELEMS: return "vmov";
    958       case ARMneon_VDUP: return "vdup";
    959       /* ... */
    960       default: vpanic("showARMNeonUnarySOp");
    961    }
    962 }
    963 
    964 HChar* showARMNeonUnOpSDataType ( ARMNeonUnOpS op ) {
    965    switch (op) {
    966       case ARMneon_SETELEM:
    967       case ARMneon_VDUP:
    968          return ".i";
    969       case ARMneon_GETELEMS:
    970          return ".s";
    971       case ARMneon_GETELEMU:
    972          return ".u";
    973       /* ... */
    974       default: vpanic("showARMNeonUnarySOp");
    975    }
    976 }
    977 
    978 HChar* showARMNeonShiftOp ( ARMNeonShiftOp op ) {
    979    switch (op) {
    980       case ARMneon_VSHL: return "vshl";
    981       case ARMneon_VSAL: return "vshl";
    982       case ARMneon_VQSHL: return "vqshl";
    983       case ARMneon_VQSAL: return "vqshl";
    984       /* ... */
    985       default: vpanic("showARMNeonShiftOp");
    986    }
    987 }
    988 
    989 HChar* showARMNeonShiftOpDataType ( ARMNeonShiftOp op ) {
    990    switch (op) {
    991       case ARMneon_VSHL:
    992       case ARMneon_VQSHL:
    993          return ".u";
    994       case ARMneon_VSAL:
    995       case ARMneon_VQSAL:
    996          return ".s";
    997       /* ... */
    998       default: vpanic("showARMNeonShiftOpDataType");
    999    }
   1000 }
   1001 
   1002 HChar* showARMNeonDualOp ( ARMNeonDualOp op ) {
   1003    switch (op) {
   1004       case ARMneon_TRN: return "vtrn";
   1005       case ARMneon_ZIP: return "vzip";
   1006       case ARMneon_UZP: return "vuzp";
   1007       /* ... */
   1008       default: vpanic("showARMNeonDualOp");
   1009    }
   1010 }
   1011 
   1012 HChar* showARMNeonDualOpDataType ( ARMNeonDualOp op ) {
   1013    switch (op) {
   1014       case ARMneon_TRN:
   1015       case ARMneon_ZIP:
   1016       case ARMneon_UZP:
   1017          return "i";
   1018       /* ... */
   1019       default: vpanic("showARMNeonDualOp");
   1020    }
   1021 }
   1022 
   1023 static HChar* showARMNeonDataSize_wrk ( UInt size )
   1024 {
   1025    switch (size) {
   1026       case 0: return "8";
   1027       case 1: return "16";
   1028       case 2: return "32";
   1029       case 3: return "64";
   1030       default: vpanic("showARMNeonDataSize");
   1031    }
   1032 }
   1033 
/* Return the element-size string ("8"/"16"/"32"/"64", or "") for a
   Neon instruction, decoding the per-instruction-kind encoding of
   the size field.  Some ops encode size unusually (VQSHLN*, VDUP)
   or have no meaningful size at all (bitwise ops, VCVT forms). */
static HChar* showARMNeonDataSize ( ARMInstr* i )
{
   switch (i->tag) {
      case ARMin_NBinary:
         /* VEXT always prints as .8; pure bitwise ops have no size. */
         if (i->ARMin.NBinary.op == ARMneon_VEXT)
            return "8";
         if (i->ARMin.NBinary.op == ARMneon_VAND ||
             i->ARMin.NBinary.op == ARMneon_VORR ||
             i->ARMin.NBinary.op == ARMneon_VXOR)
            return "";
         return showARMNeonDataSize_wrk(i->ARMin.NBinary.size);
      case ARMin_NUnary:
         /* Plain moves and the VCVT family print no size suffix. */
         if (i->ARMin.NUnary.op == ARMneon_COPY ||
             i->ARMin.NUnary.op == ARMneon_NOT ||
             i->ARMin.NUnary.op == ARMneon_VCVTF32toF16||
             i->ARMin.NUnary.op == ARMneon_VCVTF16toF32||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
             i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
             i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF ||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoS ||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoU ||
             i->ARMin.NUnary.op == ARMneon_VCVTStoF ||
             i->ARMin.NUnary.op == ARMneon_VCVTUtoF)
            return "";
         if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
             i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
             i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
            /* VQSHLN encodes lane size as the highest set marker bit
               in 'size' (0x40 -> 64 ... 0x08 -> 8); the bits below
               the marker hold the shift amount. */
            UInt size;
            size = i->ARMin.NUnary.size;
            if (size & 0x40)
               return "64";
            if (size & 0x20)
               return "32";
            if (size & 0x10)
               return "16";
            if (size & 0x08)
               return "8";
            vpanic("showARMNeonDataSize");
         }
         return showARMNeonDataSize_wrk(i->ARMin.NUnary.size);
      case ARMin_NUnaryS:
         if (i->ARMin.NUnaryS.op == ARMneon_VDUP) {
            /* VDUP encodes lane size by the position of the lowest
               set bit: xxx1 -> 8, xx10 -> 16, x100 -> 32. */
            int size;
            size = i->ARMin.NUnaryS.size;
            if ((size & 1) == 1)
               return "8";
            if ((size & 3) == 2)
               return "16";
            if ((size & 7) == 4)
               return "32";
            vpanic("showARMNeonDataSize");
         }
         return showARMNeonDataSize_wrk(i->ARMin.NUnaryS.size);
      case ARMin_NShift:
         return showARMNeonDataSize_wrk(i->ARMin.NShift.size);
      case ARMin_NDual:
         return showARMNeonDataSize_wrk(i->ARMin.NDual.size);
      default:
         vpanic("showARMNeonDataSize");
   }
}
   1096 
/* --- ARMInstr constructors (integer core) ---
   Each allocates a fresh ARMInstr from the VEX arena, sets the tag
   and fills in that variant's fields; the caller owns the result.
   No validation beyond the vasserts shown. */

/* Integer ALU op: dst = argL <op> argR (argR is an ARMRI84). */
ARMInstr* ARMInstr_Alu ( ARMAluOp op,
                         HReg dst, HReg argL, ARMRI84* argR ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag            = ARMin_Alu;
   i->ARMin.Alu.op   = op;
   i->ARMin.Alu.dst  = dst;
   i->ARMin.Alu.argL = argL;
   i->ARMin.Alu.argR = argR;
   return i;
}
/* Shift op: dst = argL <shift> argR (argR is an ARMRI5). */
ARMInstr* ARMInstr_Shift  ( ARMShiftOp op,
                            HReg dst, HReg argL, ARMRI5* argR ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag              = ARMin_Shift;
   i->ARMin.Shift.op   = op;
   i->ARMin.Shift.dst  = dst;
   i->ARMin.Shift.argL = argL;
   i->ARMin.Shift.argR = argR;
   return i;
}
/* Unary op: dst = <op> src. */
ARMInstr* ARMInstr_Unary ( ARMUnaryOp op, HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag             = ARMin_Unary;
   i->ARMin.Unary.op  = op;
   i->ARMin.Unary.dst = dst;
   i->ARMin.Unary.src = src;
   return i;
}
/* Flag-setting compare/test: cmp (isCmp) or tst (!isCmp) of argL
   against argR. */
ARMInstr* ARMInstr_CmpOrTst ( Bool isCmp, HReg argL, ARMRI84* argR ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                  = ARMin_CmpOrTst;
   i->ARMin.CmpOrTst.isCmp = isCmp;
   i->ARMin.CmpOrTst.argL  = argL;
   i->ARMin.CmpOrTst.argR  = argR;
   return i;
}
/* Unconditional register move: dst = src. */
ARMInstr* ARMInstr_Mov ( HReg dst, ARMRI84* src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag           = ARMin_Mov;
   i->ARMin.Mov.dst = dst;
   i->ARMin.Mov.src = src;
   return i;
}
/* Load an arbitrary 32-bit constant into dst. */
ARMInstr* ARMInstr_Imm32  ( HReg dst, UInt imm32 ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag               = ARMin_Imm32;
   i->ARMin.Imm32.dst   = dst;
   i->ARMin.Imm32.imm32 = imm32;
   return i;
}
/* 32-bit load (isLoad) or store (!isLoad) through an AMode1. */
ARMInstr* ARMInstr_LdSt32 ( Bool isLoad, HReg rD, ARMAMode1* amode ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                 = ARMin_LdSt32;
   i->ARMin.LdSt32.isLoad = isLoad;
   i->ARMin.LdSt32.rD     = rD;
   i->ARMin.LdSt32.amode  = amode;
   return i;
}
/* 16-bit load/store; loads may be sign-extending (signedLoad). */
ARMInstr* ARMInstr_LdSt16 ( Bool isLoad, Bool signedLoad,
                            HReg rD, ARMAMode2* amode ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                     = ARMin_LdSt16;
   i->ARMin.LdSt16.isLoad     = isLoad;
   i->ARMin.LdSt16.signedLoad = signedLoad;
   i->ARMin.LdSt16.rD         = rD;
   i->ARMin.LdSt16.amode      = amode;
   return i;
}
/* 8-bit unsigned load or store through an AMode1. */
ARMInstr* ARMInstr_LdSt8U ( Bool isLoad, HReg rD, ARMAMode1* amode ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                 = ARMin_LdSt8U;
   i->ARMin.LdSt8U.isLoad = isLoad;
   i->ARMin.LdSt8U.rD     = rD;
   i->ARMin.LdSt8U.amode  = amode;
   return i;
}
   1173 //extern ARMInstr* ARMInstr_Ld8S   ( HReg, ARMAMode2* );
/* Conditional jump to the address in gnext, with the IR jump kind
   recorded for the dispatcher. */
ARMInstr* ARMInstr_Goto ( IRJumpKind jk, ARMCondCode cond, HReg gnext ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag              = ARMin_Goto;
   i->ARMin.Goto.jk    = jk;
   i->ARMin.Goto.cond  = cond;
   i->ARMin.Goto.gnext = gnext;
   return i;
}
/* Conditional move; an always-true condition must use ARMInstr_Mov
   instead, hence the vassert. */
ARMInstr* ARMInstr_CMov ( ARMCondCode cond, HReg dst, ARMRI84* src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag             = ARMin_CMov;
   i->ARMin.CMov.cond = cond;
   i->ARMin.CMov.dst  = dst;
   i->ARMin.CMov.src  = src;
   vassert(cond != ARMcc_AL);
   return i;
}
/* Conditional call to a fixed address, passing nArgRegs args in
   registers. */
ARMInstr* ARMInstr_Call ( ARMCondCode cond, HWord target, Int nArgRegs ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                 = ARMin_Call;
   i->ARMin.Call.cond     = cond;
   i->ARMin.Call.target   = target;
   i->ARMin.Call.nArgRegs = nArgRegs;
   return i;
}
/* Multiply with fixed register assignments (see ppARMInstr). */
ARMInstr* ARMInstr_Mul ( ARMMulOp op ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag          = ARMin_Mul;
   i->ARMin.Mul.op = op;
   return i;
}
/* Load-exclusive of szB bytes (1/2/4/8), fixed registers. */
ARMInstr* ARMInstr_LdrEX ( Int szB ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag             = ARMin_LdrEX;
   i->ARMin.LdrEX.szB = szB;
   vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
   return i;
}
/* Store-exclusive of szB bytes (1/2/4/8), fixed registers. */
ARMInstr* ARMInstr_StrEX ( Int szB ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag             = ARMin_StrEX;
   i->ARMin.StrEX.szB = szB;
   vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
   return i;
}
/* VFP 64-bit (D-register) load or store. */
ARMInstr* ARMInstr_VLdStD ( Bool isLoad, HReg dD, ARMAModeV* am ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                 = ARMin_VLdStD;
   i->ARMin.VLdStD.isLoad = isLoad;
   i->ARMin.VLdStD.dD     = dD;
   i->ARMin.VLdStD.amode  = am;
   return i;
}
/* VFP 32-bit (S-register) load or store. */
ARMInstr* ARMInstr_VLdStS ( Bool isLoad, HReg fD, ARMAModeV* am ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                 = ARMin_VLdStS;
   i->ARMin.VLdStS.isLoad = isLoad;
   i->ARMin.VLdStS.fD     = fD;
   i->ARMin.VLdStS.amode  = am;
   return i;
}
/* VFP double-precision ALU op: dst = argL <op> argR. */
ARMInstr* ARMInstr_VAluD ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag              = ARMin_VAluD;
   i->ARMin.VAluD.op   = op;
   i->ARMin.VAluD.dst  = dst;
   i->ARMin.VAluD.argL = argL;
   i->ARMin.VAluD.argR = argR;
   return i;
}
/* VFP single-precision ALU op: dst = argL <op> argR. */
ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag              = ARMin_VAluS;
   i->ARMin.VAluS.op   = op;
   i->ARMin.VAluS.dst  = dst;
   i->ARMin.VAluS.argL = argL;
   i->ARMin.VAluS.argR = argR;
   return i;
}
/* VFP double-precision unary op: dst = <op> src. */
ARMInstr* ARMInstr_VUnaryD ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag               = ARMin_VUnaryD;
   i->ARMin.VUnaryD.op  = op;
   i->ARMin.VUnaryD.dst = dst;
   i->ARMin.VUnaryD.src = src;
   return i;
}
/* VFP single-precision unary op: dst = <op> src. */
ARMInstr* ARMInstr_VUnaryS ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag               = ARMin_VUnaryS;
   i->ARMin.VUnaryS.op  = op;
   i->ARMin.VUnaryS.dst = dst;
   i->ARMin.VUnaryS.src = src;
   return i;
}
/* VFP double compare (fcmpd + fmstat; see ppARMInstr). */
ARMInstr* ARMInstr_VCmpD ( HReg argL, HReg argR ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag              = ARMin_VCmpD;
   i->ARMin.VCmpD.argL = argL;
   i->ARMin.VCmpD.argR = argR;
   return i;
}
/* Conditional D-register copy; ARMcc_AL is disallowed (use an
   unconditional copy instead). */
ARMInstr* ARMInstr_VCMovD ( ARMCondCode cond, HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag               = ARMin_VCMovD;
   i->ARMin.VCMovD.cond = cond;
   i->ARMin.VCMovD.dst  = dst;
   i->ARMin.VCMovD.src  = src;
   vassert(cond != ARMcc_AL);
   return i;
}
/* Conditional S-register copy; ARMcc_AL is disallowed. */
ARMInstr* ARMInstr_VCMovS ( ARMCondCode cond, HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag               = ARMin_VCMovS;
   i->ARMin.VCMovS.cond = cond;
   i->ARMin.VCMovS.dst  = dst;
   i->ARMin.VCMovS.src  = src;
   vassert(cond != ARMcc_AL);
   return i;
}
/* Precision conversion: single->double if sToD, else double->single. */
ARMInstr* ARMInstr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag               = ARMin_VCvtSD;
   i->ARMin.VCvtSD.sToD = sToD;
   i->ARMin.VCvtSD.dst  = dst;
   i->ARMin.VCvtSD.src  = src;
   return i;
}
/* Move a 64-bit value between a D register and an rHi:rLo integer
   register pair; direction given by toD. */
ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag              = ARMin_VXferD;
   i->ARMin.VXferD.toD = toD;
   i->ARMin.VXferD.dD  = dD;
   i->ARMin.VXferD.rHi = rHi;
   i->ARMin.VXferD.rLo = rLo;
   return i;
}
/* Move a 32-bit value between an S register and an integer
   register; direction given by toS. */
ARMInstr* ARMInstr_VXferS ( Bool toS, HReg fD, HReg rLo ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag              = ARMin_VXferS;
   i->ARMin.VXferS.toS = toS;
   i->ARMin.VXferS.fD  = fD;
   i->ARMin.VXferS.rLo = rLo;
   return i;
}
/* Int<->double conversion; iToD gives direction, syned gives
   signedness of the integer side. */
ARMInstr* ARMInstr_VCvtID ( Bool iToD, Bool syned,
                            HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                = ARMin_VCvtID;
   i->ARMin.VCvtID.iToD  = iToD;
   i->ARMin.VCvtID.syned = syned;
   i->ARMin.VCvtID.dst   = dst;
   i->ARMin.VCvtID.src   = src;
   return i;
}
/* Move between an integer register and the FPSCR; direction given
   by toFPSCR. */
ARMInstr* ARMInstr_FPSCR ( Bool toFPSCR, HReg iReg ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                 = ARMin_FPSCR;
   i->ARMin.FPSCR.toFPSCR = toFPSCR;
   i->ARMin.FPSCR.iReg    = iReg;
   return i;
}
/* Memory fence; no operands. */
ARMInstr* ARMInstr_MFence ( void ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag      = ARMin_MFence;
   return i;
}
/* Clear the local exclusive monitor; no operands. */
ARMInstr* ARMInstr_CLREX( void ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag      = ARMin_CLREX;
   return i;
}
   1346 
/* Neon 128-bit (Q-register) load or store. */
ARMInstr* ARMInstr_NLdStQ ( Bool isLoad, HReg dQ, ARMAModeN *amode ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                  = ARMin_NLdStQ;
   i->ARMin.NLdStQ.isLoad  = isLoad;
   i->ARMin.NLdStQ.dQ      = dQ;
   i->ARMin.NLdStQ.amode   = amode;
   return i;
}

/* Neon 64-bit (D-register) load or store. */
ARMInstr* ARMInstr_NLdStD ( Bool isLoad, HReg dD, ARMAModeN *amode ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                  = ARMin_NLdStD;
   i->ARMin.NLdStD.isLoad  = isLoad;
   i->ARMin.NLdStD.dD      = dD;
   i->ARMin.NLdStD.amode   = amode;
   return i;
}

/* Neon unary op: dQ = <op> nQ; Q selects 128- vs 64-bit operation.
   The meaning of 'size' is op-dependent (see showARMNeonDataSize). */
ARMInstr* ARMInstr_NUnary ( ARMNeonUnOp op, HReg dQ, HReg nQ,
                            UInt size, Bool Q ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                = ARMin_NUnary;
   i->ARMin.NUnary.op   = op;
   i->ARMin.NUnary.src  = nQ;
   i->ARMin.NUnary.dst  = dQ;
   i->ARMin.NUnary.size = size;
   i->ARMin.NUnary.Q    = Q;
   return i;
}

/* Neon unary scalar op; dst/src are reg-or-lane descriptors
   (ARMNRS), not plain registers. */
ARMInstr* ARMInstr_NUnaryS ( ARMNeonUnOpS op, ARMNRS* dst, ARMNRS* src,
                             UInt size, Bool Q ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                = ARMin_NUnaryS;
   i->ARMin.NUnaryS.op   = op;
   i->ARMin.NUnaryS.src  = src;
   i->ARMin.NUnaryS.dst  = dst;
   i->ARMin.NUnaryS.size = size;
   i->ARMin.NUnaryS.Q    = Q;
   return i;
}

/* Neon dual op (TRN/ZIP/UZP): both operands are read and written. */
ARMInstr* ARMInstr_NDual ( ARMNeonDualOp op, HReg nQ, HReg mQ,
                           UInt size, Bool Q ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                = ARMin_NDual;
   i->ARMin.NDual.op   = op;
   i->ARMin.NDual.arg1 = nQ;
   i->ARMin.NDual.arg2 = mQ;
   i->ARMin.NDual.size = size;
   i->ARMin.NDual.Q    = Q;
   return i;
}

/* Neon binary op: dst = argL <op> argR. */
ARMInstr* ARMInstr_NBinary ( ARMNeonBinOp op,
                             HReg dst, HReg argL, HReg argR,
                             UInt size, Bool Q ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                = ARMin_NBinary;
   i->ARMin.NBinary.op   = op;
   i->ARMin.NBinary.argL = argL;
   i->ARMin.NBinary.argR = argR;
   i->ARMin.NBinary.dst  = dst;
   i->ARMin.NBinary.size = size;
   i->ARMin.NBinary.Q    = Q;
   return i;
}

/* Load a Neon immediate (ARMNImm) into dst. */
ARMInstr* ARMInstr_NeonImm (HReg dst, ARMNImm* imm ) {
   ARMInstr *i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag         = ARMin_NeonImm;
   i->ARMin.NeonImm.dst = dst;
   i->ARMin.NeonImm.imm = imm;
   return i;
}

/* Conditional Q-register copy; ARMcc_AL is disallowed. */
ARMInstr* ARMInstr_NCMovQ ( ARMCondCode cond, HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag               = ARMin_NCMovQ;
   i->ARMin.NCMovQ.cond = cond;
   i->ARMin.NCMovQ.dst  = dst;
   i->ARMin.NCMovQ.src  = src;
   vassert(cond != ARMcc_AL);
   return i;
}

/* Neon shift op: dst = argL shifted by argR. */
ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp op,
                            HReg dst, HReg argL, HReg argR,
                            UInt size, Bool Q ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                = ARMin_NShift;
   i->ARMin.NShift.op   = op;
   i->ARMin.NShift.argL = argL;
   i->ARMin.NShift.argR = argR;
   i->ARMin.NShift.dst  = dst;
   i->ARMin.NShift.size = size;
   i->ARMin.NShift.Q    = Q;
   return i;
}
   1446 
/* Helper copy-pasted from isel.c.  Try to express 'u' as an ARM
   modified immediate: an 8-bit payload rotated right by an even
   amount.  On success, write the payload to *u8 and the rotation
   count (in units of 2 bits, i.e. the instruction's 4-bit rot
   field) to *u4, and return True; otherwise return False with
   *u8/*u4 unwritten. */
static Bool fitsIn8x4 ( UInt* u8, UInt* u4, UInt u )
{
   UInt i;
   for (i = 0; i < 16; i++) {
      /* Does the current rotation of u fit in the low 8 bits? */
      if (0 == (u & 0xFFFFFF00)) {
         *u8 = u;
         *u4 = i;
         return True;
      }
      /* Rotate left by 2 (== rotate right by 30) and retry, so that
         after i steps, original == ROR(*u8, 2*i). */
      u = ROR32(u, 30);
   }
   vassert(i == 16);
   return False;
}
   1462 
   1463 ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 ) {
   1464    UInt u8, u4;
   1465    ARMInstr *i = LibVEX_Alloc(sizeof(ARMInstr));
   1466    /* Try to generate single ADD if possible */
   1467    if (fitsIn8x4(&u8, &u4, imm32)) {
   1468       i->tag            = ARMin_Alu;
   1469       i->ARMin.Alu.op   = ARMalu_ADD;
   1470       i->ARMin.Alu.dst  = rD;
   1471       i->ARMin.Alu.argL = rN;
   1472       i->ARMin.Alu.argR = ARMRI84_I84(u8, u4);
   1473    } else {
   1474       i->tag               = ARMin_Add32;
   1475       i->ARMin.Add32.rD    = rD;
   1476       i->ARMin.Add32.rN    = rN;
   1477       i->ARMin.Add32.imm32 = imm32;
   1478    }
   1479    return i;
   1480 }
   1481 
   1482 /* ... */
   1483 
   1484 void ppARMInstr ( ARMInstr* i ) {
   1485    switch (i->tag) {
   1486       case ARMin_Alu:
   1487          vex_printf("%-4s  ", showARMAluOp(i->ARMin.Alu.op));
   1488          ppHRegARM(i->ARMin.Alu.dst);
   1489          vex_printf(", ");
   1490          ppHRegARM(i->ARMin.Alu.argL);
   1491          vex_printf(", ");
   1492          ppARMRI84(i->ARMin.Alu.argR);
   1493          return;
   1494       case ARMin_Shift:
   1495          vex_printf("%s   ", showARMShiftOp(i->ARMin.Shift.op));
   1496          ppHRegARM(i->ARMin.Shift.dst);
   1497          vex_printf(", ");
   1498          ppHRegARM(i->ARMin.Shift.argL);
   1499          vex_printf(", ");
   1500          ppARMRI5(i->ARMin.Shift.argR);
   1501          return;
   1502       case ARMin_Unary:
   1503          vex_printf("%s   ", showARMUnaryOp(i->ARMin.Unary.op));
   1504          ppHRegARM(i->ARMin.Unary.dst);
   1505          vex_printf(", ");
   1506          ppHRegARM(i->ARMin.Unary.src);
   1507          return;
   1508       case ARMin_CmpOrTst:
   1509          vex_printf("%s   ", i->ARMin.CmpOrTst.isCmp ? "cmp" : "tst");
   1510          ppHRegARM(i->ARMin.CmpOrTst.argL);
   1511          vex_printf(", ");
   1512          ppARMRI84(i->ARMin.CmpOrTst.argR);
   1513          return;
   1514       case ARMin_Mov:
   1515          vex_printf("mov   ");
   1516          ppHRegARM(i->ARMin.Mov.dst);
   1517          vex_printf(", ");
   1518          ppARMRI84(i->ARMin.Mov.src);
   1519          return;
   1520       case ARMin_Imm32:
   1521          vex_printf("imm   ");
   1522          ppHRegARM(i->ARMin.Imm32.dst);
   1523          vex_printf(", 0x%x", i->ARMin.Imm32.imm32);
   1524          return;
   1525       case ARMin_LdSt32:
   1526          if (i->ARMin.LdSt32.isLoad) {
   1527             vex_printf("ldr   ");
   1528             ppHRegARM(i->ARMin.LdSt32.rD);
   1529             vex_printf(", ");
   1530             ppARMAMode1(i->ARMin.LdSt32.amode);
   1531          } else {
   1532             vex_printf("str   ");
   1533             ppARMAMode1(i->ARMin.LdSt32.amode);
   1534             vex_printf(", ");
   1535             ppHRegARM(i->ARMin.LdSt32.rD);
   1536          }
   1537          return;
   1538       case ARMin_LdSt16:
   1539          if (i->ARMin.LdSt16.isLoad) {
   1540             vex_printf("%s", i->ARMin.LdSt16.signedLoad
   1541                                 ? "ldrsh " : "ldrh  " );
   1542             ppHRegARM(i->ARMin.LdSt16.rD);
   1543             vex_printf(", ");
   1544             ppARMAMode2(i->ARMin.LdSt16.amode);
   1545          } else {
   1546             vex_printf("strh  ");
   1547             ppARMAMode2(i->ARMin.LdSt16.amode);
   1548             vex_printf(", ");
   1549             ppHRegARM(i->ARMin.LdSt16.rD);
   1550          }
   1551          return;
   1552       case ARMin_LdSt8U:
   1553          if (i->ARMin.LdSt8U.isLoad) {
   1554             vex_printf("ldrb  ");
   1555             ppHRegARM(i->ARMin.LdSt8U.rD);
   1556             vex_printf(", ");
   1557             ppARMAMode1(i->ARMin.LdSt8U.amode);
   1558          } else {
   1559             vex_printf("strb  ");
   1560             ppARMAMode1(i->ARMin.LdSt8U.amode);
   1561             vex_printf(", ");
   1562             ppHRegARM(i->ARMin.LdSt8U.rD);
   1563          }
   1564          return;
   1565       case ARMin_Ld8S:
   1566          goto unhandled;
   1567       case ARMin_Goto:
   1568          if (i->ARMin.Goto.cond != ARMcc_AL) {
   1569             vex_printf("if (%%cpsr.%s) { ",
   1570                        showARMCondCode(i->ARMin.Goto.cond));
   1571          } else {
   1572             vex_printf("if (1) { ");
   1573          }
   1574          if (i->ARMin.Goto.jk != Ijk_Boring
   1575              && i->ARMin.Goto.jk != Ijk_Call
   1576              && i->ARMin.Goto.jk != Ijk_Ret) {
   1577             vex_printf("mov r8, $");
   1578             ppIRJumpKind(i->ARMin.Goto.jk);
   1579             vex_printf(" ; ");
   1580          }
   1581          vex_printf("mov r0, ");
   1582          ppHRegARM(i->ARMin.Goto.gnext);
   1583          vex_printf(" ; bx r14");
   1584          if (i->ARMin.Goto.cond != ARMcc_AL) {
   1585             vex_printf(" }");
   1586          } else {
   1587             vex_printf(" }");
   1588          }
   1589          return;
   1590       case ARMin_CMov:
   1591          vex_printf("mov%s ", showARMCondCode(i->ARMin.CMov.cond));
   1592          ppHRegARM(i->ARMin.CMov.dst);
   1593          vex_printf(", ");
   1594          ppARMRI84(i->ARMin.CMov.src);
   1595          return;
   1596       case ARMin_Call:
   1597          vex_printf("call%s  ",
   1598                     i->ARMin.Call.cond==ARMcc_AL
   1599                        ? "" : showARMCondCode(i->ARMin.Call.cond));
   1600          vex_printf("0x%lx [nArgRegs=%d]",
   1601                     i->ARMin.Call.target, i->ARMin.Call.nArgRegs);
   1602          return;
   1603       case ARMin_Mul:
   1604          vex_printf("%-5s ", showARMMulOp(i->ARMin.Mul.op));
   1605          if (i->ARMin.Mul.op == ARMmul_PLAIN) {
   1606             vex_printf("r0, r2, r3");
   1607          } else {
   1608             vex_printf("r1:r0, r2, r3");
   1609          }
   1610          return;
   1611       case ARMin_LdrEX: {
   1612          HChar* sz = "";
   1613          switch (i->ARMin.LdrEX.szB) {
   1614             case 1: sz = "b"; break; case 2: sz = "h"; break;
   1615             case 8: sz = "d"; break; case 4: break;
   1616             default: vassert(0);
   1617          }
   1618          vex_printf("ldrex%s %sr2, [r4]",
   1619                     sz, i->ARMin.LdrEX.szB == 8 ? "r3:" : "");
   1620          return;
   1621       }
   1622       case ARMin_StrEX: {
   1623          HChar* sz = "";
   1624          switch (i->ARMin.StrEX.szB) {
   1625             case 1: sz = "b"; break; case 2: sz = "h"; break;
   1626             case 8: sz = "d"; break; case 4: break;
   1627             default: vassert(0);
   1628          }
   1629          vex_printf("strex%s r0, %sr2, [r4]",
   1630                     sz, i->ARMin.StrEX.szB == 8 ? "r3:" : "");
   1631          return;
   1632       }
   1633       case ARMin_VLdStD:
   1634          if (i->ARMin.VLdStD.isLoad) {
   1635             vex_printf("fldd  ");
   1636             ppHRegARM(i->ARMin.VLdStD.dD);
   1637             vex_printf(", ");
   1638             ppARMAModeV(i->ARMin.VLdStD.amode);
   1639          } else {
   1640             vex_printf("fstd  ");
   1641             ppARMAModeV(i->ARMin.VLdStD.amode);
   1642             vex_printf(", ");
   1643             ppHRegARM(i->ARMin.VLdStD.dD);
   1644          }
   1645          return;
   1646       case ARMin_VLdStS:
   1647          if (i->ARMin.VLdStS.isLoad) {
   1648             vex_printf("flds  ");
   1649             ppHRegARM(i->ARMin.VLdStS.fD);
   1650             vex_printf(", ");
   1651             ppARMAModeV(i->ARMin.VLdStS.amode);
   1652          } else {
   1653             vex_printf("fsts  ");
   1654             ppARMAModeV(i->ARMin.VLdStS.amode);
   1655             vex_printf(", ");
   1656             ppHRegARM(i->ARMin.VLdStS.fD);
   1657          }
   1658          return;
   1659       case ARMin_VAluD:
   1660          vex_printf("f%-3sd ", showARMVfpOp(i->ARMin.VAluD.op));
   1661          ppHRegARM(i->ARMin.VAluD.dst);
   1662          vex_printf(", ");
   1663          ppHRegARM(i->ARMin.VAluD.argL);
   1664          vex_printf(", ");
   1665          ppHRegARM(i->ARMin.VAluD.argR);
   1666          return;
   1667       case ARMin_VAluS:
   1668          vex_printf("f%-3ss ", showARMVfpOp(i->ARMin.VAluS.op));
   1669          ppHRegARM(i->ARMin.VAluS.dst);
   1670          vex_printf(", ");
   1671          ppHRegARM(i->ARMin.VAluS.argL);
   1672          vex_printf(", ");
   1673          ppHRegARM(i->ARMin.VAluS.argR);
   1674          return;
   1675       case ARMin_VUnaryD:
   1676          vex_printf("f%-3sd ", showARMVfpUnaryOp(i->ARMin.VUnaryD.op));
   1677          ppHRegARM(i->ARMin.VUnaryD.dst);
   1678          vex_printf(", ");
   1679          ppHRegARM(i->ARMin.VUnaryD.src);
   1680          return;
   1681       case ARMin_VUnaryS:
   1682          vex_printf("f%-3ss ", showARMVfpUnaryOp(i->ARMin.VUnaryS.op));
   1683          ppHRegARM(i->ARMin.VUnaryS.dst);
   1684          vex_printf(", ");
   1685          ppHRegARM(i->ARMin.VUnaryS.src);
   1686          return;
   1687       case ARMin_VCmpD:
   1688          vex_printf("fcmpd ");
   1689          ppHRegARM(i->ARMin.VCmpD.argL);
   1690          vex_printf(", ");
   1691          ppHRegARM(i->ARMin.VCmpD.argR);
   1692          vex_printf(" ; fmstat");
   1693          return;
   1694       case ARMin_VCMovD:
   1695          vex_printf("fcpyd%s ", showARMCondCode(i->ARMin.VCMovD.cond));
   1696          ppHRegARM(i->ARMin.VCMovD.dst);
   1697          vex_printf(", ");
   1698          ppHRegARM(i->ARMin.VCMovD.src);
   1699          return;
   1700       case ARMin_VCMovS:
   1701          vex_printf("fcpys%s ", showARMCondCode(i->ARMin.VCMovS.cond));
   1702          ppHRegARM(i->ARMin.VCMovS.dst);
   1703          vex_printf(", ");
   1704          ppHRegARM(i->ARMin.VCMovS.src);
   1705          return;
   1706       case ARMin_VCvtSD:
   1707          vex_printf("fcvt%s ", i->ARMin.VCvtSD.sToD ? "ds" : "sd");
   1708          ppHRegARM(i->ARMin.VCvtSD.dst);
   1709          vex_printf(", ");
   1710          ppHRegARM(i->ARMin.VCvtSD.src);
   1711          return;
   1712       case ARMin_VXferD:
   1713          vex_printf("vmov  ");
   1714          if (i->ARMin.VXferD.toD) {
   1715             ppHRegARM(i->ARMin.VXferD.dD);
   1716             vex_printf(", ");
   1717             ppHRegARM(i->ARMin.VXferD.rLo);
   1718             vex_printf(", ");
   1719             ppHRegARM(i->ARMin.VXferD.rHi);
   1720          } else {
   1721             ppHRegARM(i->ARMin.VXferD.rLo);
   1722             vex_printf(", ");
   1723             ppHRegARM(i->ARMin.VXferD.rHi);
   1724             vex_printf(", ");
   1725             ppHRegARM(i->ARMin.VXferD.dD);
   1726          }
   1727          return;
   1728       case ARMin_VXferS:
   1729          vex_printf("vmov  ");
   1730          if (i->ARMin.VXferS.toS) {
   1731             ppHRegARM(i->ARMin.VXferS.fD);
   1732             vex_printf(", ");
   1733             ppHRegARM(i->ARMin.VXferS.rLo);
   1734          } else {
   1735             ppHRegARM(i->ARMin.VXferS.rLo);
   1736             vex_printf(", ");
   1737             ppHRegARM(i->ARMin.VXferS.fD);
   1738          }
   1739          return;
   1740       case ARMin_VCvtID: {
   1741          HChar* nm = "?";
   1742          if (i->ARMin.VCvtID.iToD) {
   1743             nm = i->ARMin.VCvtID.syned ? "fsitod" : "fuitod";
   1744          } else {
   1745             nm = i->ARMin.VCvtID.syned ? "ftosid" : "ftouid";
   1746          }
   1747          vex_printf("%s ", nm);
   1748          ppHRegARM(i->ARMin.VCvtID.dst);
   1749          vex_printf(", ");
   1750          ppHRegARM(i->ARMin.VCvtID.src);
   1751          return;
   1752       }
   1753       case ARMin_FPSCR:
   1754          if (i->ARMin.FPSCR.toFPSCR) {
   1755             vex_printf("fmxr  fpscr, ");
   1756             ppHRegARM(i->ARMin.FPSCR.iReg);
   1757          } else {
   1758             vex_printf("fmrx  ");
   1759             ppHRegARM(i->ARMin.FPSCR.iReg);
   1760             vex_printf(", fpscr");
   1761          }
   1762          return;
   1763       case ARMin_MFence:
   1764          vex_printf("mfence (mcr 15,0,r0,c7,c10,4; 15,0,r0,c7,c10,5; "
   1765                     "15,0,r0,c7,c5,4)");
   1766          return;
   1767       case ARMin_CLREX:
   1768          vex_printf("clrex");
   1769          return;
   1770       case ARMin_NLdStQ:
   1771          if (i->ARMin.NLdStQ.isLoad)
   1772             vex_printf("vld1.32 {");
   1773          else
   1774             vex_printf("vst1.32 {");
   1775          ppHRegARM(i->ARMin.NLdStQ.dQ);
   1776          vex_printf("} ");
   1777          ppARMAModeN(i->ARMin.NLdStQ.amode);
   1778          return;
   1779       case ARMin_NLdStD:
   1780          if (i->ARMin.NLdStD.isLoad)
   1781             vex_printf("vld1.32 {");
   1782          else
   1783             vex_printf("vst1.32 {");
   1784          ppHRegARM(i->ARMin.NLdStD.dD);
   1785          vex_printf("} ");
   1786          ppARMAModeN(i->ARMin.NLdStD.amode);
   1787          return;
   1788       case ARMin_NUnary:
   1789          vex_printf("%s%s%s  ",
   1790                     showARMNeonUnOp(i->ARMin.NUnary.op),
   1791                     showARMNeonUnOpDataType(i->ARMin.NUnary.op),
   1792                     showARMNeonDataSize(i));
   1793          ppHRegARM(i->ARMin.NUnary.dst);
   1794          vex_printf(", ");
   1795          ppHRegARM(i->ARMin.NUnary.src);
   1796          if (i->ARMin.NUnary.op == ARMneon_EQZ)
   1797             vex_printf(", #0");
   1798          if (i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
   1799              i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
   1800              i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
   1801              i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF) {
   1802             vex_printf(", #%d", i->ARMin.NUnary.size);
   1803          }
   1804          if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
   1805              i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
   1806              i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
   1807             UInt size;
   1808             size = i->ARMin.NUnary.size;
   1809             if (size & 0x40) {
   1810                vex_printf(", #%d", size - 64);
   1811             } else if (size & 0x20) {
   1812                vex_printf(", #%d", size - 32);
   1813             } else if (size & 0x10) {
   1814                vex_printf(", #%d", size - 16);
   1815             } else if (size & 0x08) {
   1816                vex_printf(", #%d", size - 8);
   1817             }
   1818          }
   1819          return;
   1820       case ARMin_NUnaryS:
   1821          vex_printf("%s%s%s  ",
   1822                     showARMNeonUnOpS(i->ARMin.NUnaryS.op),
   1823                     showARMNeonUnOpSDataType(i->ARMin.NUnaryS.op),
   1824                     showARMNeonDataSize(i));
   1825          ppARMNRS(i->ARMin.NUnaryS.dst);
   1826          vex_printf(", ");
   1827          ppARMNRS(i->ARMin.NUnaryS.src);
   1828          return;
   1829       case ARMin_NShift:
   1830          vex_printf("%s%s%s  ",
   1831                     showARMNeonShiftOp(i->ARMin.NShift.op),
   1832                     showARMNeonShiftOpDataType(i->ARMin.NShift.op),
   1833                     showARMNeonDataSize(i));
   1834          ppHRegARM(i->ARMin.NShift.dst);
   1835          vex_printf(", ");
   1836          ppHRegARM(i->ARMin.NShift.argL);
   1837          vex_printf(", ");
   1838          ppHRegARM(i->ARMin.NShift.argR);
   1839          return;
   1840       case ARMin_NDual:
   1841          vex_printf("%s%s%s  ",
   1842                     showARMNeonDualOp(i->ARMin.NDual.op),
   1843                     showARMNeonDualOpDataType(i->ARMin.NDual.op),
   1844                     showARMNeonDataSize(i));
   1845          ppHRegARM(i->ARMin.NDual.arg1);
   1846          vex_printf(", ");
   1847          ppHRegARM(i->ARMin.NDual.arg2);
   1848          return;
   1849       case ARMin_NBinary:
   1850          vex_printf("%s%s%s",
   1851                     showARMNeonBinOp(i->ARMin.NBinary.op),
   1852                     showARMNeonBinOpDataType(i->ARMin.NBinary.op),
   1853                     showARMNeonDataSize(i));
   1854          vex_printf("  ");
   1855          ppHRegARM(i->ARMin.NBinary.dst);
   1856          vex_printf(", ");
   1857          ppHRegARM(i->ARMin.NBinary.argL);
   1858          vex_printf(", ");
   1859          ppHRegARM(i->ARMin.NBinary.argR);
   1860          return;
   1861       case ARMin_NeonImm:
   1862          vex_printf("vmov  ");
   1863          ppHRegARM(i->ARMin.NeonImm.dst);
   1864          vex_printf(", ");
   1865          ppARMNImm(i->ARMin.NeonImm.imm);
   1866          return;
   1867       case ARMin_NCMovQ:
   1868          vex_printf("vmov%s ", showARMCondCode(i->ARMin.NCMovQ.cond));
   1869          ppHRegARM(i->ARMin.NCMovQ.dst);
   1870          vex_printf(", ");
   1871          ppHRegARM(i->ARMin.NCMovQ.src);
   1872          return;
   1873       case ARMin_Add32:
   1874          vex_printf("add32 ");
   1875          ppHRegARM(i->ARMin.Add32.rD);
   1876          vex_printf(", ");
   1877          ppHRegARM(i->ARMin.Add32.rN);
   1878          vex_printf(", ");
   1879          vex_printf("%d", i->ARMin.Add32.imm32);
   1880          return;
   1881       default:
   1882       unhandled:
   1883          vex_printf("ppARMInstr: unhandled case (tag %d)", (Int)i->tag);
   1884          vpanic("ppARMInstr(1)");
   1885          return;
   1886    }
   1887 }
   1888 
   1889 
   1890 /* --------- Helpers for register allocation. --------- */
   1891 
/* Record in |u| how instruction |i| uses registers: which it reads,
   which it writes, and which it both reads and writes.  This is the
   register allocator's only source of liveness information, so every
   instruction tag must be handled here.  mode64 must be False since
   ARM is a 32-bit-only target. */
void getRegUsage_ARMInstr ( HRegUsage* u, ARMInstr* i, Bool mode64 )
{
   vassert(mode64 == False);
   initHRegUsage(u);
   switch (i->tag) {
      case ARMin_Alu:
         addHRegUse(u, HRmWrite, i->ARMin.Alu.dst);
         addHRegUse(u, HRmRead, i->ARMin.Alu.argL);
         addRegUsage_ARMRI84(u, i->ARMin.Alu.argR);
         return;
      case ARMin_Shift:
         addHRegUse(u, HRmWrite, i->ARMin.Shift.dst);
         addHRegUse(u, HRmRead, i->ARMin.Shift.argL);
         addRegUsage_ARMRI5(u, i->ARMin.Shift.argR);
         return;
      case ARMin_Unary:
         addHRegUse(u, HRmWrite, i->ARMin.Unary.dst);
         addHRegUse(u, HRmRead, i->ARMin.Unary.src);
         return;
      case ARMin_CmpOrTst:
         /* Only reads its operands; the result goes to the flags. */
         addHRegUse(u, HRmRead, i->ARMin.CmpOrTst.argL);
         addRegUsage_ARMRI84(u, i->ARMin.CmpOrTst.argR);
         return;
      case ARMin_Mov:
         addHRegUse(u, HRmWrite, i->ARMin.Mov.dst);
         addRegUsage_ARMRI84(u, i->ARMin.Mov.src);
         return;
      case ARMin_Imm32:
         addHRegUse(u, HRmWrite, i->ARMin.Imm32.dst);
         return;
      case ARMin_LdSt32:
         /* Load writes rD; store reads it.  The amode registers are
            always read. */
         addRegUsage_ARMAMode1(u, i->ARMin.LdSt32.amode);
         if (i->ARMin.LdSt32.isLoad) {
            addHRegUse(u, HRmWrite, i->ARMin.LdSt32.rD);
         } else {
            addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
         }
         return;
      case ARMin_LdSt16:
         addRegUsage_ARMAMode2(u, i->ARMin.LdSt16.amode);
         if (i->ARMin.LdSt16.isLoad) {
            addHRegUse(u, HRmWrite, i->ARMin.LdSt16.rD);
         } else {
            addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
         }
         return;
      case ARMin_LdSt8U:
         addRegUsage_ARMAMode1(u, i->ARMin.LdSt8U.amode);
         if (i->ARMin.LdSt8U.isLoad) {
            addHRegUse(u, HRmWrite, i->ARMin.LdSt8U.rD);
         } else {
            addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
         }
         return;
      case ARMin_Ld8S:
         goto unhandled;
      case ARMin_Goto:
         /* reads the reg holding the next guest addr */
         addHRegUse(u, HRmRead, i->ARMin.Goto.gnext);
         /* writes it to the standard integer return register */
         addHRegUse(u, HRmWrite, hregARM_R0());
         /* possibly messes with the baseblock pointer */
         if (i->ARMin.Goto.jk != Ijk_Boring
             && i->ARMin.Goto.jk != Ijk_Call
             && i->ARMin.Goto.jk != Ijk_Ret)
            /* note, this is irrelevant since r8 is not actually
               available to the allocator.  But still .. */
            addHRegUse(u, HRmWrite, hregARM_R8());
         return;
      case ARMin_CMov:
         /* Conditional move: dst is modified, not just written, since
            it keeps its old value when the condition is false. */
         addHRegUse(u, HRmWrite, i->ARMin.CMov.dst);
         addHRegUse(u, HRmRead,  i->ARMin.CMov.dst);
         addRegUsage_ARMRI84(u, i->ARMin.CMov.src);
         return;
      case ARMin_Call:
         /* logic and comments copied/modified from x86 back end */
         /* This is a bit subtle. */
         /* First off, claim it trashes all the caller-saved regs
            which fall within the register allocator's jurisdiction.
            These I believe to be r0,1,2,3.  If it turns out that r9
            is also caller-saved, then we'll have to add that here
            too. */
         addHRegUse(u, HRmWrite, hregARM_R0());
         addHRegUse(u, HRmWrite, hregARM_R1());
         addHRegUse(u, HRmWrite, hregARM_R2());
         addHRegUse(u, HRmWrite, hregARM_R3());
         /* Now we have to state any parameter-carrying registers
            which might be read.  This depends on nArgRegs. */
         switch (i->ARMin.Call.nArgRegs) {
            case 4: addHRegUse(u, HRmRead, hregARM_R3()); /*fallthru*/
            case 3: addHRegUse(u, HRmRead, hregARM_R2()); /*fallthru*/
            case 2: addHRegUse(u, HRmRead, hregARM_R1()); /*fallthru*/
            case 1: addHRegUse(u, HRmRead, hregARM_R0()); break;
            case 0: break;
            default: vpanic("getRegUsage_ARM:Call:regparms");
         }
         /* Finally, there is the issue that the insn trashes a
            register because the literal target address has to be
            loaded into a register.  Fortunately, for the nArgRegs=
            0/1/2/3 case, we can use r0, r1, r2 or r3 respectively, so
            this does not cause any further damage.  For the
            nArgRegs=4 case, we'll have to choose another register
            arbitrarily since all the caller saved regs are used for
            parameters, and so we might as well choose r11.
            */
         if (i->ARMin.Call.nArgRegs == 4)
            addHRegUse(u, HRmWrite, hregARM_R11());
         /* Upshot of this is that the assembler really must observe
            the here-stated convention of which register to use as an
            address temporary, depending on nArgRegs: 0==r0,
            1==r1, 2==r2, 3==r3, 4==r11 */
         return;
      case ARMin_Mul:
         /* Mul uses hardwired real registers: args in r2/r3, result
            in r0 (and r1 for the 64-bit-result variants). */
         addHRegUse(u, HRmRead, hregARM_R2());
         addHRegUse(u, HRmRead, hregARM_R3());
         addHRegUse(u, HRmWrite, hregARM_R0());
         if (i->ARMin.Mul.op != ARMmul_PLAIN)
            addHRegUse(u, HRmWrite, hregARM_R1());
         return;
      case ARMin_LdrEX:
         /* Hardwired: address in r4, result in r2 (and r3 for the
            doubleword case). */
         addHRegUse(u, HRmRead, hregARM_R4());
         addHRegUse(u, HRmWrite, hregARM_R2());
         if (i->ARMin.LdrEX.szB == 8)
            addHRegUse(u, HRmWrite, hregARM_R3());
         return;
      case ARMin_StrEX:
         /* Hardwired: address in r4, data in r2 (and r3 for the
            doubleword case), success flag written to r0. */
         addHRegUse(u, HRmRead, hregARM_R4());
         addHRegUse(u, HRmWrite, hregARM_R0());
         addHRegUse(u, HRmRead, hregARM_R2());
         if (i->ARMin.StrEX.szB == 8)
            addHRegUse(u, HRmRead, hregARM_R3());
         return;
      case ARMin_VLdStD:
         addRegUsage_ARMAModeV(u, i->ARMin.VLdStD.amode);
         if (i->ARMin.VLdStD.isLoad) {
            addHRegUse(u, HRmWrite, i->ARMin.VLdStD.dD);
         } else {
            addHRegUse(u, HRmRead, i->ARMin.VLdStD.dD);
         }
         return;
      case ARMin_VLdStS:
         addRegUsage_ARMAModeV(u, i->ARMin.VLdStS.amode);
         if (i->ARMin.VLdStS.isLoad) {
            addHRegUse(u, HRmWrite, i->ARMin.VLdStS.fD);
         } else {
            addHRegUse(u, HRmRead, i->ARMin.VLdStS.fD);
         }
         return;
      case ARMin_VAluD:
         addHRegUse(u, HRmWrite, i->ARMin.VAluD.dst);
         addHRegUse(u, HRmRead, i->ARMin.VAluD.argL);
         addHRegUse(u, HRmRead, i->ARMin.VAluD.argR);
         return;
      case ARMin_VAluS:
         addHRegUse(u, HRmWrite, i->ARMin.VAluS.dst);
         addHRegUse(u, HRmRead, i->ARMin.VAluS.argL);
         addHRegUse(u, HRmRead, i->ARMin.VAluS.argR);
         return;
      case ARMin_VUnaryD:
         addHRegUse(u, HRmWrite, i->ARMin.VUnaryD.dst);
         addHRegUse(u, HRmRead, i->ARMin.VUnaryD.src);
         return;
      case ARMin_VUnaryS:
         addHRegUse(u, HRmWrite, i->ARMin.VUnaryS.dst);
         addHRegUse(u, HRmRead, i->ARMin.VUnaryS.src);
         return;
      case ARMin_VCmpD:
         addHRegUse(u, HRmRead, i->ARMin.VCmpD.argL);
         addHRegUse(u, HRmRead, i->ARMin.VCmpD.argR);
         return;
      case ARMin_VCMovD:
         /* Conditional: dst is read-modify-write. */
         addHRegUse(u, HRmWrite, i->ARMin.VCMovD.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCMovD.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCMovD.src);
         return;
      case ARMin_VCMovS:
         /* Conditional: dst is read-modify-write. */
         addHRegUse(u, HRmWrite, i->ARMin.VCMovS.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCMovS.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCMovS.src);
         return;
      case ARMin_VCvtSD:
         addHRegUse(u, HRmWrite, i->ARMin.VCvtSD.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCvtSD.src);
         return;
      case ARMin_VXferD:
         if (i->ARMin.VXferD.toD) {
            addHRegUse(u, HRmWrite, i->ARMin.VXferD.dD);
            addHRegUse(u, HRmRead,  i->ARMin.VXferD.rHi);
            addHRegUse(u, HRmRead,  i->ARMin.VXferD.rLo);
         } else {
            addHRegUse(u, HRmRead,  i->ARMin.VXferD.dD);
            addHRegUse(u, HRmWrite, i->ARMin.VXferD.rHi);
            addHRegUse(u, HRmWrite, i->ARMin.VXferD.rLo);
         }
         return;
      case ARMin_VXferS:
         if (i->ARMin.VXferS.toS) {
            addHRegUse(u, HRmWrite, i->ARMin.VXferS.fD);
            addHRegUse(u, HRmRead,  i->ARMin.VXferS.rLo);
         } else {
            addHRegUse(u, HRmRead,  i->ARMin.VXferS.fD);
            addHRegUse(u, HRmWrite, i->ARMin.VXferS.rLo);
         }
         return;
      case ARMin_VCvtID:
         addHRegUse(u, HRmWrite, i->ARMin.VCvtID.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCvtID.src);
         return;
      case ARMin_FPSCR:
         if (i->ARMin.FPSCR.toFPSCR)
            addHRegUse(u, HRmRead, i->ARMin.FPSCR.iReg);
         else
            addHRegUse(u, HRmWrite, i->ARMin.FPSCR.iReg);
         return;
      case ARMin_MFence:
         /* No register operands. */
         return;
      case ARMin_CLREX:
         /* No register operands. */
         return;
      case ARMin_NLdStQ:
         if (i->ARMin.NLdStQ.isLoad)
            addHRegUse(u, HRmWrite, i->ARMin.NLdStQ.dQ);
         else
            addHRegUse(u, HRmRead, i->ARMin.NLdStQ.dQ);
         addRegUsage_ARMAModeN(u, i->ARMin.NLdStQ.amode);
         return;
      case ARMin_NLdStD:
         if (i->ARMin.NLdStD.isLoad)
            addHRegUse(u, HRmWrite, i->ARMin.NLdStD.dD);
         else
            addHRegUse(u, HRmRead, i->ARMin.NLdStD.dD);
         addRegUsage_ARMAModeN(u, i->ARMin.NLdStD.amode);
         return;
      case ARMin_NUnary:
         addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst);
         addHRegUse(u, HRmRead, i->ARMin.NUnary.src);
         return;
      case ARMin_NUnaryS:
         addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg);
         addHRegUse(u, HRmRead, i->ARMin.NUnaryS.src->reg);
         return;
      case ARMin_NShift:
         addHRegUse(u, HRmWrite, i->ARMin.NShift.dst);
         addHRegUse(u, HRmRead, i->ARMin.NShift.argL);
         addHRegUse(u, HRmRead, i->ARMin.NShift.argR);
         return;
      case ARMin_NDual:
         /* Both operands are updated in place. */
         addHRegUse(u, HRmWrite, i->ARMin.NDual.arg1);
         addHRegUse(u, HRmWrite, i->ARMin.NDual.arg2);
         addHRegUse(u, HRmRead, i->ARMin.NDual.arg1);
         addHRegUse(u, HRmRead, i->ARMin.NDual.arg2);
         return;
      case ARMin_NBinary:
         addHRegUse(u, HRmWrite, i->ARMin.NBinary.dst);
         /* TODO: sometimes dst is also being read! */
         // XXX fix this
         addHRegUse(u, HRmRead, i->ARMin.NBinary.argL);
         addHRegUse(u, HRmRead, i->ARMin.NBinary.argR);
         return;
      case ARMin_NeonImm:
         addHRegUse(u, HRmWrite, i->ARMin.NeonImm.dst);
         return;
      case ARMin_NCMovQ:
         /* Conditional: dst is read-modify-write. */
         addHRegUse(u, HRmWrite, i->ARMin.NCMovQ.dst);
         addHRegUse(u, HRmRead,  i->ARMin.NCMovQ.dst);
         addHRegUse(u, HRmRead,  i->ARMin.NCMovQ.src);
         return;
      case ARMin_Add32:
         addHRegUse(u, HRmWrite, i->ARMin.Add32.rD);
         addHRegUse(u, HRmRead, i->ARMin.Add32.rN);
         return;
      unhandled:
      default:
         ppARMInstr(i);
         vpanic("getRegUsage_ARMInstr");
   }
}
   2168 
   2169 
   2170 void mapRegs_ARMInstr ( HRegRemap* m, ARMInstr* i, Bool mode64 )
   2171 {
   2172    vassert(mode64 == False);
   2173    switch (i->tag) {
   2174       case ARMin_Alu:
   2175          i->ARMin.Alu.dst = lookupHRegRemap(m, i->ARMin.Alu.dst);
   2176          i->ARMin.Alu.argL = lookupHRegRemap(m, i->ARMin.Alu.argL);
   2177          mapRegs_ARMRI84(m, i->ARMin.Alu.argR);
   2178          return;
   2179       case ARMin_Shift:
   2180          i->ARMin.Shift.dst = lookupHRegRemap(m, i->ARMin.Shift.dst);
   2181          i->ARMin.Shift.argL = lookupHRegRemap(m, i->ARMin.Shift.argL);
   2182          mapRegs_ARMRI5(m, i->ARMin.Shift.argR);
   2183          return;
   2184       case ARMin_Unary:
   2185          i->ARMin.Unary.dst = lookupHRegRemap(m, i->ARMin.Unary.dst);
   2186          i->ARMin.Unary.src = lookupHRegRemap(m, i->ARMin.Unary.src);
   2187          return;
   2188       case ARMin_CmpOrTst:
   2189          i->ARMin.CmpOrTst.argL = lookupHRegRemap(m, i->ARMin.CmpOrTst.argL);
   2190          mapRegs_ARMRI84(m, i->ARMin.CmpOrTst.argR);
   2191          return;
   2192       case ARMin_Mov:
   2193          i->ARMin.Mov.dst = lookupHRegRemap(m, i->ARMin.Mov.dst);
   2194          mapRegs_ARMRI84(m, i->ARMin.Mov.src);
   2195          return;
   2196       case ARMin_Imm32:
   2197          i->ARMin.Imm32.dst = lookupHRegRemap(m, i->ARMin.Imm32.dst);
   2198          return;
   2199       case ARMin_LdSt32:
   2200          i->ARMin.LdSt32.rD = lookupHRegRemap(m, i->ARMin.LdSt32.rD);
   2201          mapRegs_ARMAMode1(m, i->ARMin.LdSt32.amode);
   2202          return;
   2203       case ARMin_LdSt16:
   2204          i->ARMin.LdSt16.rD = lookupHRegRemap(m, i->ARMin.LdSt16.rD);
   2205          mapRegs_ARMAMode2(m, i->ARMin.LdSt16.amode);
   2206          return;
   2207       case ARMin_LdSt8U:
   2208          i->ARMin.LdSt8U.rD = lookupHRegRemap(m, i->ARMin.LdSt8U.rD);
   2209          mapRegs_ARMAMode1(m, i->ARMin.LdSt8U.amode);
   2210          return;
   2211       case ARMin_Ld8S:
   2212          goto unhandled;
   2213       case ARMin_Goto:
   2214          i->ARMin.Goto.gnext = lookupHRegRemap(m, i->ARMin.Goto.gnext);
   2215          return;
   2216       case ARMin_CMov:
   2217          i->ARMin.CMov.dst = lookupHRegRemap(m, i->ARMin.CMov.dst);
   2218          mapRegs_ARMRI84(m, i->ARMin.CMov.src);
   2219          return;
   2220       case ARMin_Call:
   2221          return;
   2222       case ARMin_Mul:
   2223          return;
   2224       case ARMin_LdrEX:
   2225          return;
   2226       case ARMin_StrEX:
   2227          return;
   2228       case ARMin_VLdStD:
   2229          i->ARMin.VLdStD.dD = lookupHRegRemap(m, i->ARMin.VLdStD.dD);
   2230          mapRegs_ARMAModeV(m, i->ARMin.VLdStD.amode);
   2231          return;
   2232       case ARMin_VLdStS:
   2233          i->ARMin.VLdStS.fD = lookupHRegRemap(m, i->ARMin.VLdStS.fD);
   2234          mapRegs_ARMAModeV(m, i->ARMin.VLdStS.amode);
   2235          return;
   2236       case ARMin_VAluD:
   2237          i->ARMin.VAluD.dst  = lookupHRegRemap(m, i->ARMin.VAluD.dst);
   2238          i->ARMin.VAluD.argL = lookupHRegRemap(m, i->ARMin.VAluD.argL);
   2239          i->ARMin.VAluD.argR = lookupHRegRemap(m, i->ARMin.VAluD.argR);
   2240          return;
   2241       case ARMin_VAluS:
   2242          i->ARMin.VAluS.dst  = lookupHRegRemap(m, i->ARMin.VAluS.dst);
   2243          i->ARMin.VAluS.argL = lookupHRegRemap(m, i->ARMin.VAluS.argL);
   2244          i->ARMin.VAluS.argR = lookupHRegRemap(m, i->ARMin.VAluS.argR);
   2245          return;
   2246       case ARMin_VUnaryD:
   2247          i->ARMin.VUnaryD.dst = lookupHRegRemap(m, i->ARMin.VUnaryD.dst);
   2248          i->ARMin.VUnaryD.src = lookupHRegRemap(m, i->ARMin.VUnaryD.src);
   2249          return;
   2250       case ARMin_VUnaryS:
   2251          i->ARMin.VUnaryS.dst = lookupHRegRemap(m, i->ARMin.VUnaryS.dst);
   2252          i->ARMin.VUnaryS.src = lookupHRegRemap(m, i->ARMin.VUnaryS.src);
   2253          return;
   2254       case ARMin_VCmpD:
   2255          i->ARMin.VCmpD.argL = lookupHRegRemap(m, i->ARMin.VCmpD.argL);
   2256          i->ARMin.VCmpD.argR = lookupHRegRemap(m, i->ARMin.VCmpD.argR);
   2257          return;
   2258       case ARMin_VCMovD:
   2259          i->ARMin.VCMovD.dst = lookupHRegRemap(m, i->ARMin.VCMovD.dst);
   2260          i->ARMin.VCMovD.src = lookupHRegRemap(m, i->ARMin.VCMovD.src);
   2261          return;
   2262       case ARMin_VCMovS:
   2263          i->ARMin.VCMovS.dst = lookupHRegRemap(m, i->ARMin.VCMovS.dst);
   2264          i->ARMin.VCMovS.src = lookupHRegRemap(m, i->ARMin.VCMovS.src);
   2265          return;
   2266       case ARMin_VCvtSD:
   2267          i->ARMin.VCvtSD.dst = lookupHRegRemap(m, i->ARMin.VCvtSD.dst);
   2268          i->ARMin.VCvtSD.src = lookupHRegRemap(m, i->ARMin.VCvtSD.src);
   2269          return;
   2270       case ARMin_VXferD:
   2271          i->ARMin.VXferD.dD  = lookupHRegRemap(m, i->ARMin.VXferD.dD);
   2272          i->ARMin.VXferD.rHi = lookupHRegRemap(m, i->ARMin.VXferD.rHi);
   2273          i->ARMin.VXferD.rLo = lookupHRegRemap(m, i->ARMin.VXferD.rLo);
   2274          return;
   2275       case ARMin_VXferS:
   2276          i->ARMin.VXferS.fD  = lookupHRegRemap(m, i->ARMin.VXferS.fD);
   2277          i->ARMin.VXferS.rLo = lookupHRegRemap(m, i->ARMin.VXferS.rLo);
   2278          return;
   2279       case ARMin_VCvtID:
   2280          i->ARMin.VCvtID.dst = lookupHRegRemap(m, i->ARMin.VCvtID.dst);
   2281          i->ARMin.VCvtID.src = lookupHRegRemap(m, i->ARMin.VCvtID.src);
   2282          return;
   2283       case ARMin_FPSCR:
   2284          i->ARMin.FPSCR.iReg = lookupHRegRemap(m, i->ARMin.FPSCR.iReg);
   2285          return;
   2286       case ARMin_MFence:
   2287          return;
   2288       case ARMin_CLREX:
   2289          return;
   2290       case ARMin_NLdStQ:
   2291          i->ARMin.NLdStQ.dQ = lookupHRegRemap(m, i->ARMin.NLdStQ.dQ);
   2292          mapRegs_ARMAModeN(m, i->ARMin.NLdStQ.amode);
   2293          return;
   2294       case ARMin_NLdStD:
   2295          i->ARMin.NLdStD.dD = lookupHRegRemap(m, i->ARMin.NLdStD.dD);
   2296          mapRegs_ARMAModeN(m, i->ARMin.NLdStD.amode);
   2297          return;
   2298       case ARMin_NUnary:
   2299          i->ARMin.NUnary.src = lookupHRegRemap(m, i->ARMin.NUnary.src);
   2300          i->ARMin.NUnary.dst = lookupHRegRemap(m, i->ARMin.NUnary.dst);
   2301          return;
   2302       case ARMin_NUnaryS:
   2303          i->ARMin.NUnaryS.src->reg
   2304             = lookupHRegRemap(m, i->ARMin.NUnaryS.src->reg);
   2305          i->ARMin.NUnaryS.dst->reg
   2306             = lookupHRegRemap(m, i->ARMin.NUnaryS.dst->reg);
   2307          return;
   2308       case ARMin_NShift:
   2309          i->ARMin.NShift.dst = lookupHRegRemap(m, i->ARMin.NShift.dst);
   2310          i->ARMin.NShift.argL = lookupHRegRemap(m, i->ARMin.NShift.argL);
   2311          i->ARMin.NShift.argR = lookupHRegRemap(m, i->ARMin.NShift.argR);
   2312          return;
   2313       case ARMin_NDual:
   2314          i->ARMin.NDual.arg1 = lookupHRegRemap(m, i->ARMin.NDual.arg1);
   2315          i->ARMin.NDual.arg2 = lookupHRegRemap(m, i->ARMin.NDual.arg2);
   2316          return;
   2317       case ARMin_NBinary:
   2318          i->ARMin.NBinary.argL = lookupHRegRemap(m, i->ARMin.NBinary.argL);
   2319          i->ARMin.NBinary.argR = lookupHRegRemap(m, i->ARMin.NBinary.argR);
   2320          i->ARMin.NBinary.dst  = lookupHRegRemap(m, i->ARMin.NBinary.dst);
   2321          return;
   2322       case ARMin_NeonImm:
   2323          i->ARMin.NeonImm.dst = lookupHRegRemap(m, i->ARMin.NeonImm.dst);
   2324          return;
   2325       case ARMin_NCMovQ:
   2326          i->ARMin.NCMovQ.dst = lookupHRegRemap(m, i->ARMin.NCMovQ.dst);
   2327          i->ARMin.NCMovQ.src = lookupHRegRemap(m, i->ARMin.NCMovQ.src);
   2328          return;
   2329       case ARMin_Add32:
   2330          i->ARMin.Add32.rD = lookupHRegRemap(m, i->ARMin.Add32.rD);
   2331          i->ARMin.Add32.rN = lookupHRegRemap(m, i->ARMin.Add32.rN);
   2332       unhandled:
   2333       default:
   2334          ppARMInstr(i);
   2335          vpanic("mapRegs_ARMInstr");
   2336    }
   2337 }
   2338 
   2339 /* Figure out if i represents a reg-reg move, and if so assign the
   2340    source and destination to *src and *dst.  If in doubt say No.  Used
   2341    by the register allocator to do move coalescing.
   2342 */
   2343 Bool isMove_ARMInstr ( ARMInstr* i, HReg* src, HReg* dst )
   2344 {
   2345    /* Moves between integer regs */
   2346    switch (i->tag) {
   2347       case ARMin_Mov:
   2348          if (i->ARMin.Mov.src->tag == ARMri84_R) {
   2349             *src = i->ARMin.Mov.src->ARMri84.R.reg;
   2350             *dst = i->ARMin.Mov.dst;
   2351             return True;
   2352          }
   2353          break;
   2354       case ARMin_VUnaryD:
   2355          if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) {
   2356             *src = i->ARMin.VUnaryD.src;
   2357             *dst = i->ARMin.VUnaryD.dst;
   2358             return True;
   2359          }
   2360          break;
   2361       case ARMin_VUnaryS:
   2362          if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) {
   2363             *src = i->ARMin.VUnaryS.src;
   2364             *dst = i->ARMin.VUnaryS.dst;
   2365             return True;
   2366          }
   2367          break;
   2368       case ARMin_NUnary:
   2369          if (i->ARMin.NUnary.op == ARMneon_COPY) {
   2370             *src = i->ARMin.NUnary.src;
   2371             *dst = i->ARMin.NUnary.dst;
   2372             return True;
   2373          }
   2374          break;
   2375       default:
   2376          break;
   2377    }
   2378 
   2379    return False;
   2380 }
   2381 
   2382 
   2383 /* Generate arm spill/reload instructions under the direction of the
   2384    register allocator.  Note it's critical these don't write the
   2385    condition codes. */
   2386 
   2387 void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
   2388                     HReg rreg, Int offsetB, Bool mode64 )
   2389 {
   2390    HRegClass rclass;
   2391    vassert(offsetB >= 0);
   2392    vassert(!hregIsVirtual(rreg));
   2393    vassert(mode64 == False);
   2394    *i1 = *i2 = NULL;
   2395    rclass = hregClass(rreg);
   2396    switch (rclass) {
   2397       case HRcInt32:
   2398          vassert(offsetB <= 4095);
   2399          *i1 = ARMInstr_LdSt32( False/*!isLoad*/,
   2400                                 rreg,
   2401                                 ARMAMode1_RI(hregARM_R8(), offsetB) );
   2402          return;
   2403       case HRcFlt32:
   2404       case HRcFlt64: {
   2405          HReg r8   = hregARM_R8();  /* baseblock */
   2406          HReg r12  = hregARM_R12(); /* spill temp */
   2407          HReg base = r8;
   2408          vassert(0 == (offsetB & 3));
   2409          if (offsetB >= 1024) {
   2410             Int offsetKB = offsetB / 1024;
   2411             /* r12 = r8 + (1024 * offsetKB) */
   2412             *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
   2413                                ARMRI84_I84(offsetKB, 11));
   2414             offsetB -= (1024 * offsetKB);
   2415             base = r12;
   2416          }
   2417          vassert(offsetB <= 1020);
   2418          if (rclass == HRcFlt32) {
   2419             *i2 = ARMInstr_VLdStS( False/*!isLoad*/,
   2420                                    rreg,
   2421                                    mkARMAModeV(base, offsetB) );
   2422          } else {
   2423             *i2 = ARMInstr_VLdStD( False/*!isLoad*/,
   2424                                    rreg,
   2425                                    mkARMAModeV(base, offsetB) );
   2426          }
   2427          return;
   2428       }
   2429       case HRcVec128: {
   2430          HReg r8  = hregARM_R8();
   2431          HReg r12 = hregARM_R12();
   2432          *i1 = ARMInstr_Add32(r12, r8, offsetB);
   2433          *i2 = ARMInstr_NLdStQ(False, rreg, mkARMAModeN_R(r12));
   2434          return;
   2435       }
   2436       default:
   2437          ppHRegClass(rclass);
   2438          vpanic("genSpill_ARM: unimplemented regclass");
   2439    }
   2440 }
   2441 
   2442 void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
   2443                      HReg rreg, Int offsetB, Bool mode64 )
   2444 {
   2445    HRegClass rclass;
   2446    vassert(offsetB >= 0);
   2447    vassert(!hregIsVirtual(rreg));
   2448    vassert(mode64 == False);
   2449    *i1 = *i2 = NULL;
   2450    rclass = hregClass(rreg);
   2451    switch (rclass) {
   2452       case HRcInt32:
   2453          vassert(offsetB <= 4095);
   2454          *i1 = ARMInstr_LdSt32( True/*isLoad*/,
   2455                                 rreg,
   2456                                 ARMAMode1_RI(hregARM_R8(), offsetB) );
   2457          return;
   2458       case HRcFlt32:
   2459       case HRcFlt64: {
   2460          HReg r8   = hregARM_R8();  /* baseblock */
   2461          HReg r12  = hregARM_R12(); /* spill temp */
   2462          HReg base = r8;
   2463          vassert(0 == (offsetB & 3));
   2464          if (offsetB >= 1024) {
   2465             Int offsetKB = offsetB / 1024;
   2466             /* r12 = r8 + (1024 * offsetKB) */
   2467             *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
   2468                                ARMRI84_I84(offsetKB, 11));
   2469             offsetB -= (1024 * offsetKB);
   2470             base = r12;
   2471          }
   2472          vassert(offsetB <= 1020);
   2473          if (rclass == HRcFlt32) {
   2474             *i2 = ARMInstr_VLdStS( True/*isLoad*/,
   2475                                    rreg,
   2476                                    mkARMAModeV(base, offsetB) );
   2477          } else {
   2478             *i2 = ARMInstr_VLdStD( True/*isLoad*/,
   2479                                    rreg,
   2480                                    mkARMAModeV(base, offsetB) );
   2481          }
   2482          return;
   2483       }
   2484       case HRcVec128: {
   2485          HReg r8  = hregARM_R8();
   2486          HReg r12 = hregARM_R12();
   2487          *i1 = ARMInstr_Add32(r12, r8, offsetB);
   2488          *i2 = ARMInstr_NLdStQ(True, rreg, mkARMAModeN_R(r12));
   2489          return;
   2490       }
   2491       default:
   2492          ppHRegClass(rclass);
   2493          vpanic("genReload_ARM: unimplemented regclass");
   2494    }
   2495 }
   2496 
   2497 
   2498 /* Emit an instruction into buf and return the number of bytes used.
   2499    Note that buf is not the insn's final place, and therefore it is
   2500    imperative to emit position-independent code. */
   2501 
   2502 static inline UChar iregNo ( HReg r )
   2503 {
   2504    UInt n;
   2505    vassert(hregClass(r) == HRcInt32);
   2506    vassert(!hregIsVirtual(r));
   2507    n = hregNumber(r);
   2508    vassert(n <= 15);
   2509    return toUChar(n);
   2510 }
   2511 
   2512 static inline UChar dregNo ( HReg r )
   2513 {
   2514    UInt n;
   2515    if (hregClass(r) != HRcFlt64)
   2516       ppHRegClass(hregClass(r));
   2517    vassert(hregClass(r) == HRcFlt64);
   2518    vassert(!hregIsVirtual(r));
   2519    n = hregNumber(r);
   2520    vassert(n <= 31);
   2521    return toUChar(n);
   2522 }
   2523 
   2524 static inline UChar fregNo ( HReg r )
   2525 {
   2526    UInt n;
   2527    vassert(hregClass(r) == HRcFlt32);
   2528    vassert(!hregIsVirtual(r));
   2529    n = hregNumber(r);
   2530    vassert(n <= 31);
   2531    return toUChar(n);
   2532 }
   2533 
   2534 static inline UChar qregNo ( HReg r )
   2535 {
   2536    UInt n;
   2537    vassert(hregClass(r) == HRcVec128);
   2538    vassert(!hregIsVirtual(r));
   2539    n = hregNumber(r);
   2540    vassert(n <= 15);
   2541    return toUChar(n);
   2542 }
   2543 
/* Pack four single bits (MSB first) into a 4-bit value. */
#define BITS4(zzb3,zzb2,zzb1,zzb0) \
   (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
/* Named 4-bit constants, X<b3><b2><b1><b0>, used when assembling
   instruction words one nibble at a time. */
#define X0000  BITS4(0,0,0,0)
#define X0001  BITS4(0,0,0,1)
#define X0010  BITS4(0,0,1,0)
#define X0011  BITS4(0,0,1,1)
#define X0100  BITS4(0,1,0,0)
#define X0101  BITS4(0,1,0,1)
#define X0110  BITS4(0,1,1,0)
#define X0111  BITS4(0,1,1,1)
#define X1000  BITS4(1,0,0,0)
#define X1001  BITS4(1,0,0,1)
#define X1010  BITS4(1,0,1,0)
#define X1011  BITS4(1,0,1,1)
#define X1100  BITS4(1,1,0,0)
#define X1101  BITS4(1,1,0,1)
#define X1110  BITS4(1,1,1,0)
#define X1111  BITS4(1,1,1,1)

/* Assemble a 32-bit instruction word from 4-bit nibbles, most
   significant nibble first.  In each macro name, an 'X' marks a
   nibble supplied by the caller and a '_' marks one left as zero
   (typically OR'd in later by the caller). */
#define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
    (((zzx3) & 0xF) << 12))

#define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2)        \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
    (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8))

#define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0)        \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
    (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) <<  0))

#define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
  ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
   (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
   (((zzx0) & 0xF) << 0))

#define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0)  \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
    (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8) |  \
    (((zzx1) & 0xF) <<  4) | (((zzx0) & 0xF) <<  0))
   2588 
   2589 /* Generate a skeletal insn that involves an a RI84 shifter operand.
   2590    Returns a word which is all zeroes apart from bits 25 and 11..0,
   2591    since it is those that encode the shifter operand (at least to the
   2592    extent that we care about it.) */
   2593 static UInt skeletal_RI84 ( ARMRI84* ri )
   2594 {
   2595    UInt instr;
   2596    if (ri->tag == ARMri84_I84) {
   2597       vassert(0 == (ri->ARMri84.I84.imm4 & ~0x0F));
   2598       vassert(0 == (ri->ARMri84.I84.imm8 & ~0xFF));
   2599       instr = 1 << 25;
   2600       instr |= (ri->ARMri84.I84.imm4 << 8);
   2601       instr |= ri->ARMri84.I84.imm8;
   2602    } else {
   2603       instr = 0 << 25;
   2604       instr |= iregNo(ri->ARMri84.R.reg);
   2605    }
   2606    return instr;
   2607 }
   2608 
   2609 /* Ditto for RI5.  Resulting word is zeroes apart from bit 4 and bits
   2610    11..7. */
   2611 static UInt skeletal_RI5 ( ARMRI5* ri )
   2612 {
   2613    UInt instr;
   2614    if (ri->tag == ARMri5_I5) {
   2615       UInt imm5 = ri->ARMri5.I5.imm5;
   2616       vassert(imm5 >= 1 && imm5 <= 31);
   2617       instr = 0 << 4;
   2618       instr |= imm5 << 7;
   2619    } else {
   2620       instr = 1 << 4;
   2621       instr |= iregNo(ri->ARMri5.R.reg) << 8;
   2622    }
   2623    return instr;
   2624 }
   2625 
   2626 
   2627 /* Get an immediate into a register, using only that
   2628    register.  (very lame..) */
/* Emit code at p to load imm32 into register rD, using only rD
   itself as scratch.  Returns the updated emission pointer. */
static UInt* imm32_to_iregNo ( UInt* p, Int rD, UInt imm32 )
{
   UInt instr;
   vassert(rD >= 0 && rD <= 14); // r15 not good to mess with!
#if 0
   /* Disabled alternative: short MOV for 8-bit values, otherwise a
      literal-pool load (ldr from pc, branch over the constant). */
   if (0 == (imm32 & ~0xFF)) {
      /* mov with a immediate shifter operand of (0, imm32) (??) */
      instr = XXXXXX__(X1110,X0011,X1010,X0000,rD,X0000);
      instr |= imm32;
      *p++ = instr;
   } else {
      // this is very bad; causes Dcache pollution
      // ldr  rD, [pc]
      instr = XXXXX___(X1110,X0101,X1001,X1111,rD);
      *p++ = instr;
      // b .+8
      instr = 0xEA000000;
      *p++ = instr;
      // .word imm32
      *p++ = imm32;
   }
#else
   if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
      /* ARMv7+: use the 16-bit immediate moves.
         Generate movw rD, #low16.  Then, if the high 16 are
         nonzero, generate movt rD, #high16. */
      UInt lo16 = imm32 & 0xFFFF;
      UInt hi16 = (imm32 >> 16) & 0xFFFF;
      /* movw rD, #lo16 (imm4 field carries lo16[15:12]) */
      instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
                       (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
                       lo16 & 0xF);
      *p++ = instr;
      if (hi16 != 0) {
         /* movt rD, #hi16 -- sets the top half, keeps the bottom */
         instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
                          (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
                          hi16 & 0xF);
         *p++ = instr;
      }
   } else {
      /* Pre-v7 fallback: build the value one byte at a time using
         rotated 8-bit immediates.  The first emitted insn is a MOV
         (op nibble X1010, combined with the 0x3 field this gives
         data-processing opcode MOV); each later one is an ORR of rD
         into itself (op nibble X1000 -> opcode ORR, rN = rD).  The
         `rot` field is the 4-bit rotate amount, i.e. a right-rotate
         by 2*rot, which places the byte at the intended position. */
      UInt imm, rot;
      UInt op = X1010;
      UInt rN = 0;
      /* Byte 0 (also handles imm32 == 0, which must emit a MOV) */
      if ((imm32 & 0xFF) || (imm32 == 0)) {
         imm = imm32 & 0xFF;
         rot = 0;
         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
         *p++ = instr;
         op = X1000;   /* subsequent insns: ORR rD, rD, #... */
         rN = rD;
      }
      /* Byte 3 (bits 31..24), rotate-right by 8 */
      if (imm32 & 0xFF000000) {
         imm = (imm32 >> 24) & 0xFF;
         rot = 4;
         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
         *p++ = instr;
         op = X1000;
         rN = rD;
      }
      /* Byte 2 (bits 23..16), rotate-right by 16 */
      if (imm32 & 0xFF0000) {
         imm = (imm32 >> 16) & 0xFF;
         rot = 8;
         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
         *p++ = instr;
         op = X1000;
         rN = rD;
      }
      /* Byte 1 (bits 15..8), rotate-right by 24 */
      if (imm32 & 0xFF00) {
         imm = (imm32 >> 8) & 0xFF;
         rot = 12;
         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
         *p++ = instr;
         op = X1000;
         rN = rD;
      }
   }
#endif
   return p;
}
   2706 
   2707 
   2708 Int emit_ARMInstr ( UChar* buf, Int nbuf, ARMInstr* i,
   2709                     Bool mode64,
   2710                     void* dispatch_unassisted, void* dispatch_assisted )
   2711 {
   2712    UInt* p = (UInt*)buf;
   2713    vassert(nbuf >= 32);
   2714    vassert(mode64 == False);
   2715    vassert(0 == (((HWord)buf) & 3));
   2716 
   2717    switch (i->tag) {
   2718       case ARMin_Alu: {
   2719          UInt     instr, subopc;
   2720          UInt     rD   = iregNo(i->ARMin.Alu.dst);
   2721          UInt     rN   = iregNo(i->ARMin.Alu.argL);
   2722          ARMRI84* argR = i->ARMin.Alu.argR;
   2723          switch (i->ARMin.Alu.op) {
   2724             case ARMalu_ADDS: /* fallthru */
   2725             case ARMalu_ADD:  subopc = X0100; break;
   2726             case ARMalu_ADC:  subopc = X0101; break;
   2727             case ARMalu_SUBS: /* fallthru */
   2728             case ARMalu_SUB:  subopc = X0010; break;
   2729             case ARMalu_SBC:  subopc = X0110; break;
   2730             case ARMalu_AND:  subopc = X0000; break;
   2731             case ARMalu_BIC:  subopc = X1110; break;
   2732             case ARMalu_OR:   subopc = X1100; break;
   2733             case ARMalu_XOR:  subopc = X0001; break;
   2734             default: goto bad;
   2735          }
   2736          instr = skeletal_RI84(argR);
   2737          instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
   2738                            (subopc << 1) & 0xF, rN, rD);
   2739          if (i->ARMin.Alu.op == ARMalu_ADDS
   2740              || i->ARMin.Alu.op == ARMalu_SUBS) {
   2741             instr |= 1<<20;  /* set the S bit */
   2742          }
   2743          *p++ = instr;
   2744          goto done;
   2745       }
   2746       case ARMin_Shift: {
   2747          UInt    instr, subopc;
   2748          HReg    rD   = iregNo(i->ARMin.Shift.dst);
   2749          HReg    rM   = iregNo(i->ARMin.Shift.argL);
   2750          ARMRI5* argR = i->ARMin.Shift.argR;
   2751          switch (i->ARMin.Shift.op) {
   2752             case ARMsh_SHL: subopc = X0000; break;
   2753             case ARMsh_SHR: subopc = X0001; break;
   2754             case ARMsh_SAR: subopc = X0010; break;
   2755             default: goto bad;
   2756          }
   2757          instr = skeletal_RI5(argR);
   2758          instr |= XXXXX__X(X1110,X0001,X1010,X0000,rD, /* _ _ */ rM);
   2759          instr |= (subopc & 3) << 5;
   2760          *p++ = instr;
   2761          goto done;
   2762       }
   2763       case ARMin_Unary: {
   2764          UInt instr;
   2765          HReg rDst = iregNo(i->ARMin.Unary.dst);
   2766          HReg rSrc = iregNo(i->ARMin.Unary.src);
   2767          switch (i->ARMin.Unary.op) {
   2768             case ARMun_CLZ:
   2769                instr = XXXXXXXX(X1110,X0001,X0110,X1111,
   2770                                 rDst,X1111,X0001,rSrc);
   2771                *p++ = instr;
   2772                goto done;
   2773             case ARMun_NEG: /* RSB rD,rS,#0 */
   2774                instr = XXXXX___(X1110,0x2,0x6,rSrc,rDst);
   2775                *p++ = instr;
   2776                goto done;
   2777             case ARMun_NOT: {
   2778                UInt subopc = X1111; /* MVN */
   2779                instr = rSrc;
   2780                instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
   2781                                  (subopc << 1) & 0xF, 0, rDst);
   2782                *p++ = instr;
   2783                goto done;
   2784             }
   2785             default:
   2786                break;
   2787          }
   2788          goto bad;
   2789       }
   2790       case ARMin_CmpOrTst: {
   2791          UInt instr  = skeletal_RI84(i->ARMin.CmpOrTst.argR);
   2792          UInt subopc = i->ARMin.CmpOrTst.isCmp ? X1010 : X1000;
   2793          UInt SBZ    = 0;
   2794          instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
   2795                            ((subopc << 1) & 0xF) | 1,
   2796                            i->ARMin.CmpOrTst.argL, SBZ );
   2797          *p++ = instr;
   2798          goto done;
   2799       }
   2800       case ARMin_Mov: {
   2801          UInt instr  = skeletal_RI84(i->ARMin.Mov.src);
   2802          UInt subopc = X1101; /* MOV */
   2803          UInt SBZ    = 0;
   2804          instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
   2805                            (subopc << 1) & 0xF, SBZ, i->ARMin.Mov.dst);
   2806          *p++ = instr;
   2807          goto done;
   2808       }
   2809       case ARMin_Imm32: {
   2810          p = imm32_to_iregNo( (UInt*)p, iregNo(i->ARMin.Imm32.dst),
   2811                                         i->ARMin.Imm32.imm32 );
   2812          goto done;
   2813       }
   2814       case ARMin_LdSt32:
   2815       case ARMin_LdSt8U: {
   2816          UInt       bL, bB;
   2817          HReg       rD;
   2818          ARMAMode1* am;
   2819          if (i->tag == ARMin_LdSt32) {
   2820             bB = 0;
   2821             bL = i->ARMin.LdSt32.isLoad ? 1 : 0;
   2822             am = i->ARMin.LdSt32.amode;
   2823             rD = i->ARMin.LdSt32.rD;
   2824          } else {
   2825             bB = 1;
   2826             bL = i->ARMin.LdSt8U.isLoad ? 1 : 0;
   2827             am = i->ARMin.LdSt8U.amode;
   2828             rD = i->ARMin.LdSt8U.rD;
   2829          }
   2830          if (am->tag == ARMam1_RI) {
   2831             Int  simm12;
   2832             UInt instr, bP;
   2833             if (am->ARMam1.RI.simm13 < 0) {
   2834                bP = 0;
   2835                simm12 = -am->ARMam1.RI.simm13;
   2836             } else {
   2837                bP = 1;
   2838                simm12 = am->ARMam1.RI.simm13;
   2839             }
   2840             vassert(simm12 >= 0 && simm12 <= 4095);
   2841             instr = XXXXX___(X1110,X0101,BITS4(bP,bB,0,bL),
   2842                              iregNo(am->ARMam1.RI.reg),
   2843                              iregNo(rD));
   2844             instr |= simm12;
   2845             *p++ = instr;
   2846             goto done;
   2847          } else {
   2848             // RR case
   2849             goto bad;
   2850          }
   2851       }
   2852       case ARMin_LdSt16: {
   2853          HReg       rD = i->ARMin.LdSt16.rD;
   2854          UInt       bS = i->ARMin.LdSt16.signedLoad ? 1 : 0;
   2855          UInt       bL = i->ARMin.LdSt16.isLoad ? 1 : 0;
   2856          ARMAMode2* am = i->ARMin.LdSt16.amode;
   2857          if (am->tag == ARMam2_RI) {
   2858             HReg rN = am->ARMam2.RI.reg;
   2859             Int  simm8;
   2860             UInt bP, imm8hi, imm8lo, instr;
   2861             if (am->ARMam2.RI.simm9 < 0) {
   2862                bP = 0;
   2863                simm8 = -am->ARMam2.RI.simm9;
   2864             } else {
   2865                bP = 1;
   2866                simm8 = am->ARMam2.RI.simm9;
   2867             }
   2868             vassert(simm8 >= 0 && simm8 <= 255);
   2869             imm8hi = (simm8 >> 4) & 0xF;
   2870             imm8lo = simm8 & 0xF;
   2871             vassert(!(bL == 0 && bS == 1)); // "! signed store"
   2872             /**/ if (bL == 0 && bS == 0) {
   2873                // strh
   2874                instr = XXXXXXXX(X1110,X0001, BITS4(bP,1,0,0), iregNo(rN),
   2875                                 iregNo(rD), imm8hi, X1011, imm8lo);
   2876                *p++ = instr;
   2877                goto done;
   2878             }
   2879             else if (bL == 1 && bS == 0) {
   2880                // ldrh
   2881                instr = XXXXXXXX(X1110,X0001, BITS4(bP,1,0,1), iregNo(rN),
   2882                                 iregNo(rD), imm8hi, X1011, imm8lo);
   2883                *p++ = instr;
   2884                goto done;
   2885             }
   2886             else if (bL == 1 && bS == 1) {
   2887                goto bad;
   2888             }
   2889             else vassert(0); // ill-constructed insn
   2890          } else {
   2891             // RR case
   2892             goto bad;
   2893          }
   2894       }
   2895       case ARMin_Ld8S:
   2896          goto bad;
   2897       case ARMin_Goto: {
   2898          UInt        instr;
   2899          IRJumpKind  jk    = i->ARMin.Goto.jk;
   2900          ARMCondCode cond  = i->ARMin.Goto.cond;
   2901          UInt        rnext = iregNo(i->ARMin.Goto.gnext);
   2902          Int         trc   = -1;
   2903          /* since we branch to lr(r13) to get back to dispatch: */
   2904          vassert(dispatch_unassisted == NULL);
   2905          vassert(dispatch_assisted == NULL);
   2906          switch (jk) {
   2907             case Ijk_Ret: case Ijk_Call: case Ijk_Boring:
   2908                break; /* no need to set GST in these common cases */
   2909             case Ijk_ClientReq:
   2910                trc = VEX_TRC_JMP_CLIENTREQ; break;
   2911             case Ijk_Sys_int128:
   2912             case Ijk_Sys_int129:
   2913             case Ijk_Sys_int130:
   2914             case Ijk_Yield:
   2915             case Ijk_EmWarn:
   2916             case Ijk_MapFail:
   2917                goto unhandled_jk;
   2918             case Ijk_YieldNoRedir:
   2919                trc = VEX_TRC_JMP_YIELD_NOREDIR; break;
   2920             case Ijk_NoDecode:
   2921                trc = VEX_TRC_JMP_NODECODE; break;
   2922             case Ijk_TInval:
   2923                trc = VEX_TRC_JMP_TINVAL; break;
   2924             case Ijk_NoRedir:
   2925                trc = VEX_TRC_JMP_NOREDIR; break;
   2926             case Ijk_Sys_sysenter:
   2927             case Ijk_SigTRAP:
   2928             case Ijk_SigSEGV:
   2929                goto unhandled_jk;
   2930             case Ijk_Sys_syscall:
   2931                trc = VEX_TRC_JMP_SYS_SYSCALL; break;
   2932             unhandled_jk:
   2933             default:
   2934                goto bad;
   2935          }
   2936          if (trc != -1) {
   2937             // mov{cond} r8, #trc
   2938             vassert(trc >= 0 && trc <= 255);
   2939             instr = (cond << 28) | 0x03A08000 | (0xFF & (UInt)trc);
   2940             *p++ = instr;
   2941          }
   2942          // mov{cond} r0, rnext
   2943          if (rnext != 0) {
   2944             instr = (cond << 28) | 0x01A00000 | rnext;
   2945             *p++ = instr;
   2946          }
   2947          // bx{cond} r14
   2948          instr =(cond << 28) | 0x012FFF1E;
   2949          *p++ = instr;
   2950          goto done;
   2951       }
   2952       case ARMin_CMov: {
   2953          UInt instr  = skeletal_RI84(i->ARMin.CMov.src);
   2954          UInt subopc = X1101; /* MOV */
   2955          UInt SBZ    = 0;
   2956          instr |= XXXXX___(i->ARMin.CMov.cond, (1 & (subopc >> 3)),
   2957                            (subopc << 1) & 0xF, SBZ, i->ARMin.CMov.dst);
   2958          *p++ = instr;
   2959          goto done;
   2960       }
   2961       case ARMin_Call: {
   2962          UInt instr;
   2963          /* Decide on a scratch reg used to hold to the call address.
   2964             This has to be done as per the comments in getRegUsage. */
   2965          Int scratchNo;
   2966          switch (i->ARMin.Call.nArgRegs) {
   2967             case 0:  scratchNo = 0;  break;
   2968             case 1:  scratchNo = 1;  break;
   2969             case 2:  scratchNo = 2;  break;
   2970             case 3:  scratchNo = 3;  break;
   2971             case 4:  scratchNo = 11; break;
   2972             default: vassert(0);
   2973          }
   2974          // r"scratchNo" = &target
   2975          p = imm32_to_iregNo( (UInt*)p,
   2976                               scratchNo, (UInt)i->ARMin.Call.target );
   2977          // blx{cond} r"scratchNo"
   2978          instr = XXX___XX(i->ARMin.Call.cond, X0001, X0010, /*___*/
   2979                           X0011, scratchNo);
   2980          instr |= 0xFFF << 8; // stick in the SBOnes
   2981          *p++ = instr;
   2982          goto done;
   2983       }
   2984       case ARMin_Mul: {
   2985          /* E0000392   mul     r0, r2, r3
   2986             E0810392   umull   r0(LO), r1(HI), r2, r3
   2987             E0C10392   smull   r0(LO), r1(HI), r2, r3
   2988          */
   2989          switch (i->ARMin.Mul.op) {
   2990             case ARMmul_PLAIN: *p++ = 0xE0000392; goto done;
   2991             case ARMmul_ZX:    *p++ = 0xE0810392; goto done;
   2992             case ARMmul_SX:    *p++ = 0xE0C10392; goto done;
   2993             default: vassert(0);
   2994          }
   2995          goto bad;
   2996       }
   2997       case ARMin_LdrEX: {
   2998          /* E1D42F9F   ldrexb r2, [r4]
   2999             E1F42F9F   ldrexh r2, [r4]
   3000             E1942F9F   ldrex  r2, [r4]
   3001             E1B42F9F   ldrexd r2, r3, [r4]
   3002          */
   3003          switch (i->ARMin.LdrEX.szB) {
   3004             case 1: *p++ = 0xE1D42F9F; goto done;
   3005             case 2: *p++ = 0xE1F42F9F; goto done;
   3006             case 4: *p++ = 0xE1942F9F; goto done;
   3007             case 8: *p++ = 0xE1B42F9F; goto done;
   3008             default: break;
   3009          }
   3010          goto bad;
   3011       }
   3012       case ARMin_StrEX: {
   3013          /* E1C40F92   strexb r0, r2, [r4]
   3014             E1E40F92   strexh r0, r2, [r4]
   3015             E1840F92   strex  r0, r2, [r4]
   3016             E1A40F92   strexd r0, r2, r3, [r4]
   3017          */
   3018          switch (i->ARMin.StrEX.szB) {
   3019             case 1: *p++ = 0xE1C40F92; goto done;
   3020             case 2: *p++ = 0xE1E40F92; goto done;
   3021             case 4: *p++ = 0xE1840F92; goto done;
   3022             case 8: *p++ = 0xE1A40F92; goto done;
   3023             default: break;
   3024          }
   3025          goto bad;
   3026       }
   3027       case ARMin_VLdStD: {
   3028          UInt dD     = dregNo(i->ARMin.VLdStD.dD);
   3029          UInt rN     = iregNo(i->ARMin.VLdStD.amode->reg);
   3030          Int  simm11 = i->ARMin.VLdStD.amode->simm11;
   3031          UInt off8   = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
   3032          UInt bU     = simm11 >= 0 ? 1 : 0;
   3033          UInt bL     = i->ARMin.VLdStD.isLoad ? 1 : 0;
   3034          UInt insn;
   3035          vassert(0 == (off8 & 3));
   3036          off8 >>= 2;
   3037          vassert(0 == (off8 & 0xFFFFFF00));
   3038          insn = XXXXXX__(0xE,X1101,BITS4(bU,0,0,bL),rN,dD,X1011);
   3039          insn |= off8;
   3040          *p++ = insn;
   3041          goto done;
   3042       }
   3043       case ARMin_VLdStS: {
   3044          UInt fD     = fregNo(i->ARMin.VLdStS.fD);
   3045          UInt rN     = iregNo(i->ARMin.VLdStS.amode->reg);
   3046          Int  simm11 = i->ARMin.VLdStS.amode->simm11;
   3047          UInt off8   = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
   3048          UInt bU     = simm11 >= 0 ? 1 : 0;
   3049          UInt bL     = i->ARMin.VLdStS.isLoad ? 1 : 0;
   3050          UInt bD     = fD & 1;
   3051          UInt insn;
   3052          vassert(0 == (off8 & 3));
   3053          off8 >>= 2;
   3054          vassert(0 == (off8 & 0xFFFFFF00));
   3055          insn = XXXXXX__(0xE,X1101,BITS4(bU,bD,0,bL),rN, (fD >> 1), X1010);
   3056          insn |= off8;
   3057          *p++ = insn;
   3058          goto done;
   3059       }
   3060       case ARMin_VAluD: {
   3061          UInt dN = dregNo(i->ARMin.VAluD.argL);
   3062          UInt dD = dregNo(i->ARMin.VAluD.dst);
   3063          UInt dM = dregNo(i->ARMin.VAluD.argR);
   3064          UInt pqrs = X1111; /* undefined */
   3065          switch (i->ARMin.VAluD.op) {
   3066             case ARMvfp_ADD: pqrs = X0110; break;
   3067             case ARMvfp_SUB: pqrs = X0111; break;
   3068             case ARMvfp_MUL: pqrs = X0100; break;
   3069             case ARMvfp_DIV: pqrs = X1000; break;
   3070             default: goto bad;
   3071          }
   3072          vassert(pqrs != X1111);
   3073          UInt bP  = (pqrs >> 3) & 1;
   3074          UInt bQ  = (pqrs >> 2) & 1;
   3075          UInt bR  = (pqrs >> 1) & 1;
   3076          UInt bS  = (pqrs >> 0) & 1;
   3077          UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,0,bQ,bR), dN, dD,
   3078                               X1011, BITS4(0,bS,0,0), dM);
   3079          *p++ = insn;
   3080          goto done;
   3081       }
   3082       case ARMin_VAluS: {
   3083          UInt dN = fregNo(i->ARMin.VAluS.argL);
   3084          UInt dD = fregNo(i->ARMin.VAluS.dst);
   3085          UInt dM = fregNo(i->ARMin.VAluS.argR);
   3086          UInt bN = dN & 1;
   3087          UInt bD = dD & 1;
   3088          UInt bM = dM & 1;
   3089          UInt pqrs = X1111; /* undefined */
   3090          switch (i->ARMin.VAluS.op) {
   3091             case ARMvfp_ADD: pqrs = X0110; break;
   3092             case ARMvfp_SUB: pqrs = X0111; break;
   3093             case ARMvfp_MUL: pqrs = X0100; break;
   3094             case ARMvfp_DIV: pqrs = X1000; break;
   3095             default: goto bad;
   3096          }
   3097          vassert(pqrs != X1111);
   3098          UInt bP  = (pqrs >> 3) & 1;
   3099          UInt bQ  = (pqrs >> 2) & 1;
   3100          UInt bR  = (pqrs >> 1) & 1;
   3101          UInt bS  = (pqrs >> 0) & 1;
   3102          UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,bD,bQ,bR),
   3103                               (dN >> 1), (dD >> 1),
   3104                               X1010, BITS4(bN,bS,bM,0), (dM >> 1));
   3105          *p++ = insn;
   3106          goto done;
   3107       }
      case ARMin_VUnaryD: {
         /* VFP double-precision unary op: copy/abs/neg/sqrt, dD <- op(dM). */
         UInt dD   = dregNo(i->ARMin.VUnaryD.dst);
         UInt dM   = dregNo(i->ARMin.VUnaryD.src);
         UInt insn = 0;
         switch (i->ARMin.VUnaryD.op) {
            case ARMvfpu_COPY:
               insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X0100,dM);
               break;
            case ARMvfpu_ABS:
               insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X1100,dM);
               break;
            case ARMvfpu_NEG:
               insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X0100,dM);
               break;
            case ARMvfpu_SQRT:
               insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X1100,dM);
               break;
            default:
               goto bad;
         }
         *p++ = insn;
         goto done;
      }
      case ARMin_VUnaryS: {
         /* VFP single-precision unary op: copy/abs/neg/sqrt, fD <- op(fM).
            S-register numbers are split into a 4-bit field (f >> 1)
            plus a low bit (f & 1) for encoding. */
         UInt fD   = fregNo(i->ARMin.VUnaryS.dst);
         UInt fM   = fregNo(i->ARMin.VUnaryS.src);
         UInt insn = 0;
         switch (i->ARMin.VUnaryS.op) {
            case ARMvfpu_COPY:
               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
                               (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
                               (fM >> 1));
               break;
            case ARMvfpu_ABS:
               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
                               (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
                               (fM >> 1));
               break;
            case ARMvfpu_NEG:
               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
                               (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
                               (fM >> 1));
               break;
            case ARMvfpu_SQRT:
               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
                               (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
                               (fM >> 1));
               break;
            default:
               goto bad;
         }
         *p++ = insn;
         goto done;
      }
      case ARMin_VCmpD: {
         /* Compare two F64 regs and copy the resulting FPSCR condition
            flags into the APSR (FCMPD followed by FMSTAT). */
         UInt dD   = dregNo(i->ARMin.VCmpD.argL);
         UInt dM   = dregNo(i->ARMin.VCmpD.argR);
         UInt insn = XXXXXXXX(0xE, X1110, X1011, X0100, dD, X1011, X0100, dM);
         *p++ = insn;       /* FCMPD dD, dM */
         *p++ = 0xEEF1FA10; /* FMSTAT */
         goto done;
      }
      case ARMin_VCMovD: {
         /* Conditional F64 register copy: dD <- dM if cond holds.
            Same encoding as the VUnaryD COPY case but with the
            condition field set to 'cc' instead of AL (0xE). */
         UInt cc = (UInt)i->ARMin.VCMovD.cond;
         UInt dD = dregNo(i->ARMin.VCMovD.dst);
         UInt dM = dregNo(i->ARMin.VCMovD.src);
         vassert(cc < 16 && cc != ARMcc_AL); /* AL would be a plain copy */
         UInt insn = XXXXXXXX(cc, X1110,X1011,X0000,dD,X1011,X0100,dM);
         *p++ = insn;
         goto done;
      }
      case ARMin_VCMovS: {
         /* Conditional F32 register copy: fD <- fM if cond holds.
            Same encoding as the VUnaryS COPY case, condition 'cc'. */
         UInt cc = (UInt)i->ARMin.VCMovS.cond;
         UInt fD = fregNo(i->ARMin.VCMovS.dst);
         UInt fM = fregNo(i->ARMin.VCMovS.src);
         vassert(cc < 16 && cc != ARMcc_AL); /* AL would be a plain copy */
         UInt insn = XXXXXXXX(cc, X1110, BITS4(1,(fD & 1),1,1),
                              X0000,(fD >> 1),X1010,
                              BITS4(0,1,(fM & 1),0), (fM >> 1));
         *p++ = insn;
         goto done;
      }
   3190       case ARMin_VCvtSD: {
   3191          if (i->ARMin.VCvtSD.sToD) {
   3192             UInt dD = dregNo(i->ARMin.VCvtSD.dst);
   3193             UInt fM = fregNo(i->ARMin.VCvtSD.src);
   3194             UInt insn = XXXXXXXX(0xE, X1110, X1011, X0111, dD, X1010,
   3195                                  BITS4(1,1, (fM & 1), 0),
   3196                                  (fM >> 1));
   3197             *p++ = insn;
   3198             goto done;
   3199          } else {
   3200             UInt fD = fregNo(i->ARMin.VCvtSD.dst);
   3201             UInt dM = dregNo(i->ARMin.VCvtSD.src);
   3202             UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1),
   3203                                  X0111, (fD >> 1),
   3204                                  X1011, X1100, dM);
   3205             *p++ = insn;
   3206             goto done;
   3207          }
   3208          goto bad;
   3209       }
      case ARMin_VXferD: {
         /* Transfer 64 bits between a D register and a pair of core
            registers (rHi:rLo), in the direction given by 'toD'. */
         UInt dD  = dregNo(i->ARMin.VXferD.dD);
         UInt rHi = iregNo(i->ARMin.VXferD.rHi);
         UInt rLo = iregNo(i->ARMin.VXferD.rLo);
         /* vmov dD, rLo, rHi is
            E C 4 rHi rLo B (0,0,dD[4],1) dD[3:0]
            vmov rLo, rHi, dD is
            E C 5 rHi rLo B (0,0,dD[4],1) dD[3:0]
         */
         UInt insn
            = XXXXXXXX(0xE, 0xC, i->ARMin.VXferD.toD ? 4 : 5,
                       rHi, rLo, 0xB,
                       BITS4(0,0, ((dD >> 4) & 1), 1), (dD & 0xF));
         *p++ = insn;
         goto done;
      }
      case ARMin_VXferS: {
         /* Transfer 32 bits between an S register and a core register,
            in the direction given by 'toS'. */
         UInt fD  = fregNo(i->ARMin.VXferS.fD);
         UInt rLo = iregNo(i->ARMin.VXferS.rLo);
         /* vmov fD, rLo is
            E E 0 fD[4:1] rLo A (fD[0],0,0,1) 0
            vmov rLo, fD is
            E E 1 fD[4:1] rLo A (fD[0],0,0,1) 0
         */
         UInt insn
            = XXXXXXXX(0xE, 0xE, i->ARMin.VXferS.toS ? 0 : 1,
                       (fD >> 1) & 0xF, rLo, 0xA,
                       BITS4((fD & 1),0,0,1), 0);
         *p++ = insn;
         goto done;
      }
      case ARMin_VCvtID: {
         /* Convert between a 32-bit integer held in an S register and an
            F64 held in a D register.  Four variants, chosen by the
            direction flag (iToD) and signedness (syned). */
         Bool iToD = i->ARMin.VCvtID.iToD;
         Bool syned = i->ARMin.VCvtID.syned;
         if (iToD && syned) {
            // FSITOD: I32S-in-freg to F64-in-dreg
            UInt regF = fregNo(i->ARMin.VCvtID.src);
            UInt regD = dregNo(i->ARMin.VCvtID.dst);
            UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
                                 X1011, BITS4(1,1,(regF & 1),0),
                                 (regF >> 1) & 0xF);
            *p++ = insn;
            goto done;
         }
         if (iToD && (!syned)) {
            // FUITOD: I32U-in-freg to F64-in-dreg
            UInt regF = fregNo(i->ARMin.VCvtID.src);
            UInt regD = dregNo(i->ARMin.VCvtID.dst);
            UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
                                 X1011, BITS4(0,1,(regF & 1),0),
                                 (regF >> 1) & 0xF);
            *p++ = insn;
            goto done;
         }
         if ((!iToD) && syned) {
            // FTOSID: F64-in-dreg to I32S-in-freg
            UInt regD = dregNo(i->ARMin.VCvtID.src);
            UInt regF = fregNo(i->ARMin.VCvtID.dst);
            UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
                                 X1101, (regF >> 1) & 0xF,
                                 X1011, X0100, regD);
            *p++ = insn;
            goto done;
         }
         if ((!iToD) && (!syned)) {
            // FTOUID: F64-in-dreg to I32U-in-freg
            UInt regD = dregNo(i->ARMin.VCvtID.src);
            UInt regF = fregNo(i->ARMin.VCvtID.dst);
            UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
                                 X1100, (regF >> 1) & 0xF,
                                 X1011, X0100, regD);
            *p++ = insn;
            goto done;
         }
         /*UNREACHED*/
         vassert(0);
      }
   3287       case ARMin_FPSCR: {
   3288          Bool toFPSCR = i->ARMin.FPSCR.toFPSCR;
   3289          HReg iReg    = iregNo(i->ARMin.FPSCR.iReg);
   3290          if (toFPSCR) {
   3291             /* fmxr fpscr, iReg is EEE1 iReg A10 */
   3292             *p++ = 0xEEE10A10 | ((iReg & 0xF) << 12);
   3293             goto done;
   3294          }
   3295          goto bad; // FPSCR -> iReg case currently ATC
   3296       }
      case ARMin_MFence: {
         /* Full memory fence: emit DSB, DMB and ISB, using their CP15
            (mcr) encodings. */
         *p++ = 0xEE070F9A; /* mcr 15,0,r0,c7,c10,4 (DSB) */
         *p++ = 0xEE070FBA; /* mcr 15,0,r0,c7,c10,5 (DMB) */
         *p++ = 0xEE070F95; /* mcr 15,0,r0,c7,c5,4  (ISB) */
         goto done;
      }
      case ARMin_CLREX: {
         /* Clear the local monitor's exclusive-access state. */
         *p++ = 0xF57FF01F; /* clrex */
         goto done;
      }
   3307 
      case ARMin_NLdStQ: {
         /* NEON load/store of a 128-bit Q register, addressed as two
            consecutive D registers (hence the << 1 on the Q number).
            regM == 15 selects the no-writeback addressing form. */
         UInt regD = qregNo(i->ARMin.NLdStQ.dQ) << 1;
         UInt regN, regM;
         UInt D = regD >> 4;  /* top bit of the D-reg number */
         UInt bL = i->ARMin.NLdStQ.isLoad ? 1 : 0;
         UInt insn;
         vassert(hregClass(i->ARMin.NLdStQ.dQ) == HRcVec128);
         regD &= 0xF;
         if (i->ARMin.NLdStQ.amode->tag == ARMamN_RR) {
            regN = iregNo(i->ARMin.NLdStQ.amode->ARMamN.RR.rN);
            regM = iregNo(i->ARMin.NLdStQ.amode->ARMamN.RR.rM);
         } else {
            regN = iregNo(i->ARMin.NLdStQ.amode->ARMamN.R.rN);
            regM = 15;
         }
         insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
                              regN, regD, X1010, X1000, regM);
         *p++ = insn;
         goto done;
      }
      case ARMin_NLdStD: {
         /* NEON load/store of a single 64-bit D register.  Same shape
            as the Q-reg case above, but with the one-register form
            (fourth-to-last field X0111 instead of X1010).
            regM == 15 selects the no-writeback addressing form. */
         UInt regD = dregNo(i->ARMin.NLdStD.dD);
         UInt regN, regM;
         UInt D = regD >> 4;  /* top bit of the D-reg number */
         UInt bL = i->ARMin.NLdStD.isLoad ? 1 : 0;
         UInt insn;
         vassert(hregClass(i->ARMin.NLdStD.dD) == HRcFlt64);
         regD &= 0xF;
         if (i->ARMin.NLdStD.amode->tag == ARMamN_RR) {
            regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rN);
            regM = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rM);
         } else {
            regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.R.rN);
            regM = 15;
         }
         insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
                              regN, regD, X0111, X1000, regM);
         *p++ = insn;
         goto done;
      }
      case ARMin_NUnaryS: {
         /* NEON scalar <-> vector-lane ops:
              VDUP      - replicate one lane across a D/Q register
              SETELEM   - move a core register into one lane (vmov)
              GETELEMU  - move one lane, zero-extended, to a core reg
              GETELEMS  - move one lane, sign-extended, to a core reg
            For GETELEM* on a Q register, a lane index in the upper D
            half is rebased onto the odd D register (regM++, index -= k). */
         UInt Q = i->ARMin.NUnaryS.Q ? 1 : 0;
         UInt regD, D;
         UInt regM, M;
         UInt size = i->ARMin.NUnaryS.size;
         UInt insn;
         UInt opc, opc1, opc2;
         switch (i->ARMin.NUnaryS.op) {
	    case ARMneon_VDUP:
               if (i->ARMin.NUnaryS.size >= 16)
                  goto bad;
               if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Reg)
                  goto bad;
               if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
                  goto bad;
               regD = (hregClass(i->ARMin.NUnaryS.dst->reg) == HRcVec128)
                        ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1)
                        : dregNo(i->ARMin.NUnaryS.dst->reg);
               regM = (hregClass(i->ARMin.NUnaryS.src->reg) == HRcVec128)
                        ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1)
                        : dregNo(i->ARMin.NUnaryS.src->reg);
               D = regD >> 4;
               M = regM >> 4;
               regD &= 0xf;
               regM &= 0xf;
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1),
                               (i->ARMin.NUnaryS.size & 0xf), regD,
                               X1100, BITS4(0,Q,M,0), regM);
               *p++ = insn;
               goto done;
            case ARMneon_SETELEM:
               regD = Q ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1) :
                                dregNo(i->ARMin.NUnaryS.dst->reg);
               regM = iregNo(i->ARMin.NUnaryS.src->reg);
               M = regM >> 4;
               D = regD >> 4;
               regM &= 0xF;
               regD &= 0xF;
               if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Scalar)
                  goto bad;
               /* Build a 4-bit opc from element size + lane index,
                  then split it into opc1 (bits 3:2) and opc2 (bits 1:0)
                  for the encoding below. */
               switch (size) {
                  case 0:
                     if (i->ARMin.NUnaryS.dst->index > 7)
                        goto bad;
                     opc = X1000 | i->ARMin.NUnaryS.dst->index;
                     break;
                  case 1:
                     if (i->ARMin.NUnaryS.dst->index > 3)
                        goto bad;
                     opc = X0001 | (i->ARMin.NUnaryS.dst->index << 1);
                     break;
                  case 2:
                     if (i->ARMin.NUnaryS.dst->index > 1)
                        goto bad;
                     opc = X0000 | (i->ARMin.NUnaryS.dst->index << 2);
                     break;
                  default:
                     goto bad;
               }
               opc1 = (opc >> 2) & 3;
               opc2 = opc & 3;
               insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),0),
                               regD, regM, X1011,
                               BITS4(D,(opc2 >> 1),(opc2 & 1),1), X0000);
               *p++ = insn;
               goto done;
            case ARMneon_GETELEMU:
               regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) :
                                dregNo(i->ARMin.NUnaryS.src->reg);
               regD = iregNo(i->ARMin.NUnaryS.dst->reg);
               M = regM >> 4;
               D = regD >> 4;
               regM &= 0xF;
               regD &= 0xF;
               if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
                  goto bad;
               switch (size) {
                  case 0:
                     /* Lane in the upper D half of a Q reg: step to the
                        odd D register and rebase the index. */
                     if (Q && i->ARMin.NUnaryS.src->index > 7) {
                        regM++;
                        i->ARMin.NUnaryS.src->index -= 8;
                     }
                     if (i->ARMin.NUnaryS.src->index > 7)
                        goto bad;
                     opc = X1000 | i->ARMin.NUnaryS.src->index;
                     break;
                  case 1:
                     if (Q && i->ARMin.NUnaryS.src->index > 3) {
                        regM++;
                        i->ARMin.NUnaryS.src->index -= 4;
                     }
                     if (i->ARMin.NUnaryS.src->index > 3)
                        goto bad;
                     opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
                     break;
                  case 2:
                     /* no unsigned variant for 32-bit lanes */
                     goto bad;
                  default:
                     goto bad;
               }
               opc1 = (opc >> 2) & 3;
               opc2 = opc & 3;
               insn = XXXXXXXX(0xE, X1110, BITS4(1,(opc1 >> 1),(opc1 & 1),1),
                               regM, regD, X1011,
                               BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
               *p++ = insn;
               goto done;
            case ARMneon_GETELEMS:
               regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) :
                                dregNo(i->ARMin.NUnaryS.src->reg);
               regD = iregNo(i->ARMin.NUnaryS.dst->reg);
               M = regM >> 4;
               D = regD >> 4;
               regM &= 0xF;
               regD &= 0xF;
               if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
                  goto bad;
               switch (size) {
                  case 0:
                     if (Q && i->ARMin.NUnaryS.src->index > 7) {
                        regM++;
                        i->ARMin.NUnaryS.src->index -= 8;
                     }
                     if (i->ARMin.NUnaryS.src->index > 7)
                        goto bad;
                     opc = X1000 | i->ARMin.NUnaryS.src->index;
                     break;
                  case 1:
                     if (Q && i->ARMin.NUnaryS.src->index > 3) {
                        regM++;
                        i->ARMin.NUnaryS.src->index -= 4;
                     }
                     if (i->ARMin.NUnaryS.src->index > 3)
                        goto bad;
                     opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
                     break;
                  case 2:
                     if (Q && i->ARMin.NUnaryS.src->index > 1) {
                        regM++;
                        i->ARMin.NUnaryS.src->index -= 2;
                     }
                     if (i->ARMin.NUnaryS.src->index > 1)
                        goto bad;
                     opc = X0000 | (i->ARMin.NUnaryS.src->index << 2);
                     break;
                  default:
                     goto bad;
               }
               opc1 = (opc >> 2) & 3;
               opc2 = opc & 3;
               insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),1),
                               regM, regD, X1011,
                               BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
               *p++ = insn;
               goto done;
            default:
               goto bad;
         }
      }
      case ARMin_NUnary: {
         /* NEON unary ops (one source, one destination register).  A Q
            register is addressed as a pair of D registers (<< 1); the
            top bit of each 5-bit register number goes in D/M.  'size'
            is op-specific: usually the element size (split into
            sz1:sz2), but for the VQSHLN* and VCVT*Fixed* families it
            carries a packed immediate, unpacked per-case below. */
         UInt Q = i->ARMin.NUnary.Q ? 1 : 0;
         UInt regD = (hregClass(i->ARMin.NUnary.dst) == HRcVec128)
                       ? (qregNo(i->ARMin.NUnary.dst) << 1)
                       : dregNo(i->ARMin.NUnary.dst);
         UInt regM, M;
         UInt D = regD >> 4;
         UInt sz1 = i->ARMin.NUnary.size >> 1;
         UInt sz2 = i->ARMin.NUnary.size & 1;
         UInt sz = i->ARMin.NUnary.size;
         UInt insn;
         UInt F = 0; /* TODO: floating point EQZ ??? */
         if (i->ARMin.NUnary.op != ARMneon_DUP) {
            regM = (hregClass(i->ARMin.NUnary.src) == HRcVec128)
                     ? (qregNo(i->ARMin.NUnary.src) << 1)
                     : dregNo(i->ARMin.NUnary.src);
            M = regM >> 4;
         } else {
            /* DUP's source is a core (integer) register */
            regM = iregNo(i->ARMin.NUnary.src);
            M = regM >> 4;
         }
         regD &= 0xF;
         regM &= 0xF;
         switch (i->ARMin.NUnary.op) {
            case ARMneon_COPY: /* VMOV reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regM, regD, X0001,
                               BITS4(M,Q,M,1), regM);
               break;
            case ARMneon_COPYN: /* VMOVN regD, regQ */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0010, BITS4(0,0,M,0), regM);
               break;
            case ARMneon_COPYQNSS: /* VQMOVN regD, regQ */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0010, BITS4(1,0,M,0), regM);
               break;
            case ARMneon_COPYQNUS: /* VQMOVUN regD, regQ */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0010, BITS4(0,1,M,0), regM);
               break;
            case ARMneon_COPYQNUU: /* VQMOVN regD, regQ */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0010, BITS4(1,1,M,0), regM);
               break;
            case ARMneon_COPYLS: /* VMOVL regQ, regD */
               if (sz >= 3)
                  goto bad;
               insn = XXXXXXXX(0xF, X0010,
                               BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
                               BITS4((sz == 0) ? 1 : 0,0,0,0),
                               regD, X1010, BITS4(0,0,M,1), regM);
               break;
            case ARMneon_COPYLU: /* VMOVL regQ, regD */
               if (sz >= 3)
                  goto bad;
               insn = XXXXXXXX(0xF, X0011,
                               BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
                               BITS4((sz == 0) ? 1 : 0,0,0,0),
                               regD, X1010, BITS4(0,0,M,1), regM);
               break;
            case ARMneon_NOT: /* VMVN reg, reg*/
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
                               BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_EQZ: /* VCEQ #0 */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
                               regD, BITS4(0,F,0,1), BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_CNT: /* VCNT */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_CLZ: /* VCLZ */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, X0100, BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_CLS: /* VCLS */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, X0100, BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_ABS: /* VABS */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
                               regD, X0011, BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_DUP: /* VDUP from a core register */
               sz1 = i->ARMin.NUnary.size == 0 ? 1 : 0;
               sz2 = i->ARMin.NUnary.size == 1 ? 1 : 0;
               vassert(sz1 + sz2 < 2);
               insn = XXXXXXXX(0xE, X1110, BITS4(1, sz1, Q, 0), regD, regM,
                               X1011, BITS4(D,0,sz2,1), X0000);
               break;
            case ARMneon_REV16: /* VREV16 */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, BITS4(0,0,0,1), BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_REV32: /* VREV32 */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, BITS4(0,0,0,0), BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_REV64: /* VREV64 */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, BITS4(0,0,0,0), BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_PADDLU: /* VPADDL, unsigned */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, X0010, BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_PADDLS: /* VPADDL, signed */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, X0010, BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_VQSHLNUU:
               /* 'sz' packs the shift immediate: bit 6 and bits 5:4 go
                  in the size fields, bits 3:0 in the imm field. */
               insn = XXXXXXXX(0xF, X0011,
                               (1 << 3) | (D << 2) | ((sz >> 4) & 3),
                               sz & 0xf, regD, X0111,
                               BITS4(sz >> 6,Q,M,1), regM);
               break;
            case ARMneon_VQSHLNSS:
               insn = XXXXXXXX(0xF, X0010,
                               (1 << 3) | (D << 2) | ((sz >> 4) & 3),
                               sz & 0xf, regD, X0111,
                               BITS4(sz >> 6,Q,M,1), regM);
               break;
            case ARMneon_VQSHLNUS:
               insn = XXXXXXXX(0xF, X0011,
                               (1 << 3) | (D << 2) | ((sz >> 4) & 3),
                               sz & 0xf, regD, X0110,
                               BITS4(sz >> 6,Q,M,1), regM);
               break;
            case ARMneon_VCVTFtoS:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_VCVTFtoU:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
                               BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_VCVTStoF:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_VCVTUtoF:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
                               BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_VCVTFtoFixedU:
               /* fixed-point conversions: bits 5:4 of 'sz' feed the
                  size fields, bits 3:0 the fraction-bits immediate */
               sz1 = (sz >> 5) & 1;
               sz2 = (sz >> 4) & 1;
               sz &= 0xf;
               insn = XXXXXXXX(0xF, X0011,
                               BITS4(1,D,sz1,sz2), sz, regD, X1111,
                               BITS4(0,Q,M,1), regM);
               break;
            case ARMneon_VCVTFtoFixedS:
               sz1 = (sz >> 5) & 1;
               sz2 = (sz >> 4) & 1;
               sz &= 0xf;
               insn = XXXXXXXX(0xF, X0010,
                               BITS4(1,D,sz1,sz2), sz, regD, X1111,
                               BITS4(0,Q,M,1), regM);
               break;
            case ARMneon_VCVTFixedUtoF:
               sz1 = (sz >> 5) & 1;
               sz2 = (sz >> 4) & 1;
               sz &= 0xf;
               insn = XXXXXXXX(0xF, X0011,
                               BITS4(1,D,sz1,sz2), sz, regD, X1110,
                               BITS4(0,Q,M,1), regM);
               break;
            case ARMneon_VCVTFixedStoF:
               sz1 = (sz >> 5) & 1;
               sz2 = (sz >> 4) & 1;
               sz &= 0xf;
               insn = XXXXXXXX(0xF, X0010,
                               BITS4(1,D,sz1,sz2), sz, regD, X1110,
                               BITS4(0,Q,M,1), regM);
               break;
            case ARMneon_VCVTF32toF16:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0110,
                               BITS4(0,0,M,0), regM);
               break;
            case ARMneon_VCVTF16toF32:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0111,
                               BITS4(0,0,M,0), regM);
               break;
            case ARMneon_VRECIP:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_VRECIPF:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_VABSFP:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_VRSQRTEFP:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
                               BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_VRSQRTE:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
                               BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_VNEGF:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
                               BITS4(1,Q,M,0), regM);
               break;

            default:
               goto bad;
         }
         *p++ = insn;
         goto done;
      }
      case ARMin_NDual: {
         /* NEON ops that modify both operand registers in place
            (VTRN/VZIP/VUZP).  Q registers are addressed as D-register
            pairs (<< 1); top register-number bit goes in D/M. */
         UInt Q = i->ARMin.NDual.Q ? 1 : 0;
         UInt regD = (hregClass(i->ARMin.NDual.arg1) == HRcVec128)
                       ? (qregNo(i->ARMin.NDual.arg1) << 1)
                       : dregNo(i->ARMin.NDual.arg1);
         UInt regM = (hregClass(i->ARMin.NDual.arg2) == HRcVec128)
                       ? (qregNo(i->ARMin.NDual.arg2) << 1)
                       : dregNo(i->ARMin.NDual.arg2);
         UInt D = regD >> 4;
         UInt M = regM >> 4;
         UInt sz1 = i->ARMin.NDual.size >> 1;
         UInt sz2 = i->ARMin.NDual.size & 1;
         UInt insn;
         regD &= 0xF;
         regM &= 0xF;
         switch (i->ARMin.NDual.op) {
            case ARMneon_TRN: /* VTRN reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0000, BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_ZIP: /* VZIP reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0001, BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_UZP: /* VUZP reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0001, BITS4(0,Q,M,0), regM);
               break;
            default:
               goto bad;
         }
         *p++ = insn;
         goto done;
      }
      case ARMin_NBinary: {
         /* Three-register NEON binary ops: dst = argL <op> argR.
            Registers may be D or Q (Q register numbers are doubled,
            since Qn aliases D(2n),D(2n+1)); the top bit of each 5-bit
            register number goes in the D/N/M bits, the low 4 bits in
            the Vd/Vn/Vm fields.  Note: for many signed/unsigned pairs
            below, the only difference is the second nibble (X0010 for
            the signed form vs X0011 for the unsigned form). */
         UInt Q = i->ARMin.NBinary.Q ? 1 : 0;
         UInt regD = (hregClass(i->ARMin.NBinary.dst) == HRcVec128)
                       ? (qregNo(i->ARMin.NBinary.dst) << 1)
                       : dregNo(i->ARMin.NBinary.dst);
         UInt regN = (hregClass(i->ARMin.NBinary.argL) == HRcVec128)
                       ? (qregNo(i->ARMin.NBinary.argL) << 1)
                       : dregNo(i->ARMin.NBinary.argL);
         UInt regM = (hregClass(i->ARMin.NBinary.argR) == HRcVec128)
                       ? (qregNo(i->ARMin.NBinary.argR) << 1)
                       : dregNo(i->ARMin.NBinary.argR);
         UInt sz1 = i->ARMin.NBinary.size >> 1;  /* size[1] */
         UInt sz2 = i->ARMin.NBinary.size & 1;   /* size[0] */
         UInt D = regD >> 4;
         UInt N = regN >> 4;
         UInt M = regM >> 4;
         UInt insn;
         regD &= 0xF;
         regM &= 0xF;
         regN &= 0xF;
         switch (i->ARMin.NBinary.op) {
            case ARMneon_VAND: /* VAND reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X0001,
                               BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VORR: /* VORR reg, reg, reg*/
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X0001,
                               BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VXOR: /* VEOR reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X0001,
                               BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VADD: /* VADD reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1000, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VSUB: /* VSUB reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1000, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VMINU: /* VMIN.Uxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0110, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VMINS: /* VMIN.Sxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0110, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VMAXU: /* VMAX.Uxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0110, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VMAXS: /* VMAX.Sxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0110, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VRHADDS: /* VRHADD.Sxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0001, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VRHADDU: /* VRHADD.Uxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0001, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VQADDU: /* VQADD unsigned reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0000, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VQADDS: /* VQADD signed reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0000, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VQSUBU: /* VQSUB unsigned reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0010, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VQSUBS: /* VQSUB signed reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0010, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VCGTU: /* VCGT unsigned reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0011, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VCGTS: /* VCGT signed reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0011, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VCGEU: /* VCGE unsigned reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0011, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VCGES: /* VCGE signed reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0011, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VCEQ: /* VCEQ reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1000, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VEXT: /* VEXT.8 reg, reg, #imm4*/
               /* The byte-offset immediate travels in NBinary.size; it
                  must fit the 4-bit imm4 field. */
               if (i->ARMin.NBinary.size >= 16)
                  goto bad;
               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,1,1), regN, regD,
                               i->ARMin.NBinary.size & 0xf, BITS4(N,Q,M,0),
                               regM);
               break;
            case ARMneon_VMUL: /* VMUL.Ixx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1001, BITS4(N,Q,M,1), regM);
               break;
            /* The widening multiplies below hard-wire bit Q to 0: the
               result is always a Q register, sources are D registers. */
            case ARMneon_VMULLU: /* VMULL.Uxx */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,sz1,sz2), regN, regD,
                               X1100, BITS4(N,0,M,0), regM);
               break;
            case ARMneon_VMULLS: /* VMULL.Sxx */
               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
                               X1100, BITS4(N,0,M,0), regM);
               break;
            case ARMneon_VMULP: /* VMUL.P8 (polynomial) */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1001, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VMULFP: /* VMUL.F32 */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
                               X1101, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VMULLP: /* VMULL.P8 */
               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
                               X1110, BITS4(N,0,M,0), regM);
               break;
            case ARMneon_VQDMULH: /* VQDMULH */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1011, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VQRDMULH: /* VQRDMULH */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1011, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VQDMULL: /* VQDMULL (widening) */
               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
                               X1101, BITS4(N,0,M,0), regM);
               break;
            case ARMneon_VTBL: /* VTBL */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), regN, regD,
                               X1000, BITS4(N,0,M,0), regM);
               break;
            case ARMneon_VPADD: /* VPADD (pairwise) */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1011, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VPADDFP: /* VPADD.F32 */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
                               X1101, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VPMINU: /* VPMIN.Uxx */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1010, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VPMINS: /* VPMIN.Sxx */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1010, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VPMAXU: /* VPMAX.Uxx */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1010, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VPMAXS: /* VPMAX.Sxx */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1010, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VADDFP: /* VADD reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
                               X1101, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VSUBFP: /* VSUB reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
                               X1101, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VABDFP: /* VABD reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
                               X1101, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VMINF: /* VMIN.F32 */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
                               X1111, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VMAXF: /* VMAX.F32 */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
                               X1111, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VPMINF: /* VPMIN.F32 */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
                               X1111, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VPMAXF: /* VPMAX.F32 */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
                               X1111, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VRECPS: /* VRECPS */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1111,
                               BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VCGTF: /* VCGT.F32 */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD, X1110,
                               BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VCGEF: /* VCGE.F32 */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X1110,
                               BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VCEQF: /* VCEQ.F32 */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1110,
                               BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VRSQRTS: /* VRSQRTS */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X1111,
                               BITS4(N,Q,M,1), regM);
               break;
            default:
               goto bad;
         }
         *p++ = insn;
         goto done;
      }
      case ARMin_NShift: {
         /* NEON shift-by-register ops: dst = argL shifted by argR.
            Note the operand-to-field mapping: argL goes in the Vm field
            and argR in the Vn field, matching the VSHL (register)
            encoding where Vm is the value shifted and Vn holds the
            shift amounts. */
         UInt Q = i->ARMin.NShift.Q ? 1 : 0;
         UInt regD = (hregClass(i->ARMin.NShift.dst) == HRcVec128)
                       ? (qregNo(i->ARMin.NShift.dst) << 1)
                       : dregNo(i->ARMin.NShift.dst);
         UInt regM = (hregClass(i->ARMin.NShift.argL) == HRcVec128)
                       ? (qregNo(i->ARMin.NShift.argL) << 1)
                       : dregNo(i->ARMin.NShift.argL);
         UInt regN = (hregClass(i->ARMin.NShift.argR) == HRcVec128)
                       ? (qregNo(i->ARMin.NShift.argR) << 1)
                       : dregNo(i->ARMin.NShift.argR);
         UInt sz1 = i->ARMin.NShift.size >> 1;  /* size[1] */
         UInt sz2 = i->ARMin.NShift.size & 1;   /* size[0] */
         UInt D = regD >> 4;
         UInt N = regN >> 4;
         UInt M = regM >> 4;
         UInt insn;
         regD &= 0xF;
         regM &= 0xF;
         regN &= 0xF;
         /* The four variants differ only in the unsigned/signed nibble
            (X0011 vs X0010) and the saturating bit (last bit of the
            BITS4(N,Q,M,x) group). */
         switch (i->ARMin.NShift.op) {
            case ARMneon_VSHL:  /* VSHL (register, unsigned) */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0100, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VSAL:  /* VSHL (register, signed) */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0100, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VQSHL: /* VQSHL (register, unsigned) */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0100, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VQSAL: /* VQSHL (register, signed) */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0100, BITS4(N,Q,M,1), regM);
               break;
            default:
               goto bad;
         }
         *p++ = insn;
         goto done;
      }
      case ARMin_NeonImm: {
         /* VMOV (immediate) to a D or Q register.  The 8-bit immediate
            is split across the encoding as a:b:c:d:e:f:g:h, i.e.
            j = imm[7], imm3 = imm[6:4], imm4 = imm[3:0].  The imm
            "type" selects the cmode/op fields, which determine how the
            8 bits are expanded/replicated into the full vector value
            (presumably following the ARM ARM VMOV-immediate cmode
            table — see the corresponding ARMNImm construction code to
            confirm each type's meaning). */
         UInt Q = (hregClass(i->ARMin.NeonImm.dst) == HRcVec128) ? 1 : 0;
         UInt regD = Q ? (qregNo(i->ARMin.NeonImm.dst) << 1) :
                          dregNo(i->ARMin.NeonImm.dst);
         UInt D = regD >> 4;
         UInt imm = i->ARMin.NeonImm.imm->imm8;
         UInt tp = i->ARMin.NeonImm.imm->type;
         UInt j = imm >> 7;              /* imm[7] -> 'a' bit */
         UInt imm3 = (imm >> 4) & 0x7;   /* imm[6:4] -> 'b:c:d' */
         UInt imm4 = imm & 0xF;          /* imm[3:0] -> 'e:f:g:h' */
         UInt cmode, op;
         UInt insn;
         regD &= 0xF;
         /* Only type 9 uses op=1 (the inverted/alternate form sharing
            cmode 14 with type 6). */
         if (tp == 9)
            op = 1;
         else
            op = 0;
         switch (tp) {
            case 0:
            case 1:
            case 2:
            case 3:
            case 4:
            case 5:
               /* Types 0..5 map linearly onto even cmodes 0,2,..,10. */
               cmode = tp << 1;
               break;
            case 9:
            case 6:
               cmode = 14;
               break;
            case 7:
               cmode = 12;
               break;
            case 8:
               cmode = 13;
               break;
            case 10:
               cmode = 15;
               break;
            default:
               vpanic("ARMin_NeonImm");

         }
         insn = XXXXXXXX(0xF, BITS4(0,0,1,j), BITS4(1,D,0,0), imm3, regD,
                         cmode, BITS4(0,Q,op,1), imm4);
         *p++ = insn;
         goto done;
      }
      case ARMin_NCMovQ: {
         /* Conditional move of a whole Q register.  NEON data-processing
            instructions cannot be conditional, so this is synthesised as
            a conditional branch (on the *inverted* condition) that skips
            over an unconditional VMOV/VORR:
               b!cc  .+8
               vmov  qD, qM
            Hence the restriction that cc is a real condition, not AL/NV. */
         UInt cc = (UInt)i->ARMin.NCMovQ.cond;
         UInt qM = qregNo(i->ARMin.NCMovQ.src) << 1;
         UInt qD = qregNo(i->ARMin.NCMovQ.dst) << 1;
         UInt vM = qM & 0xF;
         UInt vD = qD & 0xF;
         UInt M  = (qM >> 4) & 1;
         UInt D  = (qD >> 4) & 1;
         vassert(cc < 16 && cc != ARMcc_AL && cc != ARMcc_NV);
         /* b!cc here+8: !cc A00 0000 */
         UInt insn = XXXXXXXX(cc ^ 1, 0xA, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0);
         *p++ = insn;
         /* vmov qD, qM */
         insn = XXXXXXXX(0xF, 0x2, BITS4(0,D,1,0),
                         vM, vD, BITS4(0,0,0,1), BITS4(M,1,M,1), vM);
         *p++ = insn;
         goto done;
      }
      case ARMin_Add32: {
         /* rD = rN + imm32, for an arbitrary 32-bit immediate.  Done as
            "MOV rD, #imm32 ; ADD rD, rN, rD", so rD is clobbered before
            rN is read — hence rD and rN must be distinct. */
         UInt regD = iregNo(i->ARMin.Add32.rD);
         UInt regN = iregNo(i->ARMin.Add32.rN);
         UInt imm32 = i->ARMin.Add32.imm32;
         vassert(regD != regN);
         /* MOV regD, imm32 */
         p = imm32_to_iregNo((UInt *)p, regD, imm32);
         /* ADD regD, regN, regD */
         UInt insn = XXXXXXXX(0xE, 0, X1000, regN, regD, 0, 0, regD);
         *p++ = insn;
         goto done;
      }
   4104       /* ... */
   4105       default:
   4106          goto bad;
   4107     }
   4108 
   4109   bad:
   4110    ppARMInstr(i);
   4111    vpanic("emit_ARMInstr");
   4112    /*NOTREACHED*/
   4113 
   4114   done:
   4115    vassert(((UChar*)p) - &buf[0] <= 32);
   4116    return ((UChar*)p) - &buf[0];
   4117 }
   4118 
   4119 #undef BITS4
   4120 #undef X0000
   4121 #undef X0001
   4122 #undef X0010
   4123 #undef X0011
   4124 #undef X0100
   4125 #undef X0101
   4126 #undef X0110
   4127 #undef X0111
   4128 #undef X1000
   4129 #undef X1001
   4130 #undef X1010
   4131 #undef X1011
   4132 #undef X1100
   4133 #undef X1101
   4134 #undef X1110
   4135 #undef X1111
   4136 #undef XXXXX___
   4137 #undef XXXXXX__
   4138 #undef XXX___XX
   4139 #undef XXXXX__X
   4140 #undef XXXXXXXX
   4141 
   4142 /*---------------------------------------------------------------*/
   4143 /*--- end                                     host_arm_defs.c ---*/
   4144 /*---------------------------------------------------------------*/
   4145