Home | History | Annotate | Download | only in priv
      1 
      2 /*---------------------------------------------------------------*/
      3 /*--- begin                                   host_arm_defs.c ---*/
      4 /*---------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2010 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    NEON support is
     14    Copyright (C) 2010-2010 Samsung Electronics
     15    contributed by Dmitry Zhurikhin <zhur (at) ispras.ru>
     16               and Kirill Batuzov <batuzovk (at) ispras.ru>
     17 
     18    This program is free software; you can redistribute it and/or
     19    modify it under the terms of the GNU General Public License as
     20    published by the Free Software Foundation; either version 2 of the
     21    License, or (at your option) any later version.
     22 
     23    This program is distributed in the hope that it will be useful, but
     24    WITHOUT ANY WARRANTY; without even the implied warranty of
     25    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     26    General Public License for more details.
     27 
     28    You should have received a copy of the GNU General Public License
     29    along with this program; if not, write to the Free Software
     30    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     31    02110-1301, USA.
     32 
     33    The GNU General Public License is contained in the file COPYING.
     34 */
     35 
     36 #include "libvex_basictypes.h"
     37 #include "libvex.h"
     38 #include "libvex_trc_values.h"
     39 
     40 #include "main_util.h"
     41 #include "host_generic_regs.h"
     42 #include "host_arm_defs.h"
     43 
/* ARM hardware-capabilities word; starts out as zero.
   NOTE(review): presumably assigned elsewhere before instruction
   selection/emission consults it -- confirm against its writers. */
UInt arm_hwcaps = 0;
     45 
     46 
     47 /* --------- Registers. --------- */
     48 
     49 /* The usual HReg abstraction.
     50    There are 16 general purpose regs.
     51 */
     52 
     53 void ppHRegARM ( HReg reg )  {
     54    Int r;
     55    /* Be generic for all virtual regs. */
     56    if (hregIsVirtual(reg)) {
     57       ppHReg(reg);
     58       return;
     59    }
     60    /* But specific for real regs. */
     61    switch (hregClass(reg)) {
     62       case HRcInt32:
     63          r = hregNumber(reg);
     64          vassert(r >= 0 && r < 16);
     65          vex_printf("r%d", r);
     66          return;
     67       case HRcFlt64:
     68          r = hregNumber(reg);
     69          vassert(r >= 0 && r < 32);
     70          vex_printf("d%d", r);
     71          return;
     72       case HRcFlt32:
     73          r = hregNumber(reg);
     74          vassert(r >= 0 && r < 32);
     75          vex_printf("s%d", r);
     76          return;
     77       case HRcVec128:
     78          r = hregNumber(reg);
     79          vassert(r >= 0 && r < 16);
     80          vex_printf("q%d", r);
     81          return;
     82       default:
     83          vpanic("ppHRegARM");
     84    }
     85 }
     86 
     87 HReg hregARM_R0  ( void ) { return mkHReg(0,  HRcInt32, False); }
     88 HReg hregARM_R1  ( void ) { return mkHReg(1,  HRcInt32, False); }
     89 HReg hregARM_R2  ( void ) { return mkHReg(2,  HRcInt32, False); }
     90 HReg hregARM_R3  ( void ) { return mkHReg(3,  HRcInt32, False); }
     91 HReg hregARM_R4  ( void ) { return mkHReg(4,  HRcInt32, False); }
     92 HReg hregARM_R5  ( void ) { return mkHReg(5,  HRcInt32, False); }
     93 HReg hregARM_R6  ( void ) { return mkHReg(6,  HRcInt32, False); }
     94 HReg hregARM_R7  ( void ) { return mkHReg(7,  HRcInt32, False); }
     95 HReg hregARM_R8  ( void ) { return mkHReg(8,  HRcInt32, False); }
     96 HReg hregARM_R9  ( void ) { return mkHReg(9,  HRcInt32, False); }
     97 HReg hregARM_R10 ( void ) { return mkHReg(10, HRcInt32, False); }
     98 HReg hregARM_R11 ( void ) { return mkHReg(11, HRcInt32, False); }
     99 HReg hregARM_R12 ( void ) { return mkHReg(12, HRcInt32, False); }
    100 HReg hregARM_R13 ( void ) { return mkHReg(13, HRcInt32, False); }
    101 HReg hregARM_R14 ( void ) { return mkHReg(14, HRcInt32, False); }
    102 HReg hregARM_R15 ( void ) { return mkHReg(15, HRcInt32, False); }
    103 HReg hregARM_D8  ( void ) { return mkHReg(8,  HRcFlt64, False); }
    104 HReg hregARM_D9  ( void ) { return mkHReg(9,  HRcFlt64, False); }
    105 HReg hregARM_D10 ( void ) { return mkHReg(10, HRcFlt64, False); }
    106 HReg hregARM_D11 ( void ) { return mkHReg(11, HRcFlt64, False); }
    107 HReg hregARM_D12 ( void ) { return mkHReg(12, HRcFlt64, False); }
    108 HReg hregARM_S26 ( void ) { return mkHReg(26, HRcFlt32, False); }
    109 HReg hregARM_S27 ( void ) { return mkHReg(27, HRcFlt32, False); }
    110 HReg hregARM_S28 ( void ) { return mkHReg(28, HRcFlt32, False); }
    111 HReg hregARM_S29 ( void ) { return mkHReg(29, HRcFlt32, False); }
    112 HReg hregARM_S30 ( void ) { return mkHReg(30, HRcFlt32, False); }
    113 HReg hregARM_Q8  ( void ) { return mkHReg(8,  HRcVec128, False); }
    114 HReg hregARM_Q9  ( void ) { return mkHReg(9,  HRcVec128, False); }
    115 HReg hregARM_Q10 ( void ) { return mkHReg(10, HRcVec128, False); }
    116 HReg hregARM_Q11 ( void ) { return mkHReg(11, HRcVec128, False); }
    117 HReg hregARM_Q12 ( void ) { return mkHReg(12, HRcVec128, False); }
    118 HReg hregARM_Q13 ( void ) { return mkHReg(13, HRcVec128, False); }
    119 HReg hregARM_Q14 ( void ) { return mkHReg(14, HRcVec128, False); }
    120 HReg hregARM_Q15 ( void ) { return mkHReg(15, HRcVec128, False); }
    121 
    122 void getAllocableRegs_ARM ( Int* nregs, HReg** arr )
    123 {
    124    Int i = 0;
    125    *nregs = 26;
    126    *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
    127    // callee saves ones are listed first, since we prefer them
    128    // if they're available
    129    (*arr)[i++] = hregARM_R4();
    130    (*arr)[i++] = hregARM_R5();
    131    (*arr)[i++] = hregARM_R6();
    132    (*arr)[i++] = hregARM_R7();
    133    (*arr)[i++] = hregARM_R10();
    134    (*arr)[i++] = hregARM_R11();
    135    // otherwise we'll have to slum it out with caller-saves ones
    136    (*arr)[i++] = hregARM_R0();
    137    (*arr)[i++] = hregARM_R1();
    138    (*arr)[i++] = hregARM_R2();
    139    (*arr)[i++] = hregARM_R3();
    140    (*arr)[i++] = hregARM_R9();
    141    // FP hreegisters.  Note: these are all callee-save.  Yay!
    142    // Hence we don't need to mention them as trashed in
    143    // getHRegUsage for ARMInstr_Call.
    144    (*arr)[i++] = hregARM_D8();
    145    (*arr)[i++] = hregARM_D9();
    146    (*arr)[i++] = hregARM_D10();
    147    (*arr)[i++] = hregARM_D11();
    148    (*arr)[i++] = hregARM_D12();
    149    (*arr)[i++] = hregARM_S26();
    150    (*arr)[i++] = hregARM_S27();
    151    (*arr)[i++] = hregARM_S28();
    152    (*arr)[i++] = hregARM_S29();
    153    (*arr)[i++] = hregARM_S30();
    154 
    155    (*arr)[i++] = hregARM_Q8();
    156    (*arr)[i++] = hregARM_Q9();
    157    (*arr)[i++] = hregARM_Q10();
    158    (*arr)[i++] = hregARM_Q11();
    159    (*arr)[i++] = hregARM_Q12();
    160 
    161    //(*arr)[i++] = hregARM_Q13();
    162    //(*arr)[i++] = hregARM_Q14();
    163    //(*arr)[i++] = hregARM_Q15();
    164 
    165    // unavail: r8 as GSP
    166    // r12 is used as a spill/reload temporary
    167    // r13 as SP
    168    // r14 as LR
    169    // r15 as PC
    170    //
    171    // All in all, we have 11 allocatable integer registers:
    172    // 0 1 2 3 4 5 6 7 9 10 11, with r8 dedicated as GSP
    173    // and r12 dedicated as a spill temporary.
    174    // 13 14 and 15 are not under the allocator's control.
    175    //
    176    // Hence for the allocatable registers we have:
    177    //
    178    // callee-saved: 4 5 6 7 (8) 9 10 11
    179    // caller-saved: 0 1 2 3
    180    // Note 9 is ambiguous: the base EABI does not give an e/r-saved
    181    // designation for it, but the Linux instantiation of the ABI
    182    // specifies it as callee-saved.
    183    //
    184    // If the set of available registers changes or if the e/r status
    185    // changes, be sure to re-check/sync the definition of
    186    // getHRegUsage for ARMInstr_Call too.
    187    vassert(i == *nregs);
    188 }
    189 
    190 
    191 
    192 /* --------- Condition codes, ARM encoding. --------- */
    193 
    194 HChar* showARMCondCode ( ARMCondCode cond ) {
    195    switch (cond) {
    196        case ARMcc_EQ:  return "eq";
    197        case ARMcc_NE:  return "ne";
    198        case ARMcc_HS:  return "hs";
    199        case ARMcc_LO:  return "lo";
    200        case ARMcc_MI:  return "mi";
    201        case ARMcc_PL:  return "pl";
    202        case ARMcc_VS:  return "vs";
    203        case ARMcc_VC:  return "vc";
    204        case ARMcc_HI:  return "hi";
    205        case ARMcc_LS:  return "ls";
    206        case ARMcc_GE:  return "ge";
    207        case ARMcc_LT:  return "lt";
    208        case ARMcc_GT:  return "gt";
    209        case ARMcc_LE:  return "le";
    210        case ARMcc_AL:  return "al"; // default
    211        case ARMcc_NV:  return "nv";
    212        default: vpanic("showARMCondCode");
    213    }
    214 }
    215 
    216 
    217 /* --------- Mem AModes: Addressing Mode 1 --------- */
    218 
    219 ARMAMode1* ARMAMode1_RI  ( HReg reg, Int simm13 ) {
    220    ARMAMode1* am        = LibVEX_Alloc(sizeof(ARMAMode1));
    221    am->tag              = ARMam1_RI;
    222    am->ARMam1.RI.reg    = reg;
    223    am->ARMam1.RI.simm13 = simm13;
    224    vassert(-4095 <= simm13 && simm13 <= 4095);
    225    return am;
    226 }
    227 ARMAMode1* ARMAMode1_RRS ( HReg base, HReg index, UInt shift ) {
    228    ARMAMode1* am        = LibVEX_Alloc(sizeof(ARMAMode1));
    229    am->tag              = ARMam1_RRS;
    230    am->ARMam1.RRS.base  = base;
    231    am->ARMam1.RRS.index = index;
    232    am->ARMam1.RRS.shift = shift;
    233    vassert(0 <= shift && shift <= 3);
    234    return am;
    235 }
    236 
    237 void ppARMAMode1 ( ARMAMode1* am ) {
    238    switch (am->tag) {
    239       case ARMam1_RI:
    240          vex_printf("%d(", am->ARMam1.RI.simm13);
    241          ppHRegARM(am->ARMam1.RI.reg);
    242          vex_printf(")");
    243          break;
    244       case ARMam1_RRS:
    245          vex_printf("(");
    246          ppHRegARM(am->ARMam1.RRS.base);
    247          vex_printf(",");
    248          ppHRegARM(am->ARMam1.RRS.index);
    249          vex_printf(",%u)", am->ARMam1.RRS.shift);
    250          break;
    251       default:
    252          vassert(0);
    253    }
    254 }
    255 
    256 static void addRegUsage_ARMAMode1 ( HRegUsage* u, ARMAMode1* am ) {
    257    switch (am->tag) {
    258       case ARMam1_RI:
    259          addHRegUse(u, HRmRead, am->ARMam1.RI.reg);
    260          return;
    261       case ARMam1_RRS:
    262          //    addHRegUse(u, HRmRead, am->ARMam1.RRS.base);
    263          //    addHRegUse(u, HRmRead, am->ARMam1.RRS.index);
    264          //   return;
    265       default:
    266          vpanic("addRegUsage_ARMAmode1");
    267    }
    268 }
    269 
    270 static void mapRegs_ARMAMode1 ( HRegRemap* m, ARMAMode1* am ) {
    271    switch (am->tag) {
    272       case ARMam1_RI:
    273          am->ARMam1.RI.reg = lookupHRegRemap(m, am->ARMam1.RI.reg);
    274          return;
    275       case ARMam1_RRS:
    276          //am->ARMam1.RR.base =lookupHRegRemap(m, am->ARMam1.RR.base);
    277          //am->ARMam1.RR.index = lookupHRegRemap(m, am->ARMam1.RR.index);
    278          //return;
    279       default:
    280          vpanic("mapRegs_ARMAmode1");
    281    }
    282 }
    283 
    284 
    285 /* --------- Mem AModes: Addressing Mode 2 --------- */
    286 
    287 ARMAMode2* ARMAMode2_RI ( HReg reg, Int simm9 ) {
    288    ARMAMode2* am       = LibVEX_Alloc(sizeof(ARMAMode2));
    289    am->tag             = ARMam2_RI;
    290    am->ARMam2.RI.reg   = reg;
    291    am->ARMam2.RI.simm9 = simm9;
    292    vassert(-255 <= simm9 && simm9 <= 255);
    293    return am;
    294 }
    295 ARMAMode2* ARMAMode2_RR ( HReg base, HReg index ) {
    296    ARMAMode2* am       = LibVEX_Alloc(sizeof(ARMAMode2));
    297    am->tag             = ARMam2_RR;
    298    am->ARMam2.RR.base  = base;
    299    am->ARMam2.RR.index = index;
    300    return am;
    301 }
    302 
    303 void ppARMAMode2 ( ARMAMode2* am ) {
    304    switch (am->tag) {
    305       case ARMam2_RI:
    306          vex_printf("%d(", am->ARMam2.RI.simm9);
    307          ppHRegARM(am->ARMam2.RI.reg);
    308          vex_printf(")");
    309          break;
    310       case ARMam2_RR:
    311          vex_printf("(");
    312          ppHRegARM(am->ARMam2.RR.base);
    313          vex_printf(",");
    314          ppHRegARM(am->ARMam2.RR.index);
    315          vex_printf(")");
    316          break;
    317       default:
    318          vassert(0);
    319    }
    320 }
    321 
    322 static void addRegUsage_ARMAMode2 ( HRegUsage* u, ARMAMode2* am ) {
    323    switch (am->tag) {
    324       case ARMam2_RI:
    325          addHRegUse(u, HRmRead, am->ARMam2.RI.reg);
    326          return;
    327       case ARMam2_RR:
    328          //    addHRegUse(u, HRmRead, am->ARMam2.RR.base);
    329          //    addHRegUse(u, HRmRead, am->ARMam2.RR.index);
    330          //   return;
    331       default:
    332          vpanic("addRegUsage_ARMAmode2");
    333    }
    334 }
    335 
    336 static void mapRegs_ARMAMode2 ( HRegRemap* m, ARMAMode2* am ) {
    337    switch (am->tag) {
    338       case ARMam2_RI:
    339          am->ARMam2.RI.reg = lookupHRegRemap(m, am->ARMam2.RI.reg);
    340          return;
    341       case ARMam2_RR:
    342          //am->ARMam2.RR.base =lookupHRegRemap(m, am->ARMam2.RR.base);
    343          //am->ARMam2.RR.index = lookupHRegRemap(m, am->ARMam2.RR.index);
    344          //return;
    345       default:
    346          vpanic("mapRegs_ARMAmode2");
    347    }
    348 }
    349 
    350 
    351 /* --------- Mem AModes: Addressing Mode VFP --------- */
    352 
    353 ARMAModeV* mkARMAModeV ( HReg reg, Int simm11 ) {
    354    ARMAModeV* am = LibVEX_Alloc(sizeof(ARMAModeV));
    355    vassert(simm11 >= -1020 && simm11 <= 1020);
    356    vassert(0 == (simm11 & 3));
    357    am->reg    = reg;
    358    am->simm11 = simm11;
    359    return am;
    360 }
    361 
    362 void ppARMAModeV ( ARMAModeV* am ) {
    363    vex_printf("%d(", am->simm11);
    364    ppHRegARM(am->reg);
    365    vex_printf(")");
    366 }
    367 
    368 static void addRegUsage_ARMAModeV ( HRegUsage* u, ARMAModeV* am ) {
    369    addHRegUse(u, HRmRead, am->reg);
    370 }
    371 
    372 static void mapRegs_ARMAModeV ( HRegRemap* m, ARMAModeV* am ) {
    373    am->reg = lookupHRegRemap(m, am->reg);
    374 }
    375 
    376 
    377 /* --------- Mem AModes: Addressing Mode Neon ------- */
    378 
    379 ARMAModeN *mkARMAModeN_RR ( HReg rN, HReg rM ) {
    380    ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN));
    381    am->tag = ARMamN_RR;
    382    am->ARMamN.RR.rN = rN;
    383    am->ARMamN.RR.rM = rM;
    384    return am;
    385 }
    386 
    387 ARMAModeN *mkARMAModeN_R ( HReg rN ) {
    388    ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN));
    389    am->tag = ARMamN_R;
    390    am->ARMamN.R.rN = rN;
    391    return am;
    392 }
    393 
    394 static void addRegUsage_ARMAModeN ( HRegUsage* u, ARMAModeN* am ) {
    395    if (am->tag == ARMamN_R) {
    396       addHRegUse(u, HRmRead, am->ARMamN.R.rN);
    397    } else {
    398       addHRegUse(u, HRmRead, am->ARMamN.RR.rN);
    399       addHRegUse(u, HRmRead, am->ARMamN.RR.rM);
    400    }
    401 }
    402 
    403 static void mapRegs_ARMAModeN ( HRegRemap* m, ARMAModeN* am ) {
    404    if (am->tag == ARMamN_R) {
    405       am->ARMamN.R.rN = lookupHRegRemap(m, am->ARMamN.R.rN);
    406    } else {
    407       am->ARMamN.RR.rN = lookupHRegRemap(m, am->ARMamN.RR.rN);
    408       am->ARMamN.RR.rM = lookupHRegRemap(m, am->ARMamN.RR.rM);
    409    }
    410 }
    411 
    412 void ppARMAModeN ( ARMAModeN* am ) {
    413    vex_printf("[");
    414    if (am->tag == ARMamN_R) {
    415       ppHRegARM(am->ARMamN.R.rN);
    416    } else {
    417       ppHRegARM(am->ARMamN.RR.rN);
    418    }
    419    vex_printf("]");
    420    if (am->tag == ARMamN_RR) {
    421       vex_printf(", ");
    422       ppHRegARM(am->ARMamN.RR.rM);
    423    }
    424 }
    425 
    426 
    427 /* --------- Reg or imm-8x4 operands --------- */
    428 
    429 static UInt ROR32 ( UInt x, UInt sh ) {
    430    vassert(sh >= 0 && sh < 32);
    431    if (sh == 0)
    432       return x;
    433    else
    434       return (x << (32-sh)) | (x >> sh);
    435 }
    436 
    437 ARMRI84* ARMRI84_I84 ( UShort imm8, UShort imm4 ) {
    438    ARMRI84* ri84          = LibVEX_Alloc(sizeof(ARMRI84));
    439    ri84->tag              = ARMri84_I84;
    440    ri84->ARMri84.I84.imm8 = imm8;
    441    ri84->ARMri84.I84.imm4 = imm4;
    442    vassert(imm8 >= 0 && imm8 <= 255);
    443    vassert(imm4 >= 0 && imm4 <= 15);
    444    return ri84;
    445 }
    446 ARMRI84* ARMRI84_R ( HReg reg ) {
    447    ARMRI84* ri84       = LibVEX_Alloc(sizeof(ARMRI84));
    448    ri84->tag           = ARMri84_R;
    449    ri84->ARMri84.R.reg = reg;
    450    return ri84;
    451 }
    452 
    453 void ppARMRI84 ( ARMRI84* ri84 ) {
    454    switch (ri84->tag) {
    455       case ARMri84_I84:
    456          vex_printf("0x%x", ROR32(ri84->ARMri84.I84.imm8,
    457                                   2 * ri84->ARMri84.I84.imm4));
    458          break;
    459       case ARMri84_R:
    460          ppHRegARM(ri84->ARMri84.R.reg);
    461          break;
    462       default:
    463          vassert(0);
    464    }
    465 }
    466 
    467 static void addRegUsage_ARMRI84 ( HRegUsage* u, ARMRI84* ri84 ) {
    468    switch (ri84->tag) {
    469       case ARMri84_I84:
    470          return;
    471       case ARMri84_R:
    472          addHRegUse(u, HRmRead, ri84->ARMri84.R.reg);
    473          return;
    474       default:
    475          vpanic("addRegUsage_ARMRI84");
    476    }
    477 }
    478 
    479 static void mapRegs_ARMRI84 ( HRegRemap* m, ARMRI84* ri84 ) {
    480    switch (ri84->tag) {
    481       case ARMri84_I84:
    482          return;
    483       case ARMri84_R:
    484          ri84->ARMri84.R.reg = lookupHRegRemap(m, ri84->ARMri84.R.reg);
    485          return;
    486       default:
    487          vpanic("mapRegs_ARMRI84");
    488    }
    489 }
    490 
    491 
    492 /* --------- Reg or imm5 operands --------- */
    493 
    494 ARMRI5* ARMRI5_I5 ( UInt imm5 ) {
    495    ARMRI5* ri5         = LibVEX_Alloc(sizeof(ARMRI5));
    496    ri5->tag            = ARMri5_I5;
    497    ri5->ARMri5.I5.imm5 = imm5;
    498    vassert(imm5 > 0 && imm5 <= 31); // zero is not allowed
    499    return ri5;
    500 }
    501 ARMRI5* ARMRI5_R ( HReg reg ) {
    502    ARMRI5* ri5       = LibVEX_Alloc(sizeof(ARMRI5));
    503    ri5->tag          = ARMri5_R;
    504    ri5->ARMri5.R.reg = reg;
    505    return ri5;
    506 }
    507 
    508 void ppARMRI5 ( ARMRI5* ri5 ) {
    509    switch (ri5->tag) {
    510       case ARMri5_I5:
    511          vex_printf("%u", ri5->ARMri5.I5.imm5);
    512          break;
    513       case ARMri5_R:
    514          ppHRegARM(ri5->ARMri5.R.reg);
    515          break;
    516       default:
    517          vassert(0);
    518    }
    519 }
    520 
    521 static void addRegUsage_ARMRI5 ( HRegUsage* u, ARMRI5* ri5 ) {
    522    switch (ri5->tag) {
    523       case ARMri5_I5:
    524          return;
    525       case ARMri5_R:
    526          addHRegUse(u, HRmRead, ri5->ARMri5.R.reg);
    527          return;
    528       default:
    529          vpanic("addRegUsage_ARMRI5");
    530    }
    531 }
    532 
    533 static void mapRegs_ARMRI5 ( HRegRemap* m, ARMRI5* ri5 ) {
    534    switch (ri5->tag) {
    535       case ARMri5_I5:
    536          return;
    537       case ARMri5_R:
    538          ri5->ARMri5.R.reg = lookupHRegRemap(m, ri5->ARMri5.R.reg);
    539          return;
    540       default:
    541          vpanic("mapRegs_ARMRI5");
    542    }
    543 }
    544 
/* -------- Neon Immediate operand --------- */
    546 
    547 ARMNImm* ARMNImm_TI ( UInt type, UInt imm8 ) {
    548    ARMNImm* i = LibVEX_Alloc(sizeof(ARMNImm));
    549    i->type = type;
    550    i->imm8 = imm8;
    551    return i;
    552 }
    553 
    554 ULong ARMNImm_to_Imm64 ( ARMNImm* imm ) {
    555    int i, j;
    556    ULong y, x = imm->imm8;
    557    switch (imm->type) {
    558       case 3:
    559          x = x << 8;
    560       case 2:
    561          x = x << 8;
    562       case 1:
    563          x = x << 8;
    564       case 0:
    565          return (x << 32) | x;
    566       case 5:
    567       case 6:
    568          if (imm->type == 5)
    569             x = x << 8;
    570          else
    571             x = (x << 8) | x;
    572       case 4:
    573          x = (x << 16) | x;
    574          return (x << 32) | x;
    575       case 8:
    576          x = (x << 8) | 0xFF;
    577       case 7:
    578          x = (x << 8) | 0xFF;
    579          return (x << 32) | x;
    580       case 9:
    581          x = 0;
    582          for (i = 7; i >= 0; i--) {
    583             y = ((ULong)imm->imm8 >> i) & 1;
    584             for (j = 0; j < 8; j++) {
    585                x = (x << 1) | y;
    586             }
    587          }
    588          return x;
    589       case 10:
    590          x |= (x & 0x80) << 5;
    591          x |= ~(x & 0x40) << 5;
    592          x &= 0x187F; /* 0001 1000 0111 1111 */
    593          x |= (x & 0x40) << 4;
    594          x |= (x & 0x40) << 3;
    595          x |= (x & 0x40) << 2;
    596          x |= (x & 0x40) << 1;
    597          x = x << 19;
    598          x = (x << 32) | x;
    599          return x;
    600       default:
    601          vpanic("ARMNImm_to_Imm64");
    602    }
    603 }
    604 
    605 ARMNImm* Imm64_to_ARMNImm ( ULong x ) {
    606    ARMNImm tmp;
    607    if ((x & 0xFFFFFFFF) == (x >> 32)) {
    608       if ((x & 0xFFFFFF00) == 0)
    609          return ARMNImm_TI(0, x & 0xFF);
    610       if ((x & 0xFFFF00FF) == 0)
    611          return ARMNImm_TI(1, (x >> 8) & 0xFF);
    612       if ((x & 0xFF00FFFF) == 0)
    613          return ARMNImm_TI(2, (x >> 16) & 0xFF);
    614       if ((x & 0x00FFFFFF) == 0)
    615          return ARMNImm_TI(3, (x >> 24) & 0xFF);
    616       if ((x & 0xFFFF00FF) == 0xFF)
    617          return ARMNImm_TI(7, (x >> 8) & 0xFF);
    618       if ((x & 0xFF00FFFF) == 0xFFFF)
    619          return ARMNImm_TI(8, (x >> 16) & 0xFF);
    620       if ((x & 0xFFFF) == ((x >> 16) & 0xFFFF)) {
    621          if ((x & 0xFF00) == 0)
    622             return ARMNImm_TI(4, x & 0xFF);
    623          if ((x & 0x00FF) == 0)
    624             return ARMNImm_TI(5, (x >> 8) & 0xFF);
    625          if ((x & 0xFF) == ((x >> 8) & 0xFF))
    626             return ARMNImm_TI(6, x & 0xFF);
    627       }
    628       if ((x & 0x7FFFF) == 0) {
    629          tmp.type = 10;
    630          tmp.imm8 = ((x >> 19) & 0x7F) | ((x >> 24) & 0x80);
    631          if (ARMNImm_to_Imm64(&tmp) == x)
    632             return ARMNImm_TI(tmp.type, tmp.imm8);
    633       }
    634    } else {
    635       /* This can only be type 9. */
    636       tmp.imm8 = (((x >> 56) & 1) << 7)
    637                | (((x >> 48) & 1) << 6)
    638                | (((x >> 40) & 1) << 5)
    639                | (((x >> 32) & 1) << 4)
    640                | (((x >> 24) & 1) << 3)
    641                | (((x >> 16) & 1) << 2)
    642                | (((x >>  8) & 1) << 1)
    643                | (((x >>  0) & 1) << 0);
    644       tmp.type = 9;
    645       if (ARMNImm_to_Imm64 (&tmp) == x)
    646          return ARMNImm_TI(tmp.type, tmp.imm8);
    647    }
    648    return NULL;
    649 }
    650 
    651 void ppARMNImm (ARMNImm* i) {
    652    ULong x = ARMNImm_to_Imm64(i);
    653    vex_printf("0x%llX%llX", x, x);
    654 }
    655 
    656 /* -- Register or scalar operand --- */
    657 
    658 ARMNRS* mkARMNRS(ARMNRS_tag tag, HReg reg, UInt index)
    659 {
    660    ARMNRS *p = LibVEX_Alloc(sizeof(ARMNRS));
    661    p->tag = tag;
    662    p->reg = reg;
    663    p->index = index;
    664    return p;
    665 }
    666 
    667 void ppARMNRS(ARMNRS *p)
    668 {
    669    ppHRegARM(p->reg);
    670    if (p->tag == ARMNRS_Scalar) {
    671       vex_printf("[%d]", p->index);
    672    }
    673 }
    674 
    675 /* --------- Instructions. --------- */
    676 
    677 HChar* showARMAluOp ( ARMAluOp op ) {
    678    switch (op) {
    679       case ARMalu_ADD:  return "add";
    680       case ARMalu_ADDS: return "adds";
    681       case ARMalu_ADC:  return "adc";
    682       case ARMalu_SUB:  return "sub";
    683       case ARMalu_SUBS: return "subs";
    684       case ARMalu_SBC:  return "sbc";
    685       case ARMalu_AND:  return "and";
    686       case ARMalu_BIC:  return "bic";
    687       case ARMalu_OR:   return "orr";
    688       case ARMalu_XOR:  return "xor";
    689       default: vpanic("showARMAluOp");
    690    }
    691 }
    692 
    693 HChar* showARMShiftOp ( ARMShiftOp op ) {
    694    switch (op) {
    695       case ARMsh_SHL: return "shl";
    696       case ARMsh_SHR: return "shr";
    697       case ARMsh_SAR: return "sar";
    698       default: vpanic("showARMShiftOp");
    699    }
    700 }
    701 
    702 HChar* showARMUnaryOp ( ARMUnaryOp op ) {
    703    switch (op) {
    704       case ARMun_NEG: return "neg";
    705       case ARMun_NOT: return "not";
    706       case ARMun_CLZ: return "clz";
    707       default: vpanic("showARMUnaryOp");
    708    }
    709 }
    710 
    711 HChar* showARMMulOp ( ARMMulOp op ) {
    712    switch (op) {
    713       case ARMmul_PLAIN: return "mul";
    714       case ARMmul_ZX:    return "umull";
    715       case ARMmul_SX:    return "smull";
    716       default: vpanic("showARMMulOp");
    717    }
    718 }
    719 
    720 HChar* showARMVfpOp ( ARMVfpOp op ) {
    721    switch (op) {
    722       case ARMvfp_ADD: return "add";
    723       case ARMvfp_SUB: return "sub";
    724       case ARMvfp_MUL: return "mul";
    725       case ARMvfp_DIV: return "div";
    726       default: vpanic("showARMVfpOp");
    727    }
    728 }
    729 
    730 HChar* showARMVfpUnaryOp ( ARMVfpUnaryOp op ) {
    731    switch (op) {
    732       case ARMvfpu_COPY: return "cpy";
    733       case ARMvfpu_NEG:  return "neg";
    734       case ARMvfpu_ABS:  return "abs";
    735       case ARMvfpu_SQRT: return "sqrt";
    736       default: vpanic("showARMVfpUnaryOp");
    737    }
    738 }
    739 
    740 HChar* showARMNeonBinOp ( ARMNeonBinOp op ) {
    741    switch (op) {
    742       case ARMneon_VAND: return "vand";
    743       case ARMneon_VORR: return "vorr";
    744       case ARMneon_VXOR: return "veor";
    745       case ARMneon_VADD: return "vadd";
    746       case ARMneon_VRHADDS: return "vrhadd";
    747       case ARMneon_VRHADDU: return "vrhadd";
    748       case ARMneon_VADDFP: return "vadd";
    749       case ARMneon_VPADDFP: return "vpadd";
    750       case ARMneon_VABDFP: return "vabd";
    751       case ARMneon_VSUB: return "vsub";
    752       case ARMneon_VSUBFP: return "vsub";
    753       case ARMneon_VMINU: return "vmin";
    754       case ARMneon_VMINS: return "vmin";
    755       case ARMneon_VMINF: return "vmin";
    756       case ARMneon_VMAXU: return "vmax";
    757       case ARMneon_VMAXS: return "vmax";
    758       case ARMneon_VMAXF: return "vmax";
    759       case ARMneon_VQADDU: return "vqadd";
    760       case ARMneon_VQADDS: return "vqadd";
    761       case ARMneon_VQSUBU: return "vqsub";
    762       case ARMneon_VQSUBS: return "vqsub";
    763       case ARMneon_VCGTU:  return "vcgt";
    764       case ARMneon_VCGTS:  return "vcgt";
    765       case ARMneon_VCGTF:  return "vcgt";
    766       case ARMneon_VCGEF:  return "vcgt";
    767       case ARMneon_VCGEU:  return "vcge";
    768       case ARMneon_VCGES:  return "vcge";
    769       case ARMneon_VCEQ:  return "vceq";
    770       case ARMneon_VCEQF:  return "vceq";
    771       case ARMneon_VPADD:   return "vpadd";
    772       case ARMneon_VPMINU:   return "vpmin";
    773       case ARMneon_VPMINS:   return "vpmin";
    774       case ARMneon_VPMINF:   return "vpmin";
    775       case ARMneon_VPMAXU:   return "vpmax";
    776       case ARMneon_VPMAXS:   return "vpmax";
    777       case ARMneon_VPMAXF:   return "vpmax";
    778       case ARMneon_VEXT:   return "vext";
    779       case ARMneon_VMUL:   return "vmuli";
    780       case ARMneon_VMULLU:   return "vmull";
    781       case ARMneon_VMULLS:   return "vmull";
    782       case ARMneon_VMULP:  return "vmul";
    783       case ARMneon_VMULFP:  return "vmul";
    784       case ARMneon_VMULLP:  return "vmul";
    785       case ARMneon_VQDMULH: return "vqdmulh";
    786       case ARMneon_VQRDMULH: return "vqrdmulh";
    787       case ARMneon_VQDMULL: return "vqdmull";
    788       case ARMneon_VTBL: return "vtbl";
    789       case ARMneon_VRECPS: return "vrecps";
    790       case ARMneon_VRSQRTS: return "vrecps";
    791       /* ... */
    792       default: vpanic("showARMNeonBinOp");
    793    }
    794 }
    795 
    796 HChar* showARMNeonBinOpDataType ( ARMNeonBinOp op ) {
    797    switch (op) {
    798       case ARMneon_VAND:
    799       case ARMneon_VORR:
    800       case ARMneon_VXOR:
    801          return "";
    802       case ARMneon_VADD:
    803       case ARMneon_VSUB:
    804       case ARMneon_VEXT:
    805       case ARMneon_VMUL:
    806       case ARMneon_VPADD:
    807       case ARMneon_VTBL:
    808       case ARMneon_VCEQ:
    809          return ".i";
    810       case ARMneon_VRHADDU:
    811       case ARMneon_VMINU:
    812       case ARMneon_VMAXU:
    813       case ARMneon_VQADDU:
    814       case ARMneon_VQSUBU:
    815       case ARMneon_VCGTU:
    816       case ARMneon_VCGEU:
    817       case ARMneon_VMULLU:
    818       case ARMneon_VPMINU:
    819       case ARMneon_VPMAXU:
    820          return ".u";
    821       case ARMneon_VRHADDS:
    822       case ARMneon_VMINS:
    823       case ARMneon_VMAXS:
    824       case ARMneon_VQADDS:
    825       case ARMneon_VQSUBS:
    826       case ARMneon_VCGTS:
    827       case ARMneon_VCGES:
    828       case ARMneon_VQDMULL:
    829       case ARMneon_VMULLS:
    830       case ARMneon_VPMINS:
    831       case ARMneon_VPMAXS:
    832       case ARMneon_VQDMULH:
    833       case ARMneon_VQRDMULH:
    834          return ".s";
    835       case ARMneon_VMULP:
    836       case ARMneon_VMULLP:
    837          return ".p";
    838       case ARMneon_VADDFP:
    839       case ARMneon_VABDFP:
    840       case ARMneon_VPADDFP:
    841       case ARMneon_VSUBFP:
    842       case ARMneon_VMULFP:
    843       case ARMneon_VMINF:
    844       case ARMneon_VMAXF:
    845       case ARMneon_VPMINF:
    846       case ARMneon_VPMAXF:
    847       case ARMneon_VCGTF:
    848       case ARMneon_VCGEF:
    849       case ARMneon_VCEQF:
    850       case ARMneon_VRECPS:
    851       case ARMneon_VRSQRTS:
    852          return ".f";
    853       /* ... */
    854       default: vpanic("showARMNeonBinOpDataType");
    855    }
    856 }
    857 
    858 HChar* showARMNeonUnOp ( ARMNeonUnOp op ) {
    859    switch (op) {
    860       case ARMneon_COPY: return "vmov";
    861       case ARMneon_COPYLS: return "vmov";
    862       case ARMneon_COPYLU: return "vmov";
    863       case ARMneon_COPYN: return "vmov";
    864       case ARMneon_COPYQNSS: return "vqmovn";
    865       case ARMneon_COPYQNUS: return "vqmovun";
    866       case ARMneon_COPYQNUU: return "vqmovn";
    867       case ARMneon_NOT: return "vmvn";
    868       case ARMneon_EQZ: return "vceq";
    869       case ARMneon_CNT: return "vcnt";
    870       case ARMneon_CLS: return "vcls";
    871       case ARMneon_CLZ: return "vclz";
    872       case ARMneon_DUP: return "vdup";
    873       case ARMneon_PADDLS: return "vpaddl";
    874       case ARMneon_PADDLU: return "vpaddl";
    875       case ARMneon_VQSHLNSS: return "vqshl";
    876       case ARMneon_VQSHLNUU: return "vqshl";
    877       case ARMneon_VQSHLNUS: return "vqshlu";
    878       case ARMneon_REV16: return "vrev16";
    879       case ARMneon_REV32: return "vrev32";
    880       case ARMneon_REV64: return "vrev64";
    881       case ARMneon_VCVTFtoU: return "vcvt";
    882       case ARMneon_VCVTFtoS: return "vcvt";
    883       case ARMneon_VCVTUtoF: return "vcvt";
    884       case ARMneon_VCVTStoF: return "vcvt";
    885       case ARMneon_VCVTFtoFixedU: return "vcvt";
    886       case ARMneon_VCVTFtoFixedS: return "vcvt";
    887       case ARMneon_VCVTFixedUtoF: return "vcvt";
    888       case ARMneon_VCVTFixedStoF: return "vcvt";
    889       case ARMneon_VCVTF32toF16: return "vcvt";
    890       case ARMneon_VCVTF16toF32: return "vcvt";
    891       case ARMneon_VRECIP: return "vrecip";
    892       case ARMneon_VRECIPF: return "vrecipf";
    893       case ARMneon_VNEGF: return "vneg";
    894       case ARMneon_ABS: return "vabs";
    895       case ARMneon_VABSFP: return "vabsfp";
    896       case ARMneon_VRSQRTEFP: return "vrsqrtefp";
    897       case ARMneon_VRSQRTE: return "vrsqrte";
    898       /* ... */
    899       default: vpanic("showARMNeonUnOp");
    900    }
    901 }
    902 
    903 HChar* showARMNeonUnOpDataType ( ARMNeonUnOp op ) {
    904    switch (op) {
    905       case ARMneon_COPY:
    906       case ARMneon_NOT:
    907          return "";
    908       case ARMneon_COPYN:
    909       case ARMneon_EQZ:
    910       case ARMneon_CNT:
    911       case ARMneon_DUP:
    912       case ARMneon_REV16:
    913       case ARMneon_REV32:
    914       case ARMneon_REV64:
    915          return ".i";
    916       case ARMneon_COPYLU:
    917       case ARMneon_PADDLU:
    918       case ARMneon_COPYQNUU:
    919       case ARMneon_VQSHLNUU:
    920       case ARMneon_VRECIP:
    921       case ARMneon_VRSQRTE:
    922          return ".u";
    923       case ARMneon_CLS:
    924       case ARMneon_CLZ:
    925       case ARMneon_COPYLS:
    926       case ARMneon_PADDLS:
    927       case ARMneon_COPYQNSS:
    928       case ARMneon_COPYQNUS:
    929       case ARMneon_VQSHLNSS:
    930       case ARMneon_VQSHLNUS:
    931       case ARMneon_ABS:
    932          return ".s";
    933       case ARMneon_VRECIPF:
    934       case ARMneon_VNEGF:
    935       case ARMneon_VABSFP:
    936       case ARMneon_VRSQRTEFP:
    937          return ".f";
    938       case ARMneon_VCVTFtoU: return ".u32.f32";
    939       case ARMneon_VCVTFtoS: return ".s32.f32";
    940       case ARMneon_VCVTUtoF: return ".f32.u32";
    941       case ARMneon_VCVTStoF: return ".f32.s32";
    942       case ARMneon_VCVTF16toF32: return ".f32.f16";
    943       case ARMneon_VCVTF32toF16: return ".f16.f32";
    944       case ARMneon_VCVTFtoFixedU: return ".u32.f32";
    945       case ARMneon_VCVTFtoFixedS: return ".s32.f32";
    946       case ARMneon_VCVTFixedUtoF: return ".f32.u32";
    947       case ARMneon_VCVTFixedStoF: return ".f32.s32";
    948       /* ... */
    949       default: vpanic("showARMNeonUnOpDataType");
    950    }
    951 }
    952 
    953 HChar* showARMNeonUnOpS ( ARMNeonUnOpS op ) {
    954    switch (op) {
    955       case ARMneon_SETELEM: return "vmov";
    956       case ARMneon_GETELEMU: return "vmov";
    957       case ARMneon_GETELEMS: return "vmov";
    958       case ARMneon_VDUP: return "vdup";
    959       /* ... */
    960       default: vpanic("showARMNeonUnarySOp");
    961    }
    962 }
    963 
    964 HChar* showARMNeonUnOpSDataType ( ARMNeonUnOpS op ) {
    965    switch (op) {
    966       case ARMneon_SETELEM:
    967       case ARMneon_VDUP:
    968          return ".i";
    969       case ARMneon_GETELEMS:
    970          return ".s";
    971       case ARMneon_GETELEMU:
    972          return ".u";
    973       /* ... */
    974       default: vpanic("showARMNeonUnarySOp");
    975    }
    976 }
    977 
    978 HChar* showARMNeonShiftOp ( ARMNeonShiftOp op ) {
    979    switch (op) {
    980       case ARMneon_VSHL: return "vshl";
    981       case ARMneon_VSAL: return "vshl";
    982       case ARMneon_VQSHL: return "vqshl";
    983       case ARMneon_VQSAL: return "vqshl";
    984       /* ... */
    985       default: vpanic("showARMNeonShiftOp");
    986    }
    987 }
    988 
    989 HChar* showARMNeonShiftOpDataType ( ARMNeonShiftOp op ) {
    990    switch (op) {
    991       case ARMneon_VSHL:
    992       case ARMneon_VQSHL:
    993          return ".u";
    994       case ARMneon_VSAL:
    995       case ARMneon_VQSAL:
    996          return ".s";
    997       /* ... */
    998       default: vpanic("showARMNeonShiftOpDataType");
    999    }
   1000 }
   1001 
   1002 HChar* showARMNeonDualOp ( ARMNeonDualOp op ) {
   1003    switch (op) {
   1004       case ARMneon_TRN: return "vtrn";
   1005       case ARMneon_ZIP: return "vzip";
   1006       case ARMneon_UZP: return "vuzp";
   1007       /* ... */
   1008       default: vpanic("showARMNeonDualOp");
   1009    }
   1010 }
   1011 
   1012 HChar* showARMNeonDualOpDataType ( ARMNeonDualOp op ) {
   1013    switch (op) {
   1014       case ARMneon_TRN:
   1015       case ARMneon_ZIP:
   1016       case ARMneon_UZP:
   1017          return "i";
   1018       /* ... */
   1019       default: vpanic("showARMNeonDualOp");
   1020    }
   1021 }
   1022 
   1023 static HChar* showARMNeonDataSize_wrk ( UInt size )
   1024 {
   1025    switch (size) {
   1026       case 0: return "8";
   1027       case 1: return "16";
   1028       case 2: return "32";
   1029       case 3: return "64";
   1030       default: vpanic("showARMNeonDataSize");
   1031    }
   1032 }
   1033 
   1034 static HChar* showARMNeonDataSize ( ARMInstr* i )
   1035 {
   1036    switch (i->tag) {
   1037       case ARMin_NBinary:
   1038          if (i->ARMin.NBinary.op == ARMneon_VEXT)
   1039             return "8";
   1040          if (i->ARMin.NBinary.op == ARMneon_VAND ||
   1041              i->ARMin.NBinary.op == ARMneon_VORR ||
   1042              i->ARMin.NBinary.op == ARMneon_VXOR)
   1043             return "";
   1044          return showARMNeonDataSize_wrk(i->ARMin.NBinary.size);
   1045       case ARMin_NUnary:
   1046          if (i->ARMin.NUnary.op == ARMneon_COPY ||
   1047              i->ARMin.NUnary.op == ARMneon_NOT ||
   1048              i->ARMin.NUnary.op == ARMneon_VCVTF32toF16||
   1049              i->ARMin.NUnary.op == ARMneon_VCVTF16toF32||
   1050              i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
   1051              i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
   1052              i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
   1053              i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF ||
   1054              i->ARMin.NUnary.op == ARMneon_VCVTFtoS ||
   1055              i->ARMin.NUnary.op == ARMneon_VCVTFtoU ||
   1056              i->ARMin.NUnary.op == ARMneon_VCVTStoF ||
   1057              i->ARMin.NUnary.op == ARMneon_VCVTUtoF)
   1058             return "";
   1059          if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
   1060              i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
   1061              i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
   1062             UInt size;
   1063             size = i->ARMin.NUnary.size;
   1064             if (size & 0x40)
   1065                return "64";
   1066             if (size & 0x20)
   1067                return "32";
   1068             if (size & 0x10)
   1069                return "16";
   1070             if (size & 0x08)
   1071                return "8";
   1072             vpanic("showARMNeonDataSize");
   1073          }
   1074          return showARMNeonDataSize_wrk(i->ARMin.NUnary.size);
   1075       case ARMin_NUnaryS:
   1076          if (i->ARMin.NUnaryS.op == ARMneon_VDUP) {
   1077             int size;
   1078             size = i->ARMin.NUnaryS.size;
   1079             if ((size & 1) == 1)
   1080                return "8";
   1081             if ((size & 3) == 2)
   1082                return "16";
   1083             if ((size & 7) == 4)
   1084                return "32";
   1085             vpanic("showARMNeonDataSize");
   1086          }
   1087          return showARMNeonDataSize_wrk(i->ARMin.NUnaryS.size);
   1088       case ARMin_NShift:
   1089          return showARMNeonDataSize_wrk(i->ARMin.NShift.size);
   1090       case ARMin_NDual:
   1091          return showARMNeonDataSize_wrk(i->ARMin.NDual.size);
   1092       default:
   1093          vpanic("showARMNeonDataSize");
   1094    }
   1095 }
   1096 
   1097 ARMInstr* ARMInstr_Alu ( ARMAluOp op,
   1098                          HReg dst, HReg argL, ARMRI84* argR ) {
   1099    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1100    i->tag            = ARMin_Alu;
   1101    i->ARMin.Alu.op   = op;
   1102    i->ARMin.Alu.dst  = dst;
   1103    i->ARMin.Alu.argL = argL;
   1104    i->ARMin.Alu.argR = argR;
   1105    return i;
   1106 }
   1107 ARMInstr* ARMInstr_Shift  ( ARMShiftOp op,
   1108                             HReg dst, HReg argL, ARMRI5* argR ) {
   1109    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1110    i->tag              = ARMin_Shift;
   1111    i->ARMin.Shift.op   = op;
   1112    i->ARMin.Shift.dst  = dst;
   1113    i->ARMin.Shift.argL = argL;
   1114    i->ARMin.Shift.argR = argR;
   1115    return i;
   1116 }
   1117 ARMInstr* ARMInstr_Unary ( ARMUnaryOp op, HReg dst, HReg src ) {
   1118    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1119    i->tag             = ARMin_Unary;
   1120    i->ARMin.Unary.op  = op;
   1121    i->ARMin.Unary.dst = dst;
   1122    i->ARMin.Unary.src = src;
   1123    return i;
   1124 }
   1125 ARMInstr* ARMInstr_CmpOrTst ( Bool isCmp, HReg argL, ARMRI84* argR ) {
   1126    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1127    i->tag                  = ARMin_CmpOrTst;
   1128    i->ARMin.CmpOrTst.isCmp = isCmp;
   1129    i->ARMin.CmpOrTst.argL  = argL;
   1130    i->ARMin.CmpOrTst.argR  = argR;
   1131    return i;
   1132 }
   1133 ARMInstr* ARMInstr_Mov ( HReg dst, ARMRI84* src ) {
   1134    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1135    i->tag           = ARMin_Mov;
   1136    i->ARMin.Mov.dst = dst;
   1137    i->ARMin.Mov.src = src;
   1138    return i;
   1139 }
   1140 ARMInstr* ARMInstr_Imm32  ( HReg dst, UInt imm32 ) {
   1141    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1142    i->tag               = ARMin_Imm32;
   1143    i->ARMin.Imm32.dst   = dst;
   1144    i->ARMin.Imm32.imm32 = imm32;
   1145    return i;
   1146 }
   1147 ARMInstr* ARMInstr_LdSt32 ( Bool isLoad, HReg rD, ARMAMode1* amode ) {
   1148    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1149    i->tag                 = ARMin_LdSt32;
   1150    i->ARMin.LdSt32.isLoad = isLoad;
   1151    i->ARMin.LdSt32.rD     = rD;
   1152    i->ARMin.LdSt32.amode  = amode;
   1153    return i;
   1154 }
   1155 ARMInstr* ARMInstr_LdSt16 ( Bool isLoad, Bool signedLoad,
   1156                             HReg rD, ARMAMode2* amode ) {
   1157    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1158    i->tag                     = ARMin_LdSt16;
   1159    i->ARMin.LdSt16.isLoad     = isLoad;
   1160    i->ARMin.LdSt16.signedLoad = signedLoad;
   1161    i->ARMin.LdSt16.rD         = rD;
   1162    i->ARMin.LdSt16.amode      = amode;
   1163    return i;
   1164 }
   1165 ARMInstr* ARMInstr_LdSt8U ( Bool isLoad, HReg rD, ARMAMode1* amode ) {
   1166    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1167    i->tag                 = ARMin_LdSt8U;
   1168    i->ARMin.LdSt8U.isLoad = isLoad;
   1169    i->ARMin.LdSt8U.rD     = rD;
   1170    i->ARMin.LdSt8U.amode  = amode;
   1171    return i;
   1172 }
   1173 //extern ARMInstr* ARMInstr_Ld8S   ( HReg, ARMAMode2* );
   1174 ARMInstr* ARMInstr_Goto ( IRJumpKind jk, ARMCondCode cond, HReg gnext ) {
   1175    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1176    i->tag              = ARMin_Goto;
   1177    i->ARMin.Goto.jk    = jk;
   1178    i->ARMin.Goto.cond  = cond;
   1179    i->ARMin.Goto.gnext = gnext;
   1180    return i;
   1181 }
   1182 ARMInstr* ARMInstr_CMov ( ARMCondCode cond, HReg dst, ARMRI84* src ) {
   1183    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1184    i->tag             = ARMin_CMov;
   1185    i->ARMin.CMov.cond = cond;
   1186    i->ARMin.CMov.dst  = dst;
   1187    i->ARMin.CMov.src  = src;
   1188    vassert(cond != ARMcc_AL);
   1189    return i;
   1190 }
   1191 ARMInstr* ARMInstr_Call ( ARMCondCode cond, HWord target, Int nArgRegs ) {
   1192    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1193    i->tag                 = ARMin_Call;
   1194    i->ARMin.Call.cond     = cond;
   1195    i->ARMin.Call.target   = target;
   1196    i->ARMin.Call.nArgRegs = nArgRegs;
   1197    return i;
   1198 }
   1199 ARMInstr* ARMInstr_Mul ( ARMMulOp op ) {
   1200    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1201    i->tag          = ARMin_Mul;
   1202    i->ARMin.Mul.op = op;
   1203    return i;
   1204 }
   1205 ARMInstr* ARMInstr_LdrEX ( Int szB ) {
   1206    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1207    i->tag             = ARMin_LdrEX;
   1208    i->ARMin.LdrEX.szB = szB;
   1209    vassert(szB == 8 || szB == 4 || szB == 1);
   1210    return i;
   1211 }
   1212 ARMInstr* ARMInstr_StrEX ( Int szB ) {
   1213    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1214    i->tag             = ARMin_StrEX;
   1215    i->ARMin.StrEX.szB = szB;
   1216    vassert(szB == 8 || szB == 4 || szB == 1);
   1217    return i;
   1218 }
   1219 ARMInstr* ARMInstr_VLdStD ( Bool isLoad, HReg dD, ARMAModeV* am ) {
   1220    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1221    i->tag                 = ARMin_VLdStD;
   1222    i->ARMin.VLdStD.isLoad = isLoad;
   1223    i->ARMin.VLdStD.dD     = dD;
   1224    i->ARMin.VLdStD.amode  = am;
   1225    return i;
   1226 }
   1227 ARMInstr* ARMInstr_VLdStS ( Bool isLoad, HReg fD, ARMAModeV* am ) {
   1228    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1229    i->tag                 = ARMin_VLdStS;
   1230    i->ARMin.VLdStS.isLoad = isLoad;
   1231    i->ARMin.VLdStS.fD     = fD;
   1232    i->ARMin.VLdStS.amode  = am;
   1233    return i;
   1234 }
   1235 ARMInstr* ARMInstr_VAluD ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
   1236    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1237    i->tag              = ARMin_VAluD;
   1238    i->ARMin.VAluD.op   = op;
   1239    i->ARMin.VAluD.dst  = dst;
   1240    i->ARMin.VAluD.argL = argL;
   1241    i->ARMin.VAluD.argR = argR;
   1242    return i;
   1243 }
   1244 ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
   1245    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1246    i->tag              = ARMin_VAluS;
   1247    i->ARMin.VAluS.op   = op;
   1248    i->ARMin.VAluS.dst  = dst;
   1249    i->ARMin.VAluS.argL = argL;
   1250    i->ARMin.VAluS.argR = argR;
   1251    return i;
   1252 }
   1253 ARMInstr* ARMInstr_VUnaryD ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
   1254    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1255    i->tag               = ARMin_VUnaryD;
   1256    i->ARMin.VUnaryD.op  = op;
   1257    i->ARMin.VUnaryD.dst = dst;
   1258    i->ARMin.VUnaryD.src = src;
   1259    return i;
   1260 }
   1261 ARMInstr* ARMInstr_VUnaryS ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
   1262    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1263    i->tag               = ARMin_VUnaryS;
   1264    i->ARMin.VUnaryS.op  = op;
   1265    i->ARMin.VUnaryS.dst = dst;
   1266    i->ARMin.VUnaryS.src = src;
   1267    return i;
   1268 }
   1269 ARMInstr* ARMInstr_VCmpD ( HReg argL, HReg argR ) {
   1270    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1271    i->tag              = ARMin_VCmpD;
   1272    i->ARMin.VCmpD.argL = argL;
   1273    i->ARMin.VCmpD.argR = argR;
   1274    return i;
   1275 }
   1276 ARMInstr* ARMInstr_VCMovD ( ARMCondCode cond, HReg dst, HReg src ) {
   1277    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1278    i->tag               = ARMin_VCMovD;
   1279    i->ARMin.VCMovD.cond = cond;
   1280    i->ARMin.VCMovD.dst  = dst;
   1281    i->ARMin.VCMovD.src  = src;
   1282    vassert(cond != ARMcc_AL);
   1283    return i;
   1284 }
   1285 ARMInstr* ARMInstr_VCMovS ( ARMCondCode cond, HReg dst, HReg src ) {
   1286    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1287    i->tag               = ARMin_VCMovS;
   1288    i->ARMin.VCMovS.cond = cond;
   1289    i->ARMin.VCMovS.dst  = dst;
   1290    i->ARMin.VCMovS.src  = src;
   1291    vassert(cond != ARMcc_AL);
   1292    return i;
   1293 }
   1294 ARMInstr* ARMInstr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
   1295    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1296    i->tag               = ARMin_VCvtSD;
   1297    i->ARMin.VCvtSD.sToD = sToD;
   1298    i->ARMin.VCvtSD.dst  = dst;
   1299    i->ARMin.VCvtSD.src  = src;
   1300    return i;
   1301 }
   1302 ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo ) {
   1303    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1304    i->tag              = ARMin_VXferD;
   1305    i->ARMin.VXferD.toD = toD;
   1306    i->ARMin.VXferD.dD  = dD;
   1307    i->ARMin.VXferD.rHi = rHi;
   1308    i->ARMin.VXferD.rLo = rLo;
   1309    return i;
   1310 }
   1311 ARMInstr* ARMInstr_VXferS ( Bool toS, HReg fD, HReg rLo ) {
   1312    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1313    i->tag              = ARMin_VXferS;
   1314    i->ARMin.VXferS.toS = toS;
   1315    i->ARMin.VXferS.fD  = fD;
   1316    i->ARMin.VXferS.rLo = rLo;
   1317    return i;
   1318 }
   1319 ARMInstr* ARMInstr_VCvtID ( Bool iToD, Bool syned,
   1320                             HReg dst, HReg src ) {
   1321    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1322    i->tag                = ARMin_VCvtID;
   1323    i->ARMin.VCvtID.iToD  = iToD;
   1324    i->ARMin.VCvtID.syned = syned;
   1325    i->ARMin.VCvtID.dst   = dst;
   1326    i->ARMin.VCvtID.src   = src;
   1327    return i;
   1328 }
   1329 ARMInstr* ARMInstr_FPSCR ( Bool toFPSCR, HReg iReg ) {
   1330    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1331    i->tag                 = ARMin_FPSCR;
   1332    i->ARMin.FPSCR.toFPSCR = toFPSCR;
   1333    i->ARMin.FPSCR.iReg    = iReg;
   1334    return i;
   1335 }
   1336 ARMInstr* ARMInstr_MFence ( void ) {
   1337    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1338    i->tag      = ARMin_MFence;
   1339    return i;
   1340 }
   1341 
   1342 ARMInstr* ARMInstr_NLdStQ ( Bool isLoad, HReg dQ, ARMAModeN *amode ) {
   1343    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1344    i->tag                  = ARMin_NLdStQ;
   1345    i->ARMin.NLdStQ.isLoad  = isLoad;
   1346    i->ARMin.NLdStQ.dQ      = dQ;
   1347    i->ARMin.NLdStQ.amode   = amode;
   1348    return i;
   1349 }
   1350 
   1351 ARMInstr* ARMInstr_NLdStD ( Bool isLoad, HReg dD, ARMAModeN *amode ) {
   1352    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1353    i->tag                  = ARMin_NLdStD;
   1354    i->ARMin.NLdStD.isLoad  = isLoad;
   1355    i->ARMin.NLdStD.dD      = dD;
   1356    i->ARMin.NLdStD.amode   = amode;
   1357    return i;
   1358 }
   1359 
   1360 ARMInstr* ARMInstr_NUnary ( ARMNeonUnOp op, HReg dQ, HReg nQ,
   1361                             UInt size, Bool Q ) {
   1362    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1363    i->tag                = ARMin_NUnary;
   1364    i->ARMin.NUnary.op   = op;
   1365    i->ARMin.NUnary.src  = nQ;
   1366    i->ARMin.NUnary.dst  = dQ;
   1367    i->ARMin.NUnary.size = size;
   1368    i->ARMin.NUnary.Q    = Q;
   1369    return i;
   1370 }
   1371 
   1372 ARMInstr* ARMInstr_NUnaryS ( ARMNeonUnOp op, ARMNRS* dst, ARMNRS* src,
   1373                              UInt size, Bool Q ) {
   1374    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1375    i->tag                = ARMin_NUnaryS;
   1376    i->ARMin.NUnaryS.op   = op;
   1377    i->ARMin.NUnaryS.src  = src;
   1378    i->ARMin.NUnaryS.dst  = dst;
   1379    i->ARMin.NUnaryS.size = size;
   1380    i->ARMin.NUnaryS.Q    = Q;
   1381    return i;
   1382 }
   1383 
   1384 ARMInstr* ARMInstr_NDual ( ARMNeonDualOp op, HReg nQ, HReg mQ,
   1385                            UInt size, Bool Q ) {
   1386    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1387    i->tag                = ARMin_NDual;
   1388    i->ARMin.NDual.op   = op;
   1389    i->ARMin.NDual.arg1 = nQ;
   1390    i->ARMin.NDual.arg2 = mQ;
   1391    i->ARMin.NDual.size = size;
   1392    i->ARMin.NDual.Q    = Q;
   1393    return i;
   1394 }
   1395 
   1396 ARMInstr* ARMInstr_NBinary ( ARMNeonBinOp op,
   1397                              HReg dst, HReg argL, HReg argR,
   1398                              UInt size, Bool Q ) {
   1399    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1400    i->tag                = ARMin_NBinary;
   1401    i->ARMin.NBinary.op   = op;
   1402    i->ARMin.NBinary.argL = argL;
   1403    i->ARMin.NBinary.argR = argR;
   1404    i->ARMin.NBinary.dst  = dst;
   1405    i->ARMin.NBinary.size = size;
   1406    i->ARMin.NBinary.Q    = Q;
   1407    return i;
   1408 }
   1409 
   1410 ARMInstr* ARMInstr_NeonImm (HReg dst, ARMNImm* imm ) {
   1411    ARMInstr *i = LibVEX_Alloc(sizeof(ARMInstr));
   1412    i->tag         = ARMin_NeonImm;
   1413    i->ARMin.NeonImm.dst = dst;
   1414    i->ARMin.NeonImm.imm = imm;
   1415    return i;
   1416 }
   1417 
   1418 ARMInstr* ARMInstr_NCMovQ ( ARMCondCode cond, HReg dst, HReg src ) {
   1419    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1420    i->tag               = ARMin_NCMovQ;
   1421    i->ARMin.NCMovQ.cond = cond;
   1422    i->ARMin.NCMovQ.dst  = dst;
   1423    i->ARMin.NCMovQ.src  = src;
   1424    vassert(cond != ARMcc_AL);
   1425    return i;
   1426 }
   1427 
   1428 ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp op,
   1429                             HReg dst, HReg argL, HReg argR,
   1430                             UInt size, Bool Q ) {
   1431    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1432    i->tag                = ARMin_NShift;
   1433    i->ARMin.NShift.op   = op;
   1434    i->ARMin.NShift.argL = argL;
   1435    i->ARMin.NShift.argR = argR;
   1436    i->ARMin.NShift.dst  = dst;
   1437    i->ARMin.NShift.size = size;
   1438    i->ARMin.NShift.Q    = Q;
   1439    return i;
   1440 }
   1441 
   1442 /* Helper copy-pasted from isel.c */
   1443 static Bool fitsIn8x4 ( UInt* u8, UInt* u4, UInt u )
   1444 {
   1445    UInt i;
   1446    for (i = 0; i < 16; i++) {
   1447       if (0 == (u & 0xFFFFFF00)) {
   1448          *u8 = u;
   1449          *u4 = i;
   1450          return True;
   1451       }
   1452       u = ROR32(u, 30);
   1453    }
   1454    vassert(i == 16);
   1455    return False;
   1456 }
   1457 
   1458 ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 ) {
   1459    UInt u8, u4;
   1460    ARMInstr *i = LibVEX_Alloc(sizeof(ARMInstr));
   1461    /* Try to generate single ADD if possible */
   1462    if (fitsIn8x4(&u8, &u4, imm32)) {
   1463       i->tag            = ARMin_Alu;
   1464       i->ARMin.Alu.op   = ARMalu_ADD;
   1465       i->ARMin.Alu.dst  = rD;
   1466       i->ARMin.Alu.argL = rN;
   1467       i->ARMin.Alu.argR = ARMRI84_I84(u8, u4);
   1468    } else {
   1469       i->tag               = ARMin_Add32;
   1470       i->ARMin.Add32.rD    = rD;
   1471       i->ARMin.Add32.rN    = rN;
   1472       i->ARMin.Add32.imm32 = imm32;
   1473    }
   1474    return i;
   1475 }
   1476 
   1477 /* ... */
   1478 
   1479 void ppARMInstr ( ARMInstr* i ) {
   1480    switch (i->tag) {
   1481       case ARMin_Alu:
   1482          vex_printf("%-4s  ", showARMAluOp(i->ARMin.Alu.op));
   1483          ppHRegARM(i->ARMin.Alu.dst);
   1484          vex_printf(", ");
   1485          ppHRegARM(i->ARMin.Alu.argL);
   1486          vex_printf(", ");
   1487          ppARMRI84(i->ARMin.Alu.argR);
   1488          return;
   1489       case ARMin_Shift:
   1490          vex_printf("%s   ", showARMShiftOp(i->ARMin.Shift.op));
   1491          ppHRegARM(i->ARMin.Shift.dst);
   1492          vex_printf(", ");
   1493          ppHRegARM(i->ARMin.Shift.argL);
   1494          vex_printf(", ");
   1495          ppARMRI5(i->ARMin.Shift.argR);
   1496          return;
   1497       case ARMin_Unary:
   1498          vex_printf("%s   ", showARMUnaryOp(i->ARMin.Unary.op));
   1499          ppHRegARM(i->ARMin.Unary.dst);
   1500          vex_printf(", ");
   1501          ppHRegARM(i->ARMin.Unary.src);
   1502          return;
   1503       case ARMin_CmpOrTst:
   1504          vex_printf("%s   ", i->ARMin.CmpOrTst.isCmp ? "cmp" : "tst");
   1505          ppHRegARM(i->ARMin.CmpOrTst.argL);
   1506          vex_printf(", ");
   1507          ppARMRI84(i->ARMin.CmpOrTst.argR);
   1508          return;
   1509       case ARMin_Mov:
   1510          vex_printf("mov   ");
   1511          ppHRegARM(i->ARMin.Mov.dst);
   1512          vex_printf(", ");
   1513          ppARMRI84(i->ARMin.Mov.src);
   1514          return;
   1515       case ARMin_Imm32:
   1516          vex_printf("imm   ");
   1517          ppHRegARM(i->ARMin.Imm32.dst);
   1518          vex_printf(", 0x%x", i->ARMin.Imm32.imm32);
   1519          return;
   1520       case ARMin_LdSt32:
   1521          if (i->ARMin.LdSt32.isLoad) {
   1522             vex_printf("ldr   ");
   1523             ppHRegARM(i->ARMin.LdSt32.rD);
   1524             vex_printf(", ");
   1525             ppARMAMode1(i->ARMin.LdSt32.amode);
   1526          } else {
   1527             vex_printf("str   ");
   1528             ppARMAMode1(i->ARMin.LdSt32.amode);
   1529             vex_printf(", ");
   1530             ppHRegARM(i->ARMin.LdSt32.rD);
   1531          }
   1532          return;
   1533       case ARMin_LdSt16:
   1534          if (i->ARMin.LdSt16.isLoad) {
   1535             vex_printf("%s", i->ARMin.LdSt16.signedLoad
   1536                                 ? "ldrsh " : "ldrh  " );
   1537             ppHRegARM(i->ARMin.LdSt16.rD);
   1538             vex_printf(", ");
   1539             ppARMAMode2(i->ARMin.LdSt16.amode);
   1540          } else {
   1541             vex_printf("strh  ");
   1542             ppARMAMode2(i->ARMin.LdSt16.amode);
   1543             vex_printf(", ");
   1544             ppHRegARM(i->ARMin.LdSt16.rD);
   1545          }
   1546          return;
   1547       case ARMin_LdSt8U:
   1548          if (i->ARMin.LdSt8U.isLoad) {
   1549             vex_printf("ldrb  ");
   1550             ppHRegARM(i->ARMin.LdSt8U.rD);
   1551             vex_printf(", ");
   1552             ppARMAMode1(i->ARMin.LdSt8U.amode);
   1553          } else {
   1554             vex_printf("strb  ");
   1555             ppARMAMode1(i->ARMin.LdSt8U.amode);
   1556             vex_printf(", ");
   1557             ppHRegARM(i->ARMin.LdSt8U.rD);
   1558          }
   1559          return;
   1560       case ARMin_Ld8S:
   1561          goto unhandled;
   1562       case ARMin_Goto:
   1563          if (i->ARMin.Goto.cond != ARMcc_AL) {
   1564             vex_printf("if (%%cpsr.%s) { ",
   1565                        showARMCondCode(i->ARMin.Goto.cond));
   1566          } else {
   1567             vex_printf("if (1) { ");
   1568          }
   1569          if (i->ARMin.Goto.jk != Ijk_Boring
   1570              && i->ARMin.Goto.jk != Ijk_Call
   1571              && i->ARMin.Goto.jk != Ijk_Ret) {
   1572             vex_printf("mov r8, $");
   1573             ppIRJumpKind(i->ARMin.Goto.jk);
   1574             vex_printf(" ; ");
   1575          }
   1576          vex_printf("mov r0, ");
   1577          ppHRegARM(i->ARMin.Goto.gnext);
   1578          vex_printf(" ; bx r14");
   1579          if (i->ARMin.Goto.cond != ARMcc_AL) {
   1580             vex_printf(" }");
   1581          } else {
   1582             vex_printf(" }");
   1583          }
   1584          return;
   1585       case ARMin_CMov:
   1586          vex_printf("mov%s ", showARMCondCode(i->ARMin.CMov.cond));
   1587          ppHRegARM(i->ARMin.CMov.dst);
   1588          vex_printf(", ");
   1589          ppARMRI84(i->ARMin.CMov.src);
   1590          return;
   1591       case ARMin_Call:
   1592          vex_printf("call%s  ",
   1593                     i->ARMin.Call.cond==ARMcc_AL
   1594                        ? "" : showARMCondCode(i->ARMin.Call.cond));
   1595          vex_printf("0x%lx [nArgRegs=%d]",
   1596                     i->ARMin.Call.target, i->ARMin.Call.nArgRegs);
   1597          return;
   1598       case ARMin_Mul:
   1599          vex_printf("%-5s ", showARMMulOp(i->ARMin.Mul.op));
   1600          if (i->ARMin.Mul.op == ARMmul_PLAIN) {
   1601             vex_printf("r0, r2, r3");
   1602          } else {
   1603             vex_printf("r1:r0, r2, r3");
   1604          }
   1605          return;
   1606       case ARMin_LdrEX:
   1607          if (i->ARMin.LdrEX.szB == 8) {
   1608             vex_printf("ldrexd r2, r3, [r0]");
   1609          } else {
   1610             vex_printf("ldrex%s ", i->ARMin.LdrEX.szB == 1 ? "b"
   1611                                  : i->ARMin.LdrEX.szB == 2 ? "h" : "");
   1612             vex_printf("r2, [r0]");
   1613          }
   1614          return;
   1615       case ARMin_StrEX:
   1616          if (i->ARMin.StrEX.szB == 8) {
   1617             vex_printf("strexd r1, r2, r3, [r0]");
   1618          } else {
   1619             vex_printf("strex%s ", i->ARMin.StrEX.szB == 1 ? "b"
   1620                                  : i->ARMin.StrEX.szB == 2 ? "h" : "");
   1621             vex_printf("r1, r2, [r0]");
   1622          }
   1623          return;
   1624       case ARMin_VLdStD:
   1625          if (i->ARMin.VLdStD.isLoad) {
   1626             vex_printf("fldd  ");
   1627             ppHRegARM(i->ARMin.VLdStD.dD);
   1628             vex_printf(", ");
   1629             ppARMAModeV(i->ARMin.VLdStD.amode);
   1630          } else {
   1631             vex_printf("fstd  ");
   1632             ppARMAModeV(i->ARMin.VLdStD.amode);
   1633             vex_printf(", ");
   1634             ppHRegARM(i->ARMin.VLdStD.dD);
   1635          }
   1636          return;
   1637       case ARMin_VLdStS:
   1638          if (i->ARMin.VLdStS.isLoad) {
   1639             vex_printf("flds  ");
   1640             ppHRegARM(i->ARMin.VLdStS.fD);
   1641             vex_printf(", ");
   1642             ppARMAModeV(i->ARMin.VLdStS.amode);
   1643          } else {
   1644             vex_printf("fsts  ");
   1645             ppARMAModeV(i->ARMin.VLdStS.amode);
   1646             vex_printf(", ");
   1647             ppHRegARM(i->ARMin.VLdStS.fD);
   1648          }
   1649          return;
   1650       case ARMin_VAluD:
   1651          vex_printf("f%-3sd ", showARMVfpOp(i->ARMin.VAluD.op));
   1652          ppHRegARM(i->ARMin.VAluD.dst);
   1653          vex_printf(", ");
   1654          ppHRegARM(i->ARMin.VAluD.argL);
   1655          vex_printf(", ");
   1656          ppHRegARM(i->ARMin.VAluD.argR);
   1657          return;
   1658       case ARMin_VAluS:
   1659          vex_printf("f%-3ss ", showARMVfpOp(i->ARMin.VAluS.op));
   1660          ppHRegARM(i->ARMin.VAluS.dst);
   1661          vex_printf(", ");
   1662          ppHRegARM(i->ARMin.VAluS.argL);
   1663          vex_printf(", ");
   1664          ppHRegARM(i->ARMin.VAluS.argR);
   1665          return;
   1666       case ARMin_VUnaryD:
   1667          vex_printf("f%-3sd ", showARMVfpUnaryOp(i->ARMin.VUnaryD.op));
   1668          ppHRegARM(i->ARMin.VUnaryD.dst);
   1669          vex_printf(", ");
   1670          ppHRegARM(i->ARMin.VUnaryD.src);
   1671          return;
   1672       case ARMin_VUnaryS:
   1673          vex_printf("f%-3ss ", showARMVfpUnaryOp(i->ARMin.VUnaryS.op));
   1674          ppHRegARM(i->ARMin.VUnaryS.dst);
   1675          vex_printf(", ");
   1676          ppHRegARM(i->ARMin.VUnaryS.src);
   1677          return;
   1678       case ARMin_VCmpD:
   1679          vex_printf("fcmpd ");
   1680          ppHRegARM(i->ARMin.VCmpD.argL);
   1681          vex_printf(", ");
   1682          ppHRegARM(i->ARMin.VCmpD.argR);
   1683          vex_printf(" ; fmstat");
   1684          return;
   1685       case ARMin_VCMovD:
   1686          vex_printf("fcpyd%s ", showARMCondCode(i->ARMin.VCMovD.cond));
   1687          ppHRegARM(i->ARMin.VCMovD.dst);
   1688          vex_printf(", ");
   1689          ppHRegARM(i->ARMin.VCMovD.src);
   1690          return;
   1691       case ARMin_VCMovS:
   1692          vex_printf("fcpys%s ", showARMCondCode(i->ARMin.VCMovS.cond));
   1693          ppHRegARM(i->ARMin.VCMovS.dst);
   1694          vex_printf(", ");
   1695          ppHRegARM(i->ARMin.VCMovS.src);
   1696          return;
   1697       case ARMin_VCvtSD:
   1698          vex_printf("fcvt%s ", i->ARMin.VCvtSD.sToD ? "ds" : "sd");
   1699          ppHRegARM(i->ARMin.VCvtSD.dst);
   1700          vex_printf(", ");
   1701          ppHRegARM(i->ARMin.VCvtSD.src);
   1702          return;
   1703       case ARMin_VXferD:
   1704          vex_printf("vmov  ");
   1705          if (i->ARMin.VXferD.toD) {
   1706             ppHRegARM(i->ARMin.VXferD.dD);
   1707             vex_printf(", ");
   1708             ppHRegARM(i->ARMin.VXferD.rLo);
   1709             vex_printf(", ");
   1710             ppHRegARM(i->ARMin.VXferD.rHi);
   1711          } else {
   1712             ppHRegARM(i->ARMin.VXferD.rLo);
   1713             vex_printf(", ");
   1714             ppHRegARM(i->ARMin.VXferD.rHi);
   1715             vex_printf(", ");
   1716             ppHRegARM(i->ARMin.VXferD.dD);
   1717          }
   1718          return;
   1719       case ARMin_VXferS:
   1720          vex_printf("vmov  ");
   1721          if (i->ARMin.VXferS.toS) {
   1722             ppHRegARM(i->ARMin.VXferS.fD);
   1723             vex_printf(", ");
   1724             ppHRegARM(i->ARMin.VXferS.rLo);
   1725          } else {
   1726             ppHRegARM(i->ARMin.VXferS.rLo);
   1727             vex_printf(", ");
   1728             ppHRegARM(i->ARMin.VXferS.fD);
   1729          }
   1730          return;
   1731       case ARMin_VCvtID: {
   1732          HChar* nm = "?";
   1733          if (i->ARMin.VCvtID.iToD) {
   1734             nm = i->ARMin.VCvtID.syned ? "fsitod" : "fuitod";
   1735          } else {
   1736             nm = i->ARMin.VCvtID.syned ? "ftosid" : "ftouid";
   1737          }
   1738          vex_printf("%s ", nm);
   1739          ppHRegARM(i->ARMin.VCvtID.dst);
   1740          vex_printf(", ");
   1741          ppHRegARM(i->ARMin.VCvtID.src);
   1742          return;
   1743       }
   1744       case ARMin_FPSCR:
   1745          if (i->ARMin.FPSCR.toFPSCR) {
   1746             vex_printf("fmxr  fpscr, ");
   1747             ppHRegARM(i->ARMin.FPSCR.iReg);
   1748          } else {
   1749             vex_printf("fmrx  ");
   1750             ppHRegARM(i->ARMin.FPSCR.iReg);
   1751             vex_printf(", fpscr");
   1752          }
   1753          return;
   1754       case ARMin_MFence:
   1755          vex_printf("mfence (mcr 15,0,r0,c7,c10,4; 15,0,r0,c7,c10,5; "
   1756                     "15,0,r0,c7,c5,4)");
   1757          return;
   1758       case ARMin_NLdStQ:
   1759          if (i->ARMin.NLdStQ.isLoad)
   1760             vex_printf("vld1.32 {");
   1761          else
   1762             vex_printf("vst1.32 {");
   1763          ppHRegARM(i->ARMin.NLdStQ.dQ);
   1764          vex_printf("} ");
   1765          ppARMAModeN(i->ARMin.NLdStQ.amode);
   1766          return;
   1767       case ARMin_NLdStD:
   1768          if (i->ARMin.NLdStD.isLoad)
   1769             vex_printf("vld1.32 {");
   1770          else
   1771             vex_printf("vst1.32 {");
   1772          ppHRegARM(i->ARMin.NLdStD.dD);
   1773          vex_printf("} ");
   1774          ppARMAModeN(i->ARMin.NLdStD.amode);
   1775          return;
   1776       case ARMin_NUnary:
   1777          vex_printf("%s%s%s  ",
   1778                     showARMNeonUnOp(i->ARMin.NUnary.op),
   1779                     showARMNeonUnOpDataType(i->ARMin.NUnary.op),
   1780                     showARMNeonDataSize(i));
   1781          ppHRegARM(i->ARMin.NUnary.dst);
   1782          vex_printf(", ");
   1783          ppHRegARM(i->ARMin.NUnary.src);
   1784          if (i->ARMin.NUnary.op == ARMneon_EQZ)
   1785             vex_printf(", #0");
   1786          if (i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
   1787              i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
   1788              i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
   1789              i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF) {
   1790             vex_printf(", #%d", i->ARMin.NUnary.size);
   1791          }
   1792          if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
   1793              i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
   1794              i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
   1795             UInt size;
   1796             size = i->ARMin.NUnary.size;
   1797             if (size & 0x40) {
   1798                vex_printf(", #%d", size - 64);
   1799             } else if (size & 0x20) {
   1800                vex_printf(", #%d", size - 32);
   1801             } else if (size & 0x10) {
   1802                vex_printf(", #%d", size - 16);
   1803             } else if (size & 0x08) {
   1804                vex_printf(", #%d", size - 8);
   1805             }
   1806          }
   1807          return;
   1808       case ARMin_NUnaryS:
   1809          vex_printf("%s%s%s  ",
   1810                     showARMNeonUnOpS(i->ARMin.NUnary.op),
   1811                     showARMNeonUnOpSDataType(i->ARMin.NUnary.op),
   1812                     showARMNeonDataSize(i));
   1813          ppARMNRS(i->ARMin.NUnaryS.dst);
   1814          vex_printf(", ");
   1815          ppARMNRS(i->ARMin.NUnaryS.src);
   1816          return;
   1817       case ARMin_NShift:
   1818          vex_printf("%s%s%s  ",
   1819                     showARMNeonShiftOp(i->ARMin.NShift.op),
   1820                     showARMNeonShiftOpDataType(i->ARMin.NShift.op),
   1821                     showARMNeonDataSize(i));
   1822          ppHRegARM(i->ARMin.NShift.dst);
   1823          vex_printf(", ");
   1824          ppHRegARM(i->ARMin.NShift.argL);
   1825          vex_printf(", ");
   1826          ppHRegARM(i->ARMin.NShift.argR);
   1827          return;
   1828       case ARMin_NDual:
   1829          vex_printf("%s%s%s  ",
   1830                     showARMNeonDualOp(i->ARMin.NDual.op),
   1831                     showARMNeonDualOpDataType(i->ARMin.NDual.op),
   1832                     showARMNeonDataSize(i));
   1833          ppHRegARM(i->ARMin.NDual.arg1);
   1834          vex_printf(", ");
   1835          ppHRegARM(i->ARMin.NDual.arg2);
   1836          return;
   1837       case ARMin_NBinary:
   1838          vex_printf("%s%s%s",
   1839                     showARMNeonBinOp(i->ARMin.NBinary.op),
   1840                     showARMNeonBinOpDataType(i->ARMin.NBinary.op),
   1841                     showARMNeonDataSize(i));
   1842          vex_printf("  ");
   1843          ppHRegARM(i->ARMin.NBinary.dst);
   1844          vex_printf(", ");
   1845          ppHRegARM(i->ARMin.NBinary.argL);
   1846          vex_printf(", ");
   1847          ppHRegARM(i->ARMin.NBinary.argR);
   1848          return;
   1849       case ARMin_NeonImm:
   1850          vex_printf("vmov  ");
   1851          ppHRegARM(i->ARMin.NeonImm.dst);
   1852          vex_printf(", ");
   1853          ppARMNImm(i->ARMin.NeonImm.imm);
   1854          return;
   1855       case ARMin_NCMovQ:
   1856          vex_printf("vmov%s ", showARMCondCode(i->ARMin.NCMovQ.cond));
   1857          ppHRegARM(i->ARMin.NCMovQ.dst);
   1858          vex_printf(", ");
   1859          ppHRegARM(i->ARMin.NCMovQ.src);
   1860          return;
   1861       case ARMin_Add32:
   1862          vex_printf("add32 ");
   1863          ppHRegARM(i->ARMin.Add32.rD);
   1864          vex_printf(", ");
   1865          ppHRegARM(i->ARMin.Add32.rN);
   1866          vex_printf(", ");
   1867          vex_printf("%d", i->ARMin.Add32.imm32);
   1868          return;
   1869       default:
   1870       unhandled:
   1871          vex_printf("ppARMInstr: unhandled case (tag %d)", (Int)i->tag);
   1872          vpanic("ppARMInstr(1)");
   1873          return;
   1874    }
   1875 }
   1876 
   1877 
   1878 /* --------- Helpers for register allocation. --------- */
   1879 
   1880 void getRegUsage_ARMInstr ( HRegUsage* u, ARMInstr* i, Bool mode64 )
   1881 {
   1882    vassert(mode64 == False);
   1883    initHRegUsage(u);
   1884    switch (i->tag) {
   1885       case ARMin_Alu:
   1886          addHRegUse(u, HRmWrite, i->ARMin.Alu.dst);
   1887          addHRegUse(u, HRmRead, i->ARMin.Alu.argL);
   1888          addRegUsage_ARMRI84(u, i->ARMin.Alu.argR);
   1889          return;
   1890       case ARMin_Shift:
   1891          addHRegUse(u, HRmWrite, i->ARMin.Shift.dst);
   1892          addHRegUse(u, HRmRead, i->ARMin.Shift.argL);
   1893          addRegUsage_ARMRI5(u, i->ARMin.Shift.argR);
   1894          return;
   1895       case ARMin_Unary:
   1896          addHRegUse(u, HRmWrite, i->ARMin.Unary.dst);
   1897          addHRegUse(u, HRmRead, i->ARMin.Unary.src);
   1898          return;
   1899       case ARMin_CmpOrTst:
   1900          addHRegUse(u, HRmRead, i->ARMin.CmpOrTst.argL);
   1901          addRegUsage_ARMRI84(u, i->ARMin.CmpOrTst.argR);
   1902          return;
   1903       case ARMin_Mov:
   1904          addHRegUse(u, HRmWrite, i->ARMin.Mov.dst);
   1905          addRegUsage_ARMRI84(u, i->ARMin.Mov.src);
   1906          return;
   1907       case ARMin_Imm32:
   1908          addHRegUse(u, HRmWrite, i->ARMin.Imm32.dst);
   1909          return;
   1910       case ARMin_LdSt32:
   1911          addRegUsage_ARMAMode1(u, i->ARMin.LdSt32.amode);
   1912          if (i->ARMin.LdSt32.isLoad) {
   1913             addHRegUse(u, HRmWrite, i->ARMin.LdSt32.rD);
   1914          } else {
   1915             addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
   1916          }
   1917          return;
   1918       case ARMin_LdSt16:
   1919          addRegUsage_ARMAMode2(u, i->ARMin.LdSt16.amode);
   1920          if (i->ARMin.LdSt16.isLoad) {
   1921             addHRegUse(u, HRmWrite, i->ARMin.LdSt16.rD);
   1922          } else {
   1923             addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
   1924          }
   1925          return;
   1926       case ARMin_LdSt8U:
   1927          addRegUsage_ARMAMode1(u, i->ARMin.LdSt8U.amode);
   1928          if (i->ARMin.LdSt8U.isLoad) {
   1929             addHRegUse(u, HRmWrite, i->ARMin.LdSt8U.rD);
   1930          } else {
   1931             addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
   1932          }
   1933          return;
   1934       case ARMin_Ld8S:
   1935          goto unhandled;
   1936       case ARMin_Goto:
   1937          /* reads the reg holding the next guest addr */
   1938          addHRegUse(u, HRmRead, i->ARMin.Goto.gnext);
   1939          /* writes it to the standard integer return register */
   1940          addHRegUse(u, HRmWrite, hregARM_R0());
   1941          /* possibly messes with the baseblock pointer */
   1942          if (i->ARMin.Goto.jk != Ijk_Boring
   1943              && i->ARMin.Goto.jk != Ijk_Call
   1944              && i->ARMin.Goto.jk != Ijk_Ret)
   1945             /* note, this is irrelevant since r8 is not actually
   1946                available to the allocator.  But still .. */
   1947             addHRegUse(u, HRmWrite, hregARM_R8());
   1948          return;
   1949       case ARMin_CMov:
   1950          addHRegUse(u, HRmWrite, i->ARMin.CMov.dst);
   1951          addHRegUse(u, HRmRead,  i->ARMin.CMov.dst);
   1952          addRegUsage_ARMRI84(u, i->ARMin.CMov.src);
   1953          return;
   1954       case ARMin_Call:
   1955          /* logic and comments copied/modified from x86 back end */
   1956          /* This is a bit subtle. */
   1957          /* First off, claim it trashes all the caller-saved regs
   1958             which fall within the register allocator's jurisdiction.
   1959             These I believe to be r0,1,2,3.  If it turns out that r9
   1960             is also caller-saved, then we'll have to add that here
   1961             too. */
   1962          addHRegUse(u, HRmWrite, hregARM_R0());
   1963          addHRegUse(u, HRmWrite, hregARM_R1());
   1964          addHRegUse(u, HRmWrite, hregARM_R2());
   1965          addHRegUse(u, HRmWrite, hregARM_R3());
   1966          /* Now we have to state any parameter-carrying registers
   1967             which might be read.  This depends on nArgRegs. */
   1968          switch (i->ARMin.Call.nArgRegs) {
   1969             case 4: addHRegUse(u, HRmRead, hregARM_R3()); /*fallthru*/
   1970             case 3: addHRegUse(u, HRmRead, hregARM_R2()); /*fallthru*/
   1971             case 2: addHRegUse(u, HRmRead, hregARM_R1()); /*fallthru*/
   1972             case 1: addHRegUse(u, HRmRead, hregARM_R0()); break;
   1973             case 0: break;
   1974             default: vpanic("getRegUsage_ARM:Call:regparms");
   1975          }
   1976          /* Finally, there is the issue that the insn trashes a
   1977             register because the literal target address has to be
   1978             loaded into a register.  Fortunately, for the nArgRegs=
   1979             0/1/2/3 case, we can use r0, r1, r2 or r3 respectively, so
   1980             this does not cause any further damage.  For the
   1981             nArgRegs=4 case, we'll have to choose another register
   1982             arbitrarily since all the caller saved regs are used for
   1983             parameters, and so we might as well choose r11.
   1984             */
   1985          if (i->ARMin.Call.nArgRegs == 4)
   1986             addHRegUse(u, HRmWrite, hregARM_R11());
   1987          /* Upshot of this is that the assembler really must observe
   1988             the here-stated convention of which register to use as an
   1989             address temporary, depending on nArgRegs: 0==r0,
   1990             1==r1, 2==r2, 3==r3, 4==r11 */
   1991          return;
   1992       case ARMin_Mul:
   1993          addHRegUse(u, HRmRead, hregARM_R2());
   1994          addHRegUse(u, HRmRead, hregARM_R3());
   1995          addHRegUse(u, HRmWrite, hregARM_R0());
   1996          if (i->ARMin.Mul.op != ARMmul_PLAIN)
   1997             addHRegUse(u, HRmWrite, hregARM_R1());
   1998          return;
   1999       case ARMin_LdrEX:
   2000          addHRegUse(u, HRmRead, hregARM_R0());
   2001          addHRegUse(u, HRmWrite, hregARM_R2());
   2002          if (i->ARMin.LdrEX.szB == 8)
   2003             addHRegUse(u, HRmWrite, hregARM_R3());
   2004          return;
   2005       case ARMin_StrEX:
   2006          addHRegUse(u, HRmRead, hregARM_R0());
   2007          addHRegUse(u, HRmWrite, hregARM_R1());
   2008          addHRegUse(u, HRmRead, hregARM_R2());
   2009          if (i->ARMin.StrEX.szB == 8)
   2010             addHRegUse(u, HRmRead, hregARM_R3());
   2011          return;
   2012       case ARMin_VLdStD:
   2013          addRegUsage_ARMAModeV(u, i->ARMin.VLdStD.amode);
   2014          if (i->ARMin.VLdStD.isLoad) {
   2015             addHRegUse(u, HRmWrite, i->ARMin.VLdStD.dD);
   2016          } else {
   2017             addHRegUse(u, HRmRead, i->ARMin.VLdStD.dD);
   2018          }
   2019          return;
   2020       case ARMin_VLdStS:
   2021          addRegUsage_ARMAModeV(u, i->ARMin.VLdStS.amode);
   2022          if (i->ARMin.VLdStS.isLoad) {
   2023             addHRegUse(u, HRmWrite, i->ARMin.VLdStS.fD);
   2024          } else {
   2025             addHRegUse(u, HRmRead, i->ARMin.VLdStS.fD);
   2026          }
   2027          return;
   2028       case ARMin_VAluD:
   2029          addHRegUse(u, HRmWrite, i->ARMin.VAluD.dst);
   2030          addHRegUse(u, HRmRead, i->ARMin.VAluD.argL);
   2031          addHRegUse(u, HRmRead, i->ARMin.VAluD.argR);
   2032          return;
   2033       case ARMin_VAluS:
   2034          addHRegUse(u, HRmWrite, i->ARMin.VAluS.dst);
   2035          addHRegUse(u, HRmRead, i->ARMin.VAluS.argL);
   2036          addHRegUse(u, HRmRead, i->ARMin.VAluS.argR);
   2037          return;
   2038       case ARMin_VUnaryD:
   2039          addHRegUse(u, HRmWrite, i->ARMin.VUnaryD.dst);
   2040          addHRegUse(u, HRmRead, i->ARMin.VUnaryD.src);
   2041          return;
   2042       case ARMin_VUnaryS:
   2043          addHRegUse(u, HRmWrite, i->ARMin.VUnaryS.dst);
   2044          addHRegUse(u, HRmRead, i->ARMin.VUnaryS.src);
   2045          return;
   2046       case ARMin_VCmpD:
   2047          addHRegUse(u, HRmRead, i->ARMin.VCmpD.argL);
   2048          addHRegUse(u, HRmRead, i->ARMin.VCmpD.argR);
   2049          return;
   2050       case ARMin_VCMovD:
   2051          addHRegUse(u, HRmWrite, i->ARMin.VCMovD.dst);
   2052          addHRegUse(u, HRmRead,  i->ARMin.VCMovD.dst);
   2053          addHRegUse(u, HRmRead,  i->ARMin.VCMovD.src);
   2054          return;
   2055       case ARMin_VCMovS:
   2056          addHRegUse(u, HRmWrite, i->ARMin.VCMovS.dst);
   2057          addHRegUse(u, HRmRead,  i->ARMin.VCMovS.dst);
   2058          addHRegUse(u, HRmRead,  i->ARMin.VCMovS.src);
   2059          return;
   2060       case ARMin_VCvtSD:
   2061          addHRegUse(u, HRmWrite, i->ARMin.VCvtSD.dst);
   2062          addHRegUse(u, HRmRead,  i->ARMin.VCvtSD.src);
   2063          return;
   2064       case ARMin_VXferD:
   2065          if (i->ARMin.VXferD.toD) {
   2066             addHRegUse(u, HRmWrite, i->ARMin.VXferD.dD);
   2067             addHRegUse(u, HRmRead,  i->ARMin.VXferD.rHi);
   2068             addHRegUse(u, HRmRead,  i->ARMin.VXferD.rLo);
   2069          } else {
   2070             addHRegUse(u, HRmRead,  i->ARMin.VXferD.dD);
   2071             addHRegUse(u, HRmWrite, i->ARMin.VXferD.rHi);
   2072             addHRegUse(u, HRmWrite, i->ARMin.VXferD.rLo);
   2073          }
   2074          return;
   2075       case ARMin_VXferS:
   2076          if (i->ARMin.VXferS.toS) {
   2077             addHRegUse(u, HRmWrite, i->ARMin.VXferS.fD);
   2078             addHRegUse(u, HRmRead,  i->ARMin.VXferS.rLo);
   2079          } else {
   2080             addHRegUse(u, HRmRead,  i->ARMin.VXferS.fD);
   2081             addHRegUse(u, HRmWrite, i->ARMin.VXferS.rLo);
   2082          }
   2083          return;
   2084       case ARMin_VCvtID:
   2085          addHRegUse(u, HRmWrite, i->ARMin.VCvtID.dst);
   2086          addHRegUse(u, HRmRead,  i->ARMin.VCvtID.src);
   2087          return;
   2088       case ARMin_FPSCR:
   2089          if (i->ARMin.FPSCR.toFPSCR)
   2090             addHRegUse(u, HRmRead, i->ARMin.FPSCR.iReg);
   2091          else
   2092             addHRegUse(u, HRmWrite, i->ARMin.FPSCR.iReg);
   2093          return;
   2094       case ARMin_MFence:
   2095          return;
   2096       case ARMin_NLdStQ:
   2097          if (i->ARMin.NLdStQ.isLoad)
   2098             addHRegUse(u, HRmWrite, i->ARMin.NLdStQ.dQ);
   2099          else
   2100             addHRegUse(u, HRmRead, i->ARMin.NLdStQ.dQ);
   2101          addRegUsage_ARMAModeN(u, i->ARMin.NLdStQ.amode);
   2102          return;
   2103       case ARMin_NLdStD:
   2104          if (i->ARMin.NLdStD.isLoad)
   2105             addHRegUse(u, HRmWrite, i->ARMin.NLdStD.dD);
   2106          else
   2107             addHRegUse(u, HRmRead, i->ARMin.NLdStD.dD);
   2108          addRegUsage_ARMAModeN(u, i->ARMin.NLdStD.amode);
   2109          return;
   2110       case ARMin_NUnary:
   2111          addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst);
   2112          addHRegUse(u, HRmRead, i->ARMin.NUnary.src);
   2113          return;
   2114       case ARMin_NUnaryS:
   2115          addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg);
   2116          addHRegUse(u, HRmRead, i->ARMin.NUnaryS.src->reg);
   2117          return;
   2118       case ARMin_NShift:
   2119          addHRegUse(u, HRmWrite, i->ARMin.NShift.dst);
   2120          addHRegUse(u, HRmRead, i->ARMin.NShift.argL);
   2121          addHRegUse(u, HRmRead, i->ARMin.NShift.argR);
   2122          return;
   2123       case ARMin_NDual:
   2124          addHRegUse(u, HRmWrite, i->ARMin.NDual.arg1);
   2125          addHRegUse(u, HRmWrite, i->ARMin.NDual.arg2);
   2126          addHRegUse(u, HRmRead, i->ARMin.NDual.arg1);
   2127          addHRegUse(u, HRmRead, i->ARMin.NDual.arg2);
   2128          return;
   2129       case ARMin_NBinary:
   2130          addHRegUse(u, HRmWrite, i->ARMin.NBinary.dst);
   2131          /* TODO: sometimes dst is also being read! */
   2132          // XXX fix this
   2133          addHRegUse(u, HRmRead, i->ARMin.NBinary.argL);
   2134          addHRegUse(u, HRmRead, i->ARMin.NBinary.argR);
   2135          return;
   2136       case ARMin_NeonImm:
   2137          addHRegUse(u, HRmWrite, i->ARMin.NeonImm.dst);
   2138          return;
   2139       case ARMin_NCMovQ:
   2140          addHRegUse(u, HRmWrite, i->ARMin.NCMovQ.dst);
   2141          addHRegUse(u, HRmRead,  i->ARMin.NCMovQ.dst);
   2142          addHRegUse(u, HRmRead,  i->ARMin.NCMovQ.src);
   2143          return;
   2144       case ARMin_Add32:
   2145          addHRegUse(u, HRmWrite, i->ARMin.Add32.rD);
   2146          addHRegUse(u, HRmRead, i->ARMin.Add32.rN);
   2147          return;
   2148       unhandled:
   2149       default:
   2150          ppARMInstr(i);
   2151          vpanic("getRegUsage_ARMInstr");
   2152    }
   2153 }
   2154 
   2155 
   2156 void mapRegs_ARMInstr ( HRegRemap* m, ARMInstr* i, Bool mode64 )
   2157 {
   2158    vassert(mode64 == False);
   2159    switch (i->tag) {
   2160       case ARMin_Alu:
   2161          i->ARMin.Alu.dst = lookupHRegRemap(m, i->ARMin.Alu.dst);
   2162          i->ARMin.Alu.argL = lookupHRegRemap(m, i->ARMin.Alu.argL);
   2163          mapRegs_ARMRI84(m, i->ARMin.Alu.argR);
   2164          return;
   2165       case ARMin_Shift:
   2166          i->ARMin.Shift.dst = lookupHRegRemap(m, i->ARMin.Shift.dst);
   2167          i->ARMin.Shift.argL = lookupHRegRemap(m, i->ARMin.Shift.argL);
   2168          mapRegs_ARMRI5(m, i->ARMin.Shift.argR);
   2169          return;
   2170       case ARMin_Unary:
   2171          i->ARMin.Unary.dst = lookupHRegRemap(m, i->ARMin.Unary.dst);
   2172          i->ARMin.Unary.src = lookupHRegRemap(m, i->ARMin.Unary.src);
   2173          return;
   2174       case ARMin_CmpOrTst:
   2175          i->ARMin.CmpOrTst.argL = lookupHRegRemap(m, i->ARMin.CmpOrTst.argL);
   2176          mapRegs_ARMRI84(m, i->ARMin.CmpOrTst.argR);
   2177          return;
   2178       case ARMin_Mov:
   2179          i->ARMin.Mov.dst = lookupHRegRemap(m, i->ARMin.Mov.dst);
   2180          mapRegs_ARMRI84(m, i->ARMin.Mov.src);
   2181          return;
   2182       case ARMin_Imm32:
   2183          i->ARMin.Imm32.dst = lookupHRegRemap(m, i->ARMin.Imm32.dst);
   2184          return;
   2185       case ARMin_LdSt32:
   2186          i->ARMin.LdSt32.rD = lookupHRegRemap(m, i->ARMin.LdSt32.rD);
   2187          mapRegs_ARMAMode1(m, i->ARMin.LdSt32.amode);
   2188          return;
   2189       case ARMin_LdSt16:
   2190          i->ARMin.LdSt16.rD = lookupHRegRemap(m, i->ARMin.LdSt16.rD);
   2191          mapRegs_ARMAMode2(m, i->ARMin.LdSt16.amode);
   2192          return;
   2193       case ARMin_LdSt8U:
   2194          i->ARMin.LdSt8U.rD = lookupHRegRemap(m, i->ARMin.LdSt8U.rD);
   2195          mapRegs_ARMAMode1(m, i->ARMin.LdSt8U.amode);
   2196          return;
   2197       case ARMin_Ld8S:
   2198          goto unhandled;
   2199       case ARMin_Goto:
   2200          i->ARMin.Goto.gnext = lookupHRegRemap(m, i->ARMin.Goto.gnext);
   2201          return;
   2202       case ARMin_CMov:
   2203          i->ARMin.CMov.dst = lookupHRegRemap(m, i->ARMin.CMov.dst);
   2204          mapRegs_ARMRI84(m, i->ARMin.CMov.src);
   2205          return;
   2206       case ARMin_Call:
   2207          return;
   2208       case ARMin_Mul:
   2209          return;
   2210       case ARMin_LdrEX:
   2211          return;
   2212       case ARMin_StrEX:
   2213          return;
   2214       case ARMin_VLdStD:
   2215          i->ARMin.VLdStD.dD = lookupHRegRemap(m, i->ARMin.VLdStD.dD);
   2216          mapRegs_ARMAModeV(m, i->ARMin.VLdStD.amode);
   2217          return;
   2218       case ARMin_VLdStS:
   2219          i->ARMin.VLdStS.fD = lookupHRegRemap(m, i->ARMin.VLdStS.fD);
   2220          mapRegs_ARMAModeV(m, i->ARMin.VLdStS.amode);
   2221          return;
   2222       case ARMin_VAluD:
   2223          i->ARMin.VAluD.dst  = lookupHRegRemap(m, i->ARMin.VAluD.dst);
   2224          i->ARMin.VAluD.argL = lookupHRegRemap(m, i->ARMin.VAluD.argL);
   2225          i->ARMin.VAluD.argR = lookupHRegRemap(m, i->ARMin.VAluD.argR);
   2226          return;
   2227       case ARMin_VAluS:
   2228          i->ARMin.VAluS.dst  = lookupHRegRemap(m, i->ARMin.VAluS.dst);
   2229          i->ARMin.VAluS.argL = lookupHRegRemap(m, i->ARMin.VAluS.argL);
   2230          i->ARMin.VAluS.argR = lookupHRegRemap(m, i->ARMin.VAluS.argR);
   2231          return;
   2232       case ARMin_VUnaryD:
   2233          i->ARMin.VUnaryD.dst = lookupHRegRemap(m, i->ARMin.VUnaryD.dst);
   2234          i->ARMin.VUnaryD.src = lookupHRegRemap(m, i->ARMin.VUnaryD.src);
   2235          return;
   2236       case ARMin_VUnaryS:
   2237          i->ARMin.VUnaryS.dst = lookupHRegRemap(m, i->ARMin.VUnaryS.dst);
   2238          i->ARMin.VUnaryS.src = lookupHRegRemap(m, i->ARMin.VUnaryS.src);
   2239          return;
   2240       case ARMin_VCmpD:
   2241          i->ARMin.VCmpD.argL = lookupHRegRemap(m, i->ARMin.VCmpD.argL);
   2242          i->ARMin.VCmpD.argR = lookupHRegRemap(m, i->ARMin.VCmpD.argR);
   2243          return;
   2244       case ARMin_VCMovD:
   2245          i->ARMin.VCMovD.dst = lookupHRegRemap(m, i->ARMin.VCMovD.dst);
   2246          i->ARMin.VCMovD.src = lookupHRegRemap(m, i->ARMin.VCMovD.src);
   2247          return;
   2248       case ARMin_VCMovS:
   2249          i->ARMin.VCMovS.dst = lookupHRegRemap(m, i->ARMin.VCMovS.dst);
   2250          i->ARMin.VCMovS.src = lookupHRegRemap(m, i->ARMin.VCMovS.src);
   2251          return;
   2252       case ARMin_VCvtSD:
   2253          i->ARMin.VCvtSD.dst = lookupHRegRemap(m, i->ARMin.VCvtSD.dst);
   2254          i->ARMin.VCvtSD.src = lookupHRegRemap(m, i->ARMin.VCvtSD.src);
   2255          return;
   2256       case ARMin_VXferD:
   2257          i->ARMin.VXferD.dD  = lookupHRegRemap(m, i->ARMin.VXferD.dD);
   2258          i->ARMin.VXferD.rHi = lookupHRegRemap(m, i->ARMin.VXferD.rHi);
   2259          i->ARMin.VXferD.rLo = lookupHRegRemap(m, i->ARMin.VXferD.rLo);
   2260          return;
   2261       case ARMin_VXferS:
   2262          i->ARMin.VXferS.fD  = lookupHRegRemap(m, i->ARMin.VXferS.fD);
   2263          i->ARMin.VXferS.rLo = lookupHRegRemap(m, i->ARMin.VXferS.rLo);
   2264          return;
   2265       case ARMin_VCvtID:
   2266          i->ARMin.VCvtID.dst = lookupHRegRemap(m, i->ARMin.VCvtID.dst);
   2267          i->ARMin.VCvtID.src = lookupHRegRemap(m, i->ARMin.VCvtID.src);
   2268          return;
   2269       case ARMin_FPSCR:
   2270          i->ARMin.FPSCR.iReg = lookupHRegRemap(m, i->ARMin.FPSCR.iReg);
   2271          return;
   2272       case ARMin_MFence:
   2273          return;
   2274       case ARMin_NLdStQ:
   2275          i->ARMin.NLdStQ.dQ = lookupHRegRemap(m, i->ARMin.NLdStQ.dQ);
   2276          mapRegs_ARMAModeN(m, i->ARMin.NLdStQ.amode);
   2277          return;
   2278       case ARMin_NLdStD:
   2279          i->ARMin.NLdStD.dD = lookupHRegRemap(m, i->ARMin.NLdStD.dD);
   2280          mapRegs_ARMAModeN(m, i->ARMin.NLdStD.amode);
   2281          return;
   2282       case ARMin_NUnary:
   2283          i->ARMin.NUnary.src = lookupHRegRemap(m, i->ARMin.NUnary.src);
   2284          i->ARMin.NUnary.dst = lookupHRegRemap(m, i->ARMin.NUnary.dst);
   2285          return;
   2286       case ARMin_NUnaryS:
   2287          i->ARMin.NUnaryS.src->reg
   2288             = lookupHRegRemap(m, i->ARMin.NUnaryS.src->reg);
   2289          i->ARMin.NUnaryS.dst->reg
   2290             = lookupHRegRemap(m, i->ARMin.NUnaryS.dst->reg);
   2291          return;
   2292       case ARMin_NShift:
   2293          i->ARMin.NShift.dst = lookupHRegRemap(m, i->ARMin.NShift.dst);
   2294          i->ARMin.NShift.argL = lookupHRegRemap(m, i->ARMin.NShift.argL);
   2295          i->ARMin.NShift.argR = lookupHRegRemap(m, i->ARMin.NShift.argR);
   2296          return;
   2297       case ARMin_NDual:
   2298          i->ARMin.NDual.arg1 = lookupHRegRemap(m, i->ARMin.NDual.arg1);
   2299          i->ARMin.NDual.arg2 = lookupHRegRemap(m, i->ARMin.NDual.arg2);
   2300          return;
   2301       case ARMin_NBinary:
   2302          i->ARMin.NBinary.argL = lookupHRegRemap(m, i->ARMin.NBinary.argL);
   2303          i->ARMin.NBinary.argR = lookupHRegRemap(m, i->ARMin.NBinary.argR);
   2304          i->ARMin.NBinary.dst  = lookupHRegRemap(m, i->ARMin.NBinary.dst);
   2305          return;
   2306       case ARMin_NeonImm:
   2307          i->ARMin.NeonImm.dst = lookupHRegRemap(m, i->ARMin.NeonImm.dst);
   2308          return;
   2309       case ARMin_NCMovQ:
   2310          i->ARMin.NCMovQ.dst = lookupHRegRemap(m, i->ARMin.NCMovQ.dst);
   2311          i->ARMin.NCMovQ.src = lookupHRegRemap(m, i->ARMin.NCMovQ.src);
   2312          return;
   2313       case ARMin_Add32:
   2314          i->ARMin.Add32.rD = lookupHRegRemap(m, i->ARMin.Add32.rD);
   2315          i->ARMin.Add32.rN = lookupHRegRemap(m, i->ARMin.Add32.rN);
   2316       unhandled:
   2317       default:
   2318          ppARMInstr(i);
   2319          vpanic("mapRegs_ARMInstr");
   2320    }
   2321 }
   2322 
   2323 /* Figure out if i represents a reg-reg move, and if so assign the
   2324    source and destination to *src and *dst.  If in doubt say No.  Used
   2325    by the register allocator to do move coalescing.
   2326 */
   2327 Bool isMove_ARMInstr ( ARMInstr* i, HReg* src, HReg* dst )
   2328 {
   2329    /* Moves between integer regs */
   2330    switch (i->tag) {
   2331       case ARMin_Mov:
   2332          if (i->ARMin.Mov.src->tag == ARMri84_R) {
   2333             *src = i->ARMin.Mov.src->ARMri84.R.reg;
   2334             *dst = i->ARMin.Mov.dst;
   2335             return True;
   2336          }
   2337          break;
   2338       case ARMin_VUnaryD:
   2339          if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) {
   2340             *src = i->ARMin.VUnaryD.src;
   2341             *dst = i->ARMin.VUnaryD.dst;
   2342             return True;
   2343          }
   2344          break;
   2345       case ARMin_VUnaryS:
   2346          if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) {
   2347             *src = i->ARMin.VUnaryS.src;
   2348             *dst = i->ARMin.VUnaryS.dst;
   2349             return True;
   2350          }
   2351          break;
   2352       default:
   2353          break;
   2354    }
   2355 
   2356    // todo: float, vector moves
   2357    return False;
   2358 }
   2359 
   2360 
   2361 /* Generate arm spill/reload instructions under the direction of the
   2362    register allocator.  Note it's critical these don't write the
   2363    condition codes. */
   2364 
   2365 void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
   2366                     HReg rreg, Int offsetB, Bool mode64 )
   2367 {
   2368    HRegClass rclass;
   2369    vassert(offsetB >= 0);
   2370    vassert(!hregIsVirtual(rreg));
   2371    vassert(mode64 == False);
   2372    *i1 = *i2 = NULL;
   2373    rclass = hregClass(rreg);
   2374    switch (rclass) {
   2375       case HRcInt32:
   2376          vassert(offsetB <= 4095);
   2377          *i1 = ARMInstr_LdSt32( False/*!isLoad*/,
   2378                                 rreg,
   2379                                 ARMAMode1_RI(hregARM_R8(), offsetB) );
   2380          return;
   2381       case HRcFlt32:
   2382       case HRcFlt64: {
   2383          HReg r8   = hregARM_R8();  /* baseblock */
   2384          HReg r12  = hregARM_R12(); /* spill temp */
   2385          HReg base = r8;
   2386          vassert(0 == (offsetB & 3));
   2387          if (offsetB >= 1024) {
   2388             Int offsetKB = offsetB / 1024;
   2389             /* r12 = r8 + (1024 * offsetKB) */
   2390             *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
   2391                                ARMRI84_I84(offsetKB, 11));
   2392             offsetB -= (1024 * offsetKB);
   2393             base = r12;
   2394          }
   2395          vassert(offsetB <= 1020);
   2396          if (rclass == HRcFlt32) {
   2397             *i2 = ARMInstr_VLdStS( False/*!isLoad*/,
   2398                                    rreg,
   2399                                    mkARMAModeV(base, offsetB) );
   2400          } else {
   2401             *i2 = ARMInstr_VLdStD( False/*!isLoad*/,
   2402                                    rreg,
   2403                                    mkARMAModeV(base, offsetB) );
   2404          }
   2405          return;
   2406       }
   2407       case HRcVec128: {
   2408          HReg r8  = hregARM_R8();
   2409          HReg r12 = hregARM_R12();
   2410          *i1 = ARMInstr_Add32(r12, r8, offsetB);
   2411          *i2 = ARMInstr_NLdStQ(False, rreg, mkARMAModeN_R(r12));
   2412          return;
   2413       }
   2414       default:
   2415          ppHRegClass(rclass);
   2416          vpanic("genSpill_ARM: unimplemented regclass");
   2417    }
   2418 }
   2419 
   2420 void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
   2421                      HReg rreg, Int offsetB, Bool mode64 )
   2422 {
   2423    HRegClass rclass;
   2424    vassert(offsetB >= 0);
   2425    vassert(!hregIsVirtual(rreg));
   2426    vassert(mode64 == False);
   2427    *i1 = *i2 = NULL;
   2428    rclass = hregClass(rreg);
   2429    switch (rclass) {
   2430       case HRcInt32:
   2431          vassert(offsetB <= 4095);
   2432          *i1 = ARMInstr_LdSt32( True/*isLoad*/,
   2433                                 rreg,
   2434                                 ARMAMode1_RI(hregARM_R8(), offsetB) );
   2435          return;
   2436       case HRcFlt32:
   2437       case HRcFlt64: {
   2438          HReg r8   = hregARM_R8();  /* baseblock */
   2439          HReg r12  = hregARM_R12(); /* spill temp */
   2440          HReg base = r8;
   2441          vassert(0 == (offsetB & 3));
   2442          if (offsetB >= 1024) {
   2443             Int offsetKB = offsetB / 1024;
   2444             /* r12 = r8 + (1024 * offsetKB) */
   2445             *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
   2446                                ARMRI84_I84(offsetKB, 11));
   2447             offsetB -= (1024 * offsetKB);
   2448             base = r12;
   2449          }
   2450          vassert(offsetB <= 1020);
   2451          if (rclass == HRcFlt32) {
   2452             *i2 = ARMInstr_VLdStS( True/*isLoad*/,
   2453                                    rreg,
   2454                                    mkARMAModeV(base, offsetB) );
   2455          } else {
   2456             *i2 = ARMInstr_VLdStD( True/*isLoad*/,
   2457                                    rreg,
   2458                                    mkARMAModeV(base, offsetB) );
   2459          }
   2460          return;
   2461       }
   2462       case HRcVec128: {
   2463          HReg r8  = hregARM_R8();
   2464          HReg r12 = hregARM_R12();
   2465          *i1 = ARMInstr_Add32(r12, r8, offsetB);
   2466          *i2 = ARMInstr_NLdStQ(True, rreg, mkARMAModeN_R(r12));
   2467          return;
   2468       }
   2469       default:
   2470          ppHRegClass(rclass);
   2471          vpanic("genReload_ARM: unimplemented regclass");
   2472    }
   2473 }
   2474 
   2475 
   2476 /* Emit an instruction into buf and return the number of bytes used.
   2477    Note that buf is not the insn's final place, and therefore it is
   2478    imperative to emit position-independent code. */
   2479 
   2480 static inline UChar iregNo ( HReg r )
   2481 {
   2482    UInt n;
   2483    vassert(hregClass(r) == HRcInt32);
   2484    vassert(!hregIsVirtual(r));
   2485    n = hregNumber(r);
   2486    vassert(n <= 15);
   2487    return toUChar(n);
   2488 }
   2489 
   2490 static inline UChar dregNo ( HReg r )
   2491 {
   2492    UInt n;
   2493    if (hregClass(r) != HRcFlt64)
   2494       ppHRegClass(hregClass(r));
   2495    vassert(hregClass(r) == HRcFlt64);
   2496    vassert(!hregIsVirtual(r));
   2497    n = hregNumber(r);
   2498    vassert(n <= 31);
   2499    return toUChar(n);
   2500 }
   2501 
   2502 static inline UChar fregNo ( HReg r )
   2503 {
   2504    UInt n;
   2505    vassert(hregClass(r) == HRcFlt32);
   2506    vassert(!hregIsVirtual(r));
   2507    n = hregNumber(r);
   2508    vassert(n <= 31);
   2509    return toUChar(n);
   2510 }
   2511 
   2512 static inline UChar qregNo ( HReg r )
   2513 {
   2514    UInt n;
   2515    vassert(hregClass(r) == HRcVec128);
   2516    vassert(!hregIsVirtual(r));
   2517    n = hregNumber(r);
   2518    vassert(n <= 15);
   2519    return toUChar(n);
   2520 }
   2521 
   /* BITS4 packs four single-bit values into a 4-bit number, most
      significant bit first.  The Xnnnn names are the sixteen possible
      results, spelled out in binary for readability. */
   #define BITS4(zzb3,zzb2,zzb1,zzb0) \
      (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
   #define X0000  BITS4(0,0,0,0)
   #define X0001  BITS4(0,0,0,1)
   #define X0010  BITS4(0,0,1,0)
   #define X0011  BITS4(0,0,1,1)
   #define X0100  BITS4(0,1,0,0)
   #define X0101  BITS4(0,1,0,1)
   #define X0110  BITS4(0,1,1,0)
   #define X0111  BITS4(0,1,1,1)
   #define X1000  BITS4(1,0,0,0)
   #define X1001  BITS4(1,0,0,1)
   #define X1010  BITS4(1,0,1,0)
   #define X1011  BITS4(1,0,1,1)
   #define X1100  BITS4(1,1,0,0)
   #define X1101  BITS4(1,1,0,1)
   #define X1110  BITS4(1,1,1,0)
   #define X1111  BITS4(1,1,1,1)

   /* The macros below assemble a 32-bit word out of 4-bit fields.  Read
      the macro name left to right as nibbles 7 (bits 31..28) down to 0
      (bits 3..0): an 'X' is a nibble supplied as an argument, a '_' is a
      nibble left as zero.  Each argument is masked to its low 4 bits.

      The mask is the unsigned constant 0xFU on purpose: it forces the
      shift to be done in unsigned arithmetic.  With a plain int mask, a
      top nibble of 8 or more (for instance X1110) would be shifted into
      the sign bit of a signed int, which is undefined behaviour in C. */

   /* Nibbles 7,6,5,4,3. */
   #define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
      ((((zzx7) & 0xFU) << 28) | (((zzx6) & 0xFU) << 24) |  \
       (((zzx5) & 0xFU) << 20) | (((zzx4) & 0xFU) << 16) |  \
       (((zzx3) & 0xFU) << 12))

   /* Nibbles 7,6,5,4,3,2. */
   #define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2)        \
      ((((zzx7) & 0xFU) << 28) | (((zzx6) & 0xFU) << 24) |  \
       (((zzx5) & 0xFU) << 20) | (((zzx4) & 0xFU) << 16) |  \
       (((zzx3) & 0xFU) << 12) | (((zzx2) & 0xFU) <<  8))

   /* Nibbles 7,6,5,4,3 and 0. */
   #define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0)        \
      ((((zzx7) & 0xFU) << 28) | (((zzx6) & 0xFU) << 24) |  \
       (((zzx5) & 0xFU) << 20) | (((zzx4) & 0xFU) << 16) |  \
       (((zzx3) & 0xFU) << 12) | (((zzx0) & 0xFU) <<  0))

   /* Nibbles 7,6,5 and 1,0. */
   #define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
     ((((zzx7) & 0xFU) << 28) | (((zzx6) & 0xFU) << 24) | \
      (((zzx5) & 0xFU) << 20) | (((zzx1) & 0xFU) << 4) | \
      (((zzx0) & 0xFU) << 0))

   /* All eight nibbles. */
   #define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0)  \
      ((((zzx7) & 0xFU) << 28) | (((zzx6) & 0xFU) << 24) |  \
       (((zzx5) & 0xFU) << 20) | (((zzx4) & 0xFU) << 16) |  \
       (((zzx3) & 0xFU) << 12) | (((zzx2) & 0xFU) <<  8) |  \
       (((zzx1) & 0xFU) <<  4) | (((zzx0) & 0xFU) <<  0))
   2566 
   2567 /* Generate a skeletal insn that involves an a RI84 shifter operand.
   2568    Returns a word which is all zeroes apart from bits 25 and 11..0,
   2569    since it is those that encode the shifter operand (at least to the
   2570    extent that we care about it.) */
   2571 static UInt skeletal_RI84 ( ARMRI84* ri )
   2572 {
   2573    UInt instr;
   2574    if (ri->tag == ARMri84_I84) {
   2575       vassert(0 == (ri->ARMri84.I84.imm4 & ~0x0F));
   2576       vassert(0 == (ri->ARMri84.I84.imm8 & ~0xFF));
   2577       instr = 1 << 25;
   2578       instr |= (ri->ARMri84.I84.imm4 << 8);
   2579       instr |= ri->ARMri84.I84.imm8;
   2580    } else {
   2581       instr = 0 << 25;
   2582       instr |= iregNo(ri->ARMri84.R.reg);
   2583    }
   2584    return instr;
   2585 }
   2586 
   2587 /* Ditto for RI5.  Resulting word is zeroes apart from bit 4 and bits
   2588    11..7. */
   2589 static UInt skeletal_RI5 ( ARMRI5* ri )
   2590 {
   2591    UInt instr;
   2592    if (ri->tag == ARMri5_I5) {
   2593       UInt imm5 = ri->ARMri5.I5.imm5;
   2594       vassert(imm5 >= 1 && imm5 <= 31);
   2595       instr = 0 << 4;
   2596       instr |= imm5 << 7;
   2597    } else {
   2598       instr = 1 << 4;
   2599       instr |= iregNo(ri->ARMri5.R.reg) << 8;
   2600    }
   2601    return instr;
   2602 }
   2603 
   2604 
   2605 /* Get an immediate into a register, using only that
   2606    register.  (very lame..) */
   2607 static UInt* imm32_to_iregNo ( UInt* p, Int rD, UInt imm32 )
   2608 {
   2609    UInt instr;
   2610    vassert(rD >= 0 && rD <= 14); // r15 not good to mess with!
   2611 #if 0
   2612    if (0 == (imm32 & ~0xFF)) {
   2613       /* mov with a immediate shifter operand of (0, imm32) (??) */
   2614       instr = XXXXXX__(X1110,X0011,X1010,X0000,rD,X0000);
   2615       instr |= imm32;
   2616       *p++ = instr;
   2617    } else {
   2618       // this is very bad; causes Dcache pollution
   2619       // ldr  rD, [pc]
   2620       instr = XXXXX___(X1110,X0101,X1001,X1111,rD);
   2621       *p++ = instr;
   2622       // b .+8
   2623       instr = 0xEA000000;
   2624       *p++ = instr;
   2625       // .word imm32
   2626       *p++ = imm32;
   2627    }
   2628 #else
   2629    if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
   2630       /* Generate movw rD, #low16.  Then, if the high 16 are
   2631          nonzero, generate movt rD, #high16. */
   2632       UInt lo16 = imm32 & 0xFFFF;
   2633       UInt hi16 = (imm32 >> 16) & 0xFFFF;
   2634       instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
   2635                        (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
   2636                        lo16 & 0xF);
   2637       *p++ = instr;
   2638       if (hi16 != 0) {
   2639          instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
   2640                           (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
   2641                           hi16 & 0xF);
   2642          *p++ = instr;
   2643       }
   2644    } else {
   2645       UInt imm, rot;
   2646       UInt op = X1010;
   2647       UInt rN = 0;
   2648       if ((imm32 & 0xFF) || (imm32 == 0)) {
   2649          imm = imm32 & 0xFF;
   2650          rot = 0;
   2651          instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
   2652          *p++ = instr;
   2653          op = X1000;
   2654          rN = rD;
   2655       }
   2656       if (imm32 & 0xFF000000) {
   2657          imm = (imm32 >> 24) & 0xFF;
   2658          rot = 4;
   2659          instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
   2660          *p++ = instr;
   2661          op = X1000;
   2662          rN = rD;
   2663       }
   2664       if (imm32 & 0xFF0000) {
   2665          imm = (imm32 >> 16) & 0xFF;
   2666          rot = 8;
   2667          instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
   2668          *p++ = instr;
   2669          op = X1000;
   2670          rN = rD;
   2671       }
   2672       if (imm32 & 0xFF00) {
   2673          imm = (imm32 >> 8) & 0xFF;
   2674          rot = 12;
   2675          instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
   2676          *p++ = instr;
   2677          op = X1000;
   2678          rN = rD;
   2679       }
   2680    }
   2681 #endif
   2682    return p;
   2683 }
   2684 
   2685 
   2686 Int emit_ARMInstr ( UChar* buf, Int nbuf, ARMInstr* i,
   2687                     Bool mode64, void* dispatch )
   2688 {
   2689    UInt* p = (UInt*)buf;
   2690    vassert(nbuf >= 32);
   2691    vassert(mode64 == False);
   2692    vassert(0 == (((HWord)buf) & 3));
   2693    /* since we branch to lr(r13) to get back to dispatch: */
   2694    vassert(dispatch == NULL);
   2695 
   2696    switch (i->tag) {
   2697       case ARMin_Alu: {
   2698          UInt     instr, subopc;
   2699          UInt     rD   = iregNo(i->ARMin.Alu.dst);
   2700          UInt     rN   = iregNo(i->ARMin.Alu.argL);
   2701          ARMRI84* argR = i->ARMin.Alu.argR;
   2702          switch (i->ARMin.Alu.op) {
   2703             case ARMalu_ADDS: /* fallthru */
   2704             case ARMalu_ADD:  subopc = X0100; break;
   2705             case ARMalu_ADC:  subopc = X0101; break;
   2706             case ARMalu_SUBS: /* fallthru */
   2707             case ARMalu_SUB:  subopc = X0010; break;
   2708             case ARMalu_SBC:  subopc = X0110; break;
   2709             case ARMalu_AND:  subopc = X0000; break;
   2710             case ARMalu_BIC:  subopc = X1110; break;
   2711             case ARMalu_OR:   subopc = X1100; break;
   2712             case ARMalu_XOR:  subopc = X0001; break;
   2713             default: goto bad;
   2714          }
   2715          instr = skeletal_RI84(argR);
   2716          instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
   2717                            (subopc << 1) & 0xF, rN, rD);
   2718          if (i->ARMin.Alu.op == ARMalu_ADDS
   2719              || i->ARMin.Alu.op == ARMalu_SUBS) {
   2720             instr |= 1<<20;  /* set the S bit */
   2721          }
   2722          *p++ = instr;
   2723          goto done;
   2724       }
   2725       case ARMin_Shift: {
   2726          UInt    instr, subopc;
   2727          HReg    rD   = iregNo(i->ARMin.Shift.dst);
   2728          HReg    rM   = iregNo(i->ARMin.Shift.argL);
   2729          ARMRI5* argR = i->ARMin.Shift.argR;
   2730          switch (i->ARMin.Shift.op) {
   2731             case ARMsh_SHL: subopc = X0000; break;
   2732             case ARMsh_SHR: subopc = X0001; break;
   2733             case ARMsh_SAR: subopc = X0010; break;
   2734             default: goto bad;
   2735          }
   2736          instr = skeletal_RI5(argR);
   2737          instr |= XXXXX__X(X1110,X0001,X1010,X0000,rD, /* _ _ */ rM);
   2738          instr |= (subopc & 3) << 5;
   2739          *p++ = instr;
   2740          goto done;
   2741       }
   2742       case ARMin_Unary: {
   2743          UInt instr;
   2744          HReg rDst = iregNo(i->ARMin.Unary.dst);
   2745          HReg rSrc = iregNo(i->ARMin.Unary.src);
   2746          switch (i->ARMin.Unary.op) {
   2747             case ARMun_CLZ:
   2748                instr = XXXXXXXX(X1110,X0001,X0110,X1111,
   2749                                 rDst,X1111,X0001,rSrc);
   2750                *p++ = instr;
   2751                goto done;
   2752             case ARMun_NEG: /* RSB rD,rS,#0 */
   2753                instr = XXXXX___(X1110,0x2,0x6,rSrc,rDst);
   2754                *p++ = instr;
   2755                goto done;
   2756             case ARMun_NOT: {
   2757                UInt subopc = X1111; /* MVN */
   2758                instr = rSrc;
   2759                instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
   2760                                  (subopc << 1) & 0xF, 0, rDst);
   2761                *p++ = instr;
   2762                goto done;
   2763             }
   2764             default:
   2765                break;
   2766          }
   2767          goto bad;
   2768       }
   2769       case ARMin_CmpOrTst: {
   2770          UInt instr  = skeletal_RI84(i->ARMin.CmpOrTst.argR);
   2771          UInt subopc = i->ARMin.CmpOrTst.isCmp ? X1010 : X1000;
   2772          UInt SBZ    = 0;
   2773          instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
   2774                            ((subopc << 1) & 0xF) | 1,
   2775                            i->ARMin.CmpOrTst.argL, SBZ );
   2776          *p++ = instr;
   2777          goto done;
   2778       }
   2779       case ARMin_Mov: {
   2780          UInt instr  = skeletal_RI84(i->ARMin.Mov.src);
   2781          UInt subopc = X1101; /* MOV */
   2782          UInt SBZ    = 0;
   2783          instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
   2784                            (subopc << 1) & 0xF, SBZ, i->ARMin.Mov.dst);
   2785          *p++ = instr;
   2786          goto done;
   2787       }
   2788       case ARMin_Imm32: {
   2789          p = imm32_to_iregNo( (UInt*)p, iregNo(i->ARMin.Imm32.dst),
   2790                                         i->ARMin.Imm32.imm32 );
   2791          goto done;
   2792       }
   2793       case ARMin_LdSt32:
   2794       case ARMin_LdSt8U: {
   2795          UInt       bL, bB;
   2796          HReg       rD;
   2797          ARMAMode1* am;
   2798          if (i->tag == ARMin_LdSt32) {
   2799             bB = 0;
   2800             bL = i->ARMin.LdSt32.isLoad ? 1 : 0;
   2801             am = i->ARMin.LdSt32.amode;
   2802             rD = i->ARMin.LdSt32.rD;
   2803          } else {
   2804             bB = 1;
   2805             bL = i->ARMin.LdSt8U.isLoad ? 1 : 0;
   2806             am = i->ARMin.LdSt8U.amode;
   2807             rD = i->ARMin.LdSt8U.rD;
   2808          }
   2809          if (am->tag == ARMam1_RI) {
   2810             Int  simm12;
   2811             UInt instr, bP;
   2812             if (am->ARMam1.RI.simm13 < 0) {
   2813                bP = 0;
   2814                simm12 = -am->ARMam1.RI.simm13;
   2815             } else {
   2816                bP = 1;
   2817                simm12 = am->ARMam1.RI.simm13;
   2818             }
   2819             vassert(simm12 >= 0 && simm12 <= 4095);
   2820             instr = XXXXX___(X1110,X0101,BITS4(bP,bB,0,bL),
   2821                              iregNo(am->ARMam1.RI.reg),
   2822                              iregNo(rD));
   2823             instr |= simm12;
   2824             *p++ = instr;
   2825             goto done;
   2826          } else {
   2827             // RR case
   2828             goto bad;
   2829          }
   2830       }
   2831       case ARMin_LdSt16: {
   2832          HReg       rD = i->ARMin.LdSt16.rD;
   2833          UInt       bS = i->ARMin.LdSt16.signedLoad ? 1 : 0;
   2834          UInt       bL = i->ARMin.LdSt16.isLoad ? 1 : 0;
   2835          ARMAMode2* am = i->ARMin.LdSt16.amode;
   2836          if (am->tag == ARMam2_RI) {
   2837             HReg rN = am->ARMam2.RI.reg;
   2838             Int  simm8;
   2839             UInt bP, imm8hi, imm8lo, instr;
   2840             if (am->ARMam2.RI.simm9 < 0) {
   2841                bP = 0;
   2842                simm8 = -am->ARMam2.RI.simm9;
   2843             } else {
   2844                bP = 1;
   2845                simm8 = am->ARMam2.RI.simm9;
   2846             }
   2847             vassert(simm8 >= 0 && simm8 <= 255);
   2848             imm8hi = (simm8 >> 4) & 0xF;
   2849             imm8lo = simm8 & 0xF;
   2850             vassert(!(bL == 0 && bS == 1)); // "! signed store"
   2851             /**/ if (bL == 0 && bS == 0) {
   2852                // strh
   2853                instr = XXXXXXXX(X1110,X0001, BITS4(bP,1,0,0), iregNo(rN),
   2854                                 iregNo(rD), imm8hi, X1011, imm8lo);
   2855                *p++ = instr;
   2856                goto done;
   2857             }
   2858             else if (bL == 1 && bS == 0) {
   2859                // ldrh
   2860                instr = XXXXXXXX(X1110,X0001, BITS4(bP,1,0,1), iregNo(rN),
   2861                                 iregNo(rD), imm8hi, X1011, imm8lo);
   2862                *p++ = instr;
   2863                goto done;
   2864             }
   2865             else if (bL == 1 && bS == 1) {
   2866                goto bad;
   2867             }
   2868             else vassert(0); // ill-constructed insn
   2869          } else {
   2870             // RR case
   2871             goto bad;
   2872          }
   2873       }
   2874       case ARMin_Ld8S:
   2875          goto bad;
   2876       case ARMin_Goto: {
   2877          UInt        instr;
   2878          IRJumpKind  jk    = i->ARMin.Goto.jk;
   2879          ARMCondCode cond  = i->ARMin.Goto.cond;
   2880          UInt        rnext = iregNo(i->ARMin.Goto.gnext);
   2881          Int         trc   = -1;
   2882          switch (jk) {
   2883             case Ijk_Ret: case Ijk_Call: case Ijk_Boring:
   2884                break; /* no need to set GST in these common cases */
   2885             case Ijk_ClientReq:
   2886                trc = VEX_TRC_JMP_CLIENTREQ; break;
   2887             case Ijk_Sys_int128:
   2888             case Ijk_Sys_int129:
   2889             case Ijk_Sys_int130:
   2890             case Ijk_Yield:
   2891             case Ijk_EmWarn:
   2892             case Ijk_MapFail:
   2893                goto unhandled_jk;
   2894             case Ijk_YieldNoRedir:
   2895                trc = VEX_TRC_JMP_YIELD_NOREDIR; break;
   2896             case Ijk_NoDecode:
   2897                trc = VEX_TRC_JMP_NODECODE; break;
   2898             case Ijk_TInval:
   2899                trc = VEX_TRC_JMP_TINVAL; break;
   2900             case Ijk_NoRedir:
   2901                trc = VEX_TRC_JMP_NOREDIR; break;
   2902             case Ijk_Sys_sysenter:
   2903             case Ijk_SigTRAP:
   2904             case Ijk_SigSEGV:
   2905                goto unhandled_jk;
   2906             case Ijk_Sys_syscall:
   2907                trc = VEX_TRC_JMP_SYS_SYSCALL; break;
   2908             unhandled_jk:
   2909             default:
   2910                goto bad;
   2911          }
   2912          if (trc != -1) {
   2913             // mov{cond} r8, #trc
   2914             vassert(trc >= 0 && trc <= 255);
   2915             instr = (cond << 28) | 0x03A08000 | (0xFF & (UInt)trc);
   2916             *p++ = instr;
   2917          }
   2918          // mov{cond} r0, rnext
   2919          if (rnext != 0) {
   2920             instr = (cond << 28) | 0x01A00000 | rnext;
   2921             *p++ = instr;
   2922          }
   2923          // bx{cond} r14
   2924          instr =(cond << 28) | 0x012FFF1E;
   2925          *p++ = instr;
   2926          goto done;
   2927       }
   2928       case ARMin_CMov: {
   2929          UInt instr  = skeletal_RI84(i->ARMin.CMov.src);
   2930          UInt subopc = X1101; /* MOV */
   2931          UInt SBZ    = 0;
   2932          instr |= XXXXX___(i->ARMin.CMov.cond, (1 & (subopc >> 3)),
   2933                            (subopc << 1) & 0xF, SBZ, i->ARMin.CMov.dst);
   2934          *p++ = instr;
   2935          goto done;
   2936       }
   2937       case ARMin_Call: {
   2938          UInt instr;
   2939          /* Decide on a scratch reg used to hold to the call address.
   2940             This has to be done as per the comments in getRegUsage. */
   2941          Int scratchNo;
   2942          switch (i->ARMin.Call.nArgRegs) {
   2943             case 0:  scratchNo = 0;  break;
   2944             case 1:  scratchNo = 1;  break;
   2945             case 2:  scratchNo = 2;  break;
   2946             case 3:  scratchNo = 3;  break;
   2947             case 4:  scratchNo = 11; break;
   2948             default: vassert(0);
   2949          }
   2950          // r"scratchNo" = &target
   2951          p = imm32_to_iregNo( (UInt*)p,
   2952                               scratchNo, (UInt)i->ARMin.Call.target );
   2953          // blx{cond} r"scratchNo"
   2954          instr = XXX___XX(i->ARMin.Call.cond, X0001, X0010, /*___*/
   2955                           X0011, scratchNo);
   2956          instr |= 0xFFF << 8; // stick in the SBOnes
   2957          *p++ = instr;
   2958          goto done;
   2959       }
   2960       case ARMin_Mul: {
   2961          /* E0000392   mul     r0, r2, r3
   2962             E0810392   umull   r0(LO), r1(HI), r2, r3
   2963             E0C10392   smull   r0(LO), r1(HI), r2, r3
   2964          */
   2965          switch (i->ARMin.Mul.op) {
   2966             case ARMmul_PLAIN: *p++ = 0xE0000392; goto done;
   2967             case ARMmul_ZX:    *p++ = 0xE0810392; goto done;
   2968             case ARMmul_SX:    *p++ = 0xE0C10392; goto done;
   2969             default: vassert(0);
   2970          }
   2971          goto bad;
   2972       }
   2973       case ARMin_LdrEX: {
   2974          /* E1B01F9F   ldrexd   r2, r3, [r0]
   2975             E1901F9F   ldrex    r2, [r0]
   2976             E1F01F9F   ldrexh   r2, [r0]
   2977             E1D01F9F   ldrexb   r2, [r0]
   2978          */
   2979          switch (i->ARMin.LdrEX.szB) {
   2980             case 8: *p++ = 0xE1B02F9F; goto done;
   2981             case 4: *p++ = 0xE1902F9F; goto done;
   2982             //case 2: *p++ = 0xE1F02F9F; goto done;
   2983             case 1: *p++ = 0xE1D02F9F; goto done;
   2984             default: break;
   2985          }
   2986          goto bad;
   2987       }
   2988       case ARMin_StrEX: {
   2989          /* E1A01F92   strexd  r1, r2, r3, [r0]
   2990             E1801F92   strex   r1, r2, [r0]
   2991             E1E01F92   strexh  r1, r2, [r0]
   2992             E1C01F92   strexb  r1, r2, [r0]
   2993          */
   2994          switch (i->ARMin.StrEX.szB) {
   2995             case 8: *p++ = 0xE1A01F92; goto done;
   2996             case 4: *p++ = 0xE1801F92; goto done;
   2997             //case 2: *p++ = 0xE1E01F92; goto done;
   2998             case 1: *p++ = 0xE1C01F92; goto done;
   2999             default: break;
   3000          }
   3001          goto bad;
   3002       }
   3003       case ARMin_VLdStD: {
   3004          UInt dD     = dregNo(i->ARMin.VLdStD.dD);
   3005          UInt rN     = iregNo(i->ARMin.VLdStD.amode->reg);
   3006          Int  simm11 = i->ARMin.VLdStD.amode->simm11;
   3007          UInt off8   = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
   3008          UInt bU     = simm11 >= 0 ? 1 : 0;
   3009          UInt bL     = i->ARMin.VLdStD.isLoad ? 1 : 0;
   3010          UInt insn;
   3011          vassert(0 == (off8 & 3));
   3012          off8 >>= 2;
   3013          vassert(0 == (off8 & 0xFFFFFF00));
   3014          insn = XXXXXX__(0xE,X1101,BITS4(bU,0,0,bL),rN,dD,X1011);
   3015          insn |= off8;
   3016          *p++ = insn;
   3017          goto done;
   3018       }
   3019       case ARMin_VLdStS: {
   3020          UInt fD     = fregNo(i->ARMin.VLdStS.fD);
   3021          UInt rN     = iregNo(i->ARMin.VLdStS.amode->reg);
   3022          Int  simm11 = i->ARMin.VLdStS.amode->simm11;
   3023          UInt off8   = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
   3024          UInt bU     = simm11 >= 0 ? 1 : 0;
   3025          UInt bL     = i->ARMin.VLdStS.isLoad ? 1 : 0;
   3026          UInt bD     = fD & 1;
   3027          UInt insn;
   3028          vassert(0 == (off8 & 3));
   3029          off8 >>= 2;
   3030          vassert(0 == (off8 & 0xFFFFFF00));
   3031          insn = XXXXXX__(0xE,X1101,BITS4(bU,bD,0,bL),rN, (fD >> 1), X1010);
   3032          insn |= off8;
   3033          *p++ = insn;
   3034          goto done;
   3035       }
   3036       case ARMin_VAluD: {
   3037          UInt dN = dregNo(i->ARMin.VAluD.argL);
   3038          UInt dD = dregNo(i->ARMin.VAluD.dst);
   3039          UInt dM = dregNo(i->ARMin.VAluD.argR);
   3040          UInt pqrs = X1111; /* undefined */
   3041          switch (i->ARMin.VAluD.op) {
   3042             case ARMvfp_ADD: pqrs = X0110; break;
   3043             case ARMvfp_SUB: pqrs = X0111; break;
   3044             case ARMvfp_MUL: pqrs = X0100; break;
   3045             case ARMvfp_DIV: pqrs = X1000; break;
   3046             default: goto bad;
   3047          }
   3048          vassert(pqrs != X1111);
   3049          UInt bP  = (pqrs >> 3) & 1;
   3050          UInt bQ  = (pqrs >> 2) & 1;
   3051          UInt bR  = (pqrs >> 1) & 1;
   3052          UInt bS  = (pqrs >> 0) & 1;
   3053          UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,0,bQ,bR), dN, dD,
   3054                               X1011, BITS4(0,bS,0,0), dM);
   3055          *p++ = insn;
   3056          goto done;
   3057       }
   3058       case ARMin_VAluS: {
   3059          UInt dN = fregNo(i->ARMin.VAluS.argL);
   3060          UInt dD = fregNo(i->ARMin.VAluS.dst);
   3061          UInt dM = fregNo(i->ARMin.VAluS.argR);
   3062          UInt bN = dN & 1;
   3063          UInt bD = dD & 1;
   3064          UInt bM = dM & 1;
   3065          UInt pqrs = X1111; /* undefined */
   3066          switch (i->ARMin.VAluS.op) {
   3067             case ARMvfp_ADD: pqrs = X0110; break;
   3068             case ARMvfp_SUB: pqrs = X0111; break;
   3069             case ARMvfp_MUL: pqrs = X0100; break;
   3070             case ARMvfp_DIV: pqrs = X1000; break;
   3071             default: goto bad;
   3072          }
   3073          vassert(pqrs != X1111);
   3074          UInt bP  = (pqrs >> 3) & 1;
   3075          UInt bQ  = (pqrs >> 2) & 1;
   3076          UInt bR  = (pqrs >> 1) & 1;
   3077          UInt bS  = (pqrs >> 0) & 1;
   3078          UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,bD,bQ,bR),
   3079                               (dN >> 1), (dD >> 1),
   3080                               X1010, BITS4(bN,bS,bM,0), (dM >> 1));
   3081          *p++ = insn;
   3082          goto done;
   3083       }
   3084       case ARMin_VUnaryD: {
   3085          UInt dD   = dregNo(i->ARMin.VUnaryD.dst);
   3086          UInt dM   = dregNo(i->ARMin.VUnaryD.src);
   3087          UInt insn = 0;
   3088          switch (i->ARMin.VUnaryD.op) {
   3089             case ARMvfpu_COPY:
   3090                insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X0100,dM);
   3091                break;
   3092             case ARMvfpu_ABS:
   3093                insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X1100,dM);
   3094                break;
   3095             case ARMvfpu_NEG:
   3096                insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X0100,dM);
   3097                break;
   3098             case ARMvfpu_SQRT:
   3099                insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X1100,dM);
   3100                break;
   3101             default:
   3102                goto bad;
   3103          }
   3104          *p++ = insn;
   3105          goto done;
   3106       }
   3107       case ARMin_VUnaryS: {
   3108          UInt fD   = fregNo(i->ARMin.VUnaryS.dst);
   3109          UInt fM   = fregNo(i->ARMin.VUnaryS.src);
   3110          UInt insn = 0;
   3111          switch (i->ARMin.VUnaryS.op) {
   3112             case ARMvfpu_COPY:
   3113                insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
   3114                                (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
   3115                                (fM >> 1));
   3116                break;
   3117             case ARMvfpu_ABS:
   3118                insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
   3119                                (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
   3120                                (fM >> 1));
   3121                break;
   3122             case ARMvfpu_NEG:
   3123                insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
   3124                                (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
   3125                                (fM >> 1));
   3126                break;
   3127             case ARMvfpu_SQRT:
   3128                insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
   3129                                (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
   3130                                (fM >> 1));
   3131                break;
   3132             default:
   3133                goto bad;
   3134          }
   3135          *p++ = insn;
   3136          goto done;
   3137       }
   3138       case ARMin_VCmpD: {
   3139          UInt dD   = dregNo(i->ARMin.VCmpD.argL);
   3140          UInt dM   = dregNo(i->ARMin.VCmpD.argR);
   3141          UInt insn = XXXXXXXX(0xE, X1110, X1011, X0100, dD, X1011, X0100, dM);
   3142          *p++ = insn;       /* FCMPD dD, dM */
   3143          *p++ = 0xEEF1FA10; /* FMSTAT */
   3144          goto done;
   3145       }
   3146       case ARMin_VCMovD: {
   3147          UInt cc = (UInt)i->ARMin.VCMovD.cond;
   3148          UInt dD = dregNo(i->ARMin.VCMovD.dst);
   3149          UInt dM = dregNo(i->ARMin.VCMovD.src);
   3150          vassert(cc < 16 && cc != ARMcc_AL);
   3151          UInt insn = XXXXXXXX(cc, X1110,X1011,X0000,dD,X1011,X0100,dM);
   3152          *p++ = insn;
   3153          goto done;
   3154       }
   3155       case ARMin_VCMovS: {
   3156          UInt cc = (UInt)i->ARMin.VCMovS.cond;
   3157          UInt fD = fregNo(i->ARMin.VCMovS.dst);
   3158          UInt fM = fregNo(i->ARMin.VCMovS.src);
   3159          vassert(cc < 16 && cc != ARMcc_AL);
   3160          UInt insn = XXXXXXXX(cc, X1110, BITS4(1,(fD & 1),1,1),
   3161                               X0000,(fD >> 1),X1010,
   3162                               BITS4(0,1,(fM & 1),0), (fM >> 1));
   3163          *p++ = insn;
   3164          goto done;
   3165       }
   3166       case ARMin_VCvtSD: {
   3167          if (i->ARMin.VCvtSD.sToD) {
   3168             UInt dD = dregNo(i->ARMin.VCvtSD.dst);
   3169             UInt fM = fregNo(i->ARMin.VCvtSD.src);
   3170             UInt insn = XXXXXXXX(0xE, X1110, X1011, X0111, dD, X1010,
   3171                                  BITS4(1,1, (fM & 1), 0),
   3172                                  (fM >> 1));
   3173             *p++ = insn;
   3174             goto done;
   3175          } else {
   3176             UInt fD = fregNo(i->ARMin.VCvtSD.dst);
   3177             UInt dM = dregNo(i->ARMin.VCvtSD.src);
   3178             UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1),
   3179                                  X0111, (fD >> 1),
   3180                                  X1011, X1100, dM);
   3181             *p++ = insn;
   3182             goto done;
   3183          }
   3184          goto bad;
   3185       }
   3186       case ARMin_VXferD: {
   3187          UInt dD  = dregNo(i->ARMin.VXferD.dD);
   3188          UInt rHi = iregNo(i->ARMin.VXferD.rHi);
   3189          UInt rLo = iregNo(i->ARMin.VXferD.rLo);
   3190          /* vmov dD, rLo, rHi is
   3191             E C 4 rHi rLo B (0,0,dD[4],1) dD[3:0]
   3192             vmov rLo, rHi, dD is
   3193             E C 5 rHi rLo B (0,0,dD[4],1) dD[3:0]
   3194          */
   3195          UInt insn
   3196             = XXXXXXXX(0xE, 0xC, i->ARMin.VXferD.toD ? 4 : 5,
   3197                        rHi, rLo, 0xB,
   3198                        BITS4(0,0, ((dD >> 4) & 1), 1), (dD & 0xF));
   3199          *p++ = insn;
   3200          goto done;
   3201       }
   3202       case ARMin_VXferS: {
   3203          UInt fD  = fregNo(i->ARMin.VXferS.fD);
   3204          UInt rLo = iregNo(i->ARMin.VXferS.rLo);
   3205          /* vmov fD, rLo is
   3206             E E 0 fD[4:1] rLo A (fD[0],0,0,1) 0
   3207             vmov rLo, fD is
   3208             E E 1 fD[4:1] rLo A (fD[0],0,0,1) 0
   3209          */
   3210          UInt insn
   3211             = XXXXXXXX(0xE, 0xE, i->ARMin.VXferS.toS ? 0 : 1,
   3212                        (fD >> 1) & 0xF, rLo, 0xA,
   3213                        BITS4((fD & 1),0,0,1), 0);
   3214          *p++ = insn;
   3215          goto done;
   3216       }
   3217       case ARMin_VCvtID: {
   3218          Bool iToD = i->ARMin.VCvtID.iToD;
   3219          Bool syned = i->ARMin.VCvtID.syned;
   3220          if (iToD && syned) {
   3221             // FSITOD: I32S-in-freg to F64-in-dreg
   3222             UInt regF = fregNo(i->ARMin.VCvtID.src);
   3223             UInt regD = dregNo(i->ARMin.VCvtID.dst);
   3224             UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
   3225                                  X1011, BITS4(1,1,(regF & 1),0),
   3226                                  (regF >> 1) & 0xF);
   3227             *p++ = insn;
   3228             goto done;
   3229          }
   3230          if (iToD && (!syned)) {
   3231             // FUITOD: I32U-in-freg to F64-in-dreg
   3232             UInt regF = fregNo(i->ARMin.VCvtID.src);
   3233             UInt regD = dregNo(i->ARMin.VCvtID.dst);
   3234             UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
   3235                                  X1011, BITS4(0,1,(regF & 1),0),
   3236                                  (regF >> 1) & 0xF);
   3237             *p++ = insn;
   3238             goto done;
   3239          }
   3240          if ((!iToD) && syned) {
   3241             // FTOSID: F64-in-dreg to I32S-in-freg
   3242             UInt regD = dregNo(i->ARMin.VCvtID.src);
   3243             UInt regF = fregNo(i->ARMin.VCvtID.dst);
   3244             UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
   3245                                  X1101, (regF >> 1) & 0xF,
   3246                                  X1011, X0100, regD);
   3247             *p++ = insn;
   3248             goto done;
   3249          }
   3250          if ((!iToD) && (!syned)) {
   3251             // FTOUID: F64-in-dreg to I32U-in-freg
   3252             UInt regD = dregNo(i->ARMin.VCvtID.src);
   3253             UInt regF = fregNo(i->ARMin.VCvtID.dst);
   3254             UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
   3255                                  X1100, (regF >> 1) & 0xF,
   3256                                  X1011, X0100, regD);
   3257             *p++ = insn;
   3258             goto done;
   3259          }
   3260          /*UNREACHED*/
   3261          vassert(0);
   3262       }
   3263       case ARMin_FPSCR: {
   3264          Bool toFPSCR = i->ARMin.FPSCR.toFPSCR;
   3265          HReg iReg    = iregNo(i->ARMin.FPSCR.iReg);
   3266          if (toFPSCR) {
   3267             /* fmxr fpscr, iReg is EEE1 iReg A10 */
   3268             *p++ = 0xEEE10A10 | ((iReg & 0xF) << 12);
   3269             goto done;
   3270          }
   3271          goto bad; // FPSCR -> iReg case currently ATC
   3272       }
   3273       case ARMin_MFence: {
   3274          *p++ = 0xEE070F9A; /* mcr 15,0,r0,c7,c10,4 (DSB) */
   3275          *p++ = 0xEE070FBA; /* mcr 15,0,r0,c7,c10,5 (DMB) */
   3276          *p++ = 0xEE070F95; /* mcr 15,0,r0,c7,c5,4  (ISB) */
   3277          goto done;
   3278       }
   3279       case ARMin_NLdStQ: {
   3280          UInt regD = qregNo(i->ARMin.NLdStQ.dQ) << 1;
   3281          UInt regN, regM;
   3282          UInt D = regD >> 4;
   3283          UInt bL = i->ARMin.NLdStQ.isLoad ? 1 : 0;
   3284          UInt insn;
   3285          vassert(hregClass(i->ARMin.NLdStQ.dQ) == HRcVec128);
   3286          regD &= 0xF;
   3287          if (i->ARMin.NLdStQ.amode->tag == ARMamN_RR) {
   3288             regN = iregNo(i->ARMin.NLdStQ.amode->ARMamN.RR.rN);
   3289             regM = iregNo(i->ARMin.NLdStQ.amode->ARMamN.RR.rM);
   3290          } else {
   3291             regN = iregNo(i->ARMin.NLdStQ.amode->ARMamN.R.rN);
   3292             regM = 15;
   3293          }
   3294          insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
   3295                               regN, regD, X1010, X1000, regM);
   3296          *p++ = insn;
   3297          goto done;
   3298       }
   3299       case ARMin_NLdStD: {
   3300          UInt regD = dregNo(i->ARMin.NLdStD.dD);
   3301          UInt regN, regM;
   3302          UInt D = regD >> 4;
   3303          UInt bL = i->ARMin.NLdStD.isLoad ? 1 : 0;
   3304          UInt insn;
   3305          vassert(hregClass(i->ARMin.NLdStD.dD) == HRcFlt64);
   3306          regD &= 0xF;
   3307          if (i->ARMin.NLdStD.amode->tag == ARMamN_RR) {
   3308             regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rN);
   3309             regM = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rM);
   3310          } else {
   3311             regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.R.rN);
   3312             regM = 15;
   3313          }
   3314          insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
   3315                               regN, regD, X0111, X1000, regM);
   3316          *p++ = insn;
   3317          goto done;
   3318       }
   3319       case ARMin_NUnaryS: {
   3320          UInt Q = i->ARMin.NUnaryS.Q ? 1 : 0;
   3321          UInt regD, D;
   3322          UInt regM, M;
   3323          UInt size = i->ARMin.NUnaryS.size;
   3324          UInt insn;
   3325          UInt opc, opc1, opc2;
   3326          switch (i->ARMin.NUnaryS.op) {
   3327 	    case ARMneon_VDUP:
   3328                if (i->ARMin.NUnaryS.size >= 16)
   3329                   goto bad;
   3330                if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Reg)
   3331                   goto bad;
   3332                if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
   3333                   goto bad;
   3334                regD = (hregClass(i->ARMin.NUnaryS.dst->reg) == HRcVec128)
   3335                         ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1)
   3336                         : dregNo(i->ARMin.NUnaryS.dst->reg);
   3337                regM = (hregClass(i->ARMin.NUnaryS.src->reg) == HRcVec128)
   3338                         ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1)
   3339                         : dregNo(i->ARMin.NUnaryS.src->reg);
   3340                D = regD >> 4;
   3341                M = regM >> 4;
   3342                regD &= 0xf;
   3343                regM &= 0xf;
   3344                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1),
   3345                                (i->ARMin.NUnaryS.size & 0xf), regD,
   3346                                X1100, BITS4(0,Q,M,0), regM);
   3347                *p++ = insn;
   3348                goto done;
   3349             case ARMneon_SETELEM:
   3350                regD = Q ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1) :
   3351                                 dregNo(i->ARMin.NUnaryS.dst->reg);
   3352                regM = iregNo(i->ARMin.NUnaryS.src->reg);
   3353                M = regM >> 4;
   3354                D = regD >> 4;
   3355                regM &= 0xF;
   3356                regD &= 0xF;
   3357                if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Scalar)
   3358                   goto bad;
   3359                switch (size) {
   3360                   case 0:
   3361                      if (i->ARMin.NUnaryS.dst->index > 7)
   3362                         goto bad;
   3363                      opc = X1000 | i->ARMin.NUnaryS.dst->index;
   3364                      break;
   3365                   case 1:
   3366                      if (i->ARMin.NUnaryS.dst->index > 3)
   3367                         goto bad;
   3368                      opc = X0001 | (i->ARMin.NUnaryS.dst->index << 1);
   3369                      break;
   3370                   case 2:
   3371                      if (i->ARMin.NUnaryS.dst->index > 1)
   3372                         goto bad;
   3373                      opc = X0000 | (i->ARMin.NUnaryS.dst->index << 2);
   3374                      break;
   3375                   default:
   3376                      goto bad;
   3377                }
   3378                opc1 = (opc >> 2) & 3;
   3379                opc2 = opc & 3;
   3380                insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),0),
   3381                                regD, regM, X1011,
   3382                                BITS4(D,(opc2 >> 1),(opc2 & 1),1), X0000);
   3383                *p++ = insn;
   3384                goto done;
   3385             case ARMneon_GETELEMU:
   3386                regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) :
   3387                                 dregNo(i->ARMin.NUnaryS.src->reg);
   3388                regD = iregNo(i->ARMin.NUnaryS.dst->reg);
   3389                M = regM >> 4;
   3390                D = regD >> 4;
   3391                regM &= 0xF;
   3392                regD &= 0xF;
   3393                if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
   3394                   goto bad;
   3395                switch (size) {
   3396                   case 0:
   3397                      if (Q && i->ARMin.NUnaryS.src->index > 7) {
   3398                         regM++;
   3399                         i->ARMin.NUnaryS.src->index -= 8;
   3400                      }
   3401                      if (i->ARMin.NUnaryS.src->index > 7)
   3402                         goto bad;
   3403                      opc = X1000 | i->ARMin.NUnaryS.src->index;
   3404                      break;
   3405                   case 1:
   3406                      if (Q && i->ARMin.NUnaryS.src->index > 3) {
   3407                         regM++;
   3408                         i->ARMin.NUnaryS.src->index -= 4;
   3409                      }
   3410                      if (i->ARMin.NUnaryS.src->index > 3)
   3411                         goto bad;
   3412                      opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
   3413                      break;
   3414                   case 2:
   3415                      goto bad;
   3416                   default:
   3417                      goto bad;
   3418                }
   3419                opc1 = (opc >> 2) & 3;
   3420                opc2 = opc & 3;
   3421                insn = XXXXXXXX(0xE, X1110, BITS4(1,(opc1 >> 1),(opc1 & 1),1),
   3422                                regM, regD, X1011,
   3423                                BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
   3424                *p++ = insn;
   3425                goto done;
   3426             case ARMneon_GETELEMS:
   3427                regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) :
   3428                                 dregNo(i->ARMin.NUnaryS.src->reg);
   3429                regD = iregNo(i->ARMin.NUnaryS.dst->reg);
   3430                M = regM >> 4;
   3431                D = regD >> 4;
   3432                regM &= 0xF;
   3433                regD &= 0xF;
   3434                if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
   3435                   goto bad;
   3436                switch (size) {
   3437                   case 0:
   3438                      if (Q && i->ARMin.NUnaryS.src->index > 7) {
   3439                         regM++;
   3440                         i->ARMin.NUnaryS.src->index -= 8;
   3441                      }
   3442                      if (i->ARMin.NUnaryS.src->index > 7)
   3443                         goto bad;
   3444                      opc = X1000 | i->ARMin.NUnaryS.src->index;
   3445                      break;
   3446                   case 1:
   3447                      if (Q && i->ARMin.NUnaryS.src->index > 3) {
   3448                         regM++;
   3449                         i->ARMin.NUnaryS.src->index -= 4;
   3450                      }
   3451                      if (i->ARMin.NUnaryS.src->index > 3)
   3452                         goto bad;
   3453                      opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
   3454                      break;
   3455                   case 2:
   3456                      if (Q && i->ARMin.NUnaryS.src->index > 1) {
   3457                         regM++;
   3458                         i->ARMin.NUnaryS.src->index -= 2;
   3459                      }
   3460                      if (i->ARMin.NUnaryS.src->index > 1)
   3461                         goto bad;
   3462                      opc = X0000 | (i->ARMin.NUnaryS.src->index << 2);
   3463                      break;
   3464                   default:
   3465                      goto bad;
   3466                }
   3467                opc1 = (opc >> 2) & 3;
   3468                opc2 = opc & 3;
   3469                insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),1),
   3470                                regM, regD, X1011,
   3471                                BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
   3472                *p++ = insn;
   3473                goto done;
   3474             default:
   3475                goto bad;
   3476          }
   3477       }
   3478       case ARMin_NUnary: {
                 /* NEON unary operation with one source and one destination
                    register.  Vector registers are numbered in D-register
                    units: a Q register contributes twice its Q number.  Bit 4
                    of each number is encoded separately (D for the
                    destination, M for the source) from its low four bits.
                    Each arm of the switch below only builds the instruction
                    word in 'insn'; it is stored once after the switch.
                    Unsupported operations leave through 'bad'. */
   3479          UInt Q = i->ARMin.NUnary.Q ? 1 : 0;
   3480          UInt regD = (hregClass(i->ARMin.NUnary.dst) == HRcVec128)
   3481                        ? (qregNo(i->ARMin.NUnary.dst) << 1)
   3482                        : dregNo(i->ARMin.NUnary.dst);
   3483          UInt regM, M;
   3484          UInt D = regD >> 4;
                 /* sz1/sz2 are the high and low bit of the size field; sz
                    is the whole field.  Several operations below reuse 'size'
                    to carry wider packed values and slice it differently. */
   3485          UInt sz1 = i->ARMin.NUnary.size >> 1;
   3486          UInt sz2 = i->ARMin.NUnary.size & 1;
   3487          UInt sz = i->ARMin.NUnary.size;
   3488          UInt insn;
   3489          UInt F = 0; /* TODO: floating point EQZ ??? */
                 /* For DUP the source is an integer register; for every
                    other operation it is a vector register. */
   3490          if (i->ARMin.NUnary.op != ARMneon_DUP) {
   3491             regM = (hregClass(i->ARMin.NUnary.src) == HRcVec128)
   3492                      ? (qregNo(i->ARMin.NUnary.src) << 1)
   3493                      : dregNo(i->ARMin.NUnary.src);
   3494             M = regM >> 4;
   3495          } else {
   3496             regM = iregNo(i->ARMin.NUnary.src);
   3497             M = regM >> 4;
   3498          }
   3499          regD &= 0xF;
   3500          regM &= 0xF;
   3501          switch (i->ARMin.NUnary.op) {
   3502             case ARMneon_COPY: /* VMOV reg, reg */
                       /* The source register is placed in both source
                          register fields of the encoding. */
   3503                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regM, regD, X0001,
   3504                                BITS4(M,Q,M,1), regM);
   3505                break;
   3506             case ARMneon_COPYN: /* VMOVN regD, regQ */
   3507                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
   3508                                regD, X0010, BITS4(0,0,M,0), regM);
   3509                break;
   3510             case ARMneon_COPYQNSS: /* VQMOVN regD, regQ */
   3511                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
   3512                                regD, X0010, BITS4(1,0,M,0), regM);
   3513                break;
   3514             case ARMneon_COPYQNUS: /* VQMOVUN regD, regQ */
   3515                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
   3516                                regD, X0010, BITS4(0,1,M,0), regM);
   3517                break;
   3518             case ARMneon_COPYQNUU: /* VQMOVN regD, regQ */
   3519                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
   3520                                regD, X0010, BITS4(1,1,M,0), regM);
   3521                break;
   3522             case ARMneon_COPYLS: /* VMOVL regQ, regD */
                       /* Exactly one of three adjacent bits is set,
                          chosen by sz (0, 1 or 2); larger sizes are
                          rejected. */
   3523                if (sz >= 3)
   3524                   goto bad;
   3525                insn = XXXXXXXX(0xF, X0010,
   3526                                BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
   3527                                BITS4((sz == 0) ? 1 : 0,0,0,0),
   3528                                regD, X1010, BITS4(0,0,M,1), regM);
   3529                break;
   3530             case ARMneon_COPYLU: /* VMOVL regQ, regD */
                       /* Same as COPYLS apart from the second nibble. */
   3531                if (sz >= 3)
   3532                   goto bad;
   3533                insn = XXXXXXXX(0xF, X0011,
   3534                                BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
   3535                                BITS4((sz == 0) ? 1 : 0,0,0,0),
   3536                                regD, X1010, BITS4(0,0,M,1), regM);
   3537                break;
   3538             case ARMneon_NOT: /* VMVN reg, reg*/
   3539                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
   3540                                BITS4(1,Q,M,0), regM);
   3541                break;
   3542             case ARMneon_EQZ:
                       /* F is currently always 0 (see the TODO at its
                          declaration). */
   3543                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
   3544                                regD, BITS4(0,F,0,1), BITS4(0,Q,M,0), regM);
   3545                break;
   3546             case ARMneon_CNT:
   3547                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
   3548                                BITS4(0,Q,M,0), regM);
   3549                break;
   3550             case ARMneon_CLZ:
   3551                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
   3552                                regD, X0100, BITS4(1,Q,M,0), regM);
   3553                break;
   3554             case ARMneon_CLS:
   3555                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
   3556                                regD, X0100, BITS4(0,Q,M,0), regM);
   3557                break;
   3558             case ARMneon_ABS:
   3559                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
   3560                                regD, X0011, BITS4(0,Q,M,0), regM);
   3561                break;
   3562             case ARMneon_DUP:
                       /* sz1/sz2 are redefined here as one-hot flags for
                          size 0 and size 1; any other size leaves both
                          clear.  Note the operand order: regD comes before
                          regM (the integer source) in this encoding. */
   3563                sz1 = i->ARMin.NUnary.size == 0 ? 1 : 0;
   3564                sz2 = i->ARMin.NUnary.size == 1 ? 1 : 0;
   3565                vassert(sz1 + sz2 < 2);
   3566                insn = XXXXXXXX(0xE, X1110, BITS4(1, sz1, Q, 0), regD, regM,
   3567                                X1011, BITS4(D,0,sz2,1), X0000);
   3568                break;
   3569             case ARMneon_REV16:
   3570                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
   3571                                regD, BITS4(0,0,0,1), BITS4(0,Q,M,0), regM);
   3572                break;
   3573             case ARMneon_REV32:
   3574                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
   3575                                regD, BITS4(0,0,0,0), BITS4(1,Q,M,0), regM);
   3576                break;
   3577             case ARMneon_REV64:
   3578                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
   3579                                regD, BITS4(0,0,0,0), BITS4(0,Q,M,0), regM);
   3580                break;
   3581             case ARMneon_PADDLU:
   3582                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
   3583                                regD, X0010, BITS4(1,Q,M,0), regM);
   3584                break;
   3585             case ARMneon_PADDLS:
   3586                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
   3587                                regD, X0010, BITS4(0,Q,M,0), regM);
   3588                break;
                    /* For the three VQSHLN* operations 'sz' is sliced into
                       three fields: bits 3:0, bits 5:4, and sz >> 6.
                       NOTE(review): presumably a packed size-plus-shift
                       immediate -- confirm against the code that builds
                       these instructions. */
   3589             case ARMneon_VQSHLNUU:
   3590                insn = XXXXXXXX(0xF, X0011,
   3591                                (1 << 3) | (D << 2) | ((sz >> 4) & 3),
   3592                                sz & 0xf, regD, X0111,
   3593                                BITS4(sz >> 6,Q,M,1), regM);
   3594                break;
   3595             case ARMneon_VQSHLNSS:
   3596                insn = XXXXXXXX(0xF, X0010,
   3597                                (1 << 3) | (D << 2) | ((sz >> 4) & 3),
   3598                                sz & 0xf, regD, X0111,
   3599                                BITS4(sz >> 6,Q,M,1), regM);
   3600                break;
   3601             case ARMneon_VQSHLNUS:
   3602                insn = XXXXXXXX(0xF, X0011,
   3603                                (1 << 3) | (D << 2) | ((sz >> 4) & 3),
   3604                                sz & 0xf, regD, X0110,
   3605                                BITS4(sz >> 6,Q,M,1), regM);
   3606                break;
   3607             case ARMneon_VCVTFtoS:
   3608                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
   3609                                BITS4(0,Q,M,0), regM);
   3610                break;
   3611             case ARMneon_VCVTFtoU:
   3612                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
   3613                                BITS4(1,Q,M,0), regM);
   3614                break;
   3615             case ARMneon_VCVTStoF:
   3616                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
   3617                                BITS4(0,Q,M,0), regM);
   3618                break;
   3619             case ARMneon_VCVTUtoF:
   3620                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
   3621                                BITS4(1,Q,M,0), regM);
   3622                break;
                    /* For the four fixed-point conversions 'sz' carries a
                       6-bit value: bits 5 and 4 go into sz1/sz2 and the
                       low four bits fill the following nibble. */
   3623             case ARMneon_VCVTFtoFixedU:
   3624                sz1 = (sz >> 5) & 1;
   3625                sz2 = (sz >> 4) & 1;
   3626                sz &= 0xf;
   3627                insn = XXXXXXXX(0xF, X0011,
   3628                                BITS4(1,D,sz1,sz2), sz, regD, X1111,
   3629                                BITS4(0,Q,M,1), regM);
   3630                break;
   3631             case ARMneon_VCVTFtoFixedS:
   3632                sz1 = (sz >> 5) & 1;
   3633                sz2 = (sz >> 4) & 1;
   3634                sz &= 0xf;
   3635                insn = XXXXXXXX(0xF, X0010,
   3636                                BITS4(1,D,sz1,sz2), sz, regD, X1111,
   3637                                BITS4(0,Q,M,1), regM);
   3638                break;
   3639             case ARMneon_VCVTFixedUtoF:
   3640                sz1 = (sz >> 5) & 1;
   3641                sz2 = (sz >> 4) & 1;
   3642                sz &= 0xf;
   3643                insn = XXXXXXXX(0xF, X0011,
   3644                                BITS4(1,D,sz1,sz2), sz, regD, X1110,
   3645                                BITS4(0,Q,M,1), regM);
   3646                break;
   3647             case ARMneon_VCVTFixedStoF:
   3648                sz1 = (sz >> 5) & 1;
   3649                sz2 = (sz >> 4) & 1;
   3650                sz &= 0xf;
   3651                insn = XXXXXXXX(0xF, X0010,
   3652                                BITS4(1,D,sz1,sz2), sz, regD, X1110,
   3653                                BITS4(0,Q,M,1), regM);
   3654                break;
   3655             case ARMneon_VCVTF32toF16:
   3656                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0110,
   3657                                BITS4(0,0,M,0), regM);
   3658                break;
   3659             case ARMneon_VCVTF16toF32:
   3660                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0111,
   3661                                BITS4(0,0,M,0), regM);
   3662                break;
   3663             case ARMneon_VRECIP:
   3664                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
   3665                                BITS4(0,Q,M,0), regM);
   3666                break;
   3667             case ARMneon_VRECIPF:
   3668                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
   3669                                BITS4(0,Q,M,0), regM);
   3670                break;
   3671             case ARMneon_VABSFP:
   3672                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
   3673                                BITS4(0,Q,M,0), regM);
   3674                break;
   3675             case ARMneon_VRSQRTEFP:
   3676                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
   3677                                BITS4(1,Q,M,0), regM);
   3678                break;
   3679             case ARMneon_VRSQRTE:
   3680                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
   3681                                BITS4(1,Q,M,0), regM);
   3682                break;
   3683             case ARMneon_VNEGF:
   3684                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
   3685                                BITS4(1,Q,M,0), regM);
   3686                break;
   3687 
   3688             default:
   3689                goto bad;
   3690          }
                 /* Every supported operation reaches here with 'insn' set. */
   3691          *p++ = insn;
   3692          goto done;
   3693       }
   3694       case ARMin_NDual: {
   3695          UInt Q = i->ARMin.NDual.Q ? 1 : 0;
   3696          UInt regD = (hregClass(i->ARMin.NDual.arg1) == HRcVec128)
   3697                        ? (qregNo(i->ARMin.NDual.arg1) << 1)
   3698                        : dregNo(i->ARMin.NDual.arg1);
   3699          UInt regM = (hregClass(i->ARMin.NDual.arg2) == HRcVec128)
   3700                        ? (qregNo(i->ARMin.NDual.arg2) << 1)
   3701                        : dregNo(i->ARMin.NDual.arg2);
   3702          UInt D = regD >> 4;
   3703          UInt M = regM >> 4;
   3704          UInt sz1 = i->ARMin.NDual.size >> 1;
   3705          UInt sz2 = i->ARMin.NDual.size & 1;
   3706          UInt insn;
   3707          regD &= 0xF;
   3708          regM &= 0xF;
   3709          switch (i->ARMin.NDual.op) {
   3710             case ARMneon_TRN: /* VTRN reg, reg */
   3711                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
   3712                                regD, X0000, BITS4(1,Q,M,0), regM);
   3713                break;
   3714             case ARMneon_ZIP: /* VZIP reg, reg */
   3715                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
   3716                                regD, X0001, BITS4(1,Q,M,0), regM);
   3717                break;
   3718             case ARMneon_UZP: /* VUZP reg, reg */
   3719                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
   3720                                regD, X0001, BITS4(0,Q,M,0), regM);
   3721                break;
   3722             default:
   3723                goto bad;
   3724          }
   3725          *p++ = insn;
   3726          goto done;
   3727       }
   3728       case ARMin_NBinary: {
   3729          UInt Q = i->ARMin.NBinary.Q ? 1 : 0;
   3730          UInt regD = (hregClass(i->ARMin.NBinary.dst) == HRcVec128)
   3731                        ? (qregNo(i->ARMin.NBinary.dst) << 1)
   3732                        : dregNo(i->ARMin.NBinary.dst);
   3733          UInt regN = (hregClass(i->ARMin.NBinary.argL) == HRcVec128)
   3734                        ? (qregNo(i->ARMin.NBinary.argL) << 1)
   3735                        : dregNo(i->ARMin.NBinary.argL);
   3736          UInt regM = (hregClass(i->ARMin.NBinary.argR) == HRcVec128)
   3737                        ? (qregNo(i->ARMin.NBinary.argR) << 1)
   3738                        : dregNo(i->ARMin.NBinary.argR);
   3739          UInt sz1 = i->ARMin.NBinary.size >> 1;
   3740          UInt sz2 = i->ARMin.NBinary.size & 1;
   3741          UInt D = regD >> 4;
   3742          UInt N = regN >> 4;
   3743          UInt M = regM >> 4;
   3744          UInt insn;
   3745          regD &= 0xF;
   3746          regM &= 0xF;
   3747          regN &= 0xF;
   3748          switch (i->ARMin.NBinary.op) {
   3749             case ARMneon_VAND: /* VAND reg, reg, reg */
   3750                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X0001,
   3751                                BITS4(N,Q,M,1), regM);
   3752                break;
   3753             case ARMneon_VORR: /* VORR reg, reg, reg*/
   3754                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X0001,
   3755                                BITS4(N,Q,M,1), regM);
   3756                break;
   3757             case ARMneon_VXOR: /* VEOR reg, reg, reg */
   3758                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X0001,
   3759                                BITS4(N,Q,M,1), regM);
   3760                break;
   3761             case ARMneon_VADD: /* VADD reg, reg, reg */
   3762                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   3763                                X1000, BITS4(N,Q,M,0), regM);
   3764                break;
   3765             case ARMneon_VSUB: /* VSUB reg, reg, reg */
   3766                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   3767                                X1000, BITS4(N,Q,M,0), regM);
   3768                break;
   3769             case ARMneon_VMINU: /* VMIN.Uxx reg, reg, reg */
   3770                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   3771                                X0110, BITS4(N,Q,M,1), regM);
   3772                break;
   3773             case ARMneon_VMINS: /* VMIN.Sxx reg, reg, reg */
   3774                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   3775                                X0110, BITS4(N,Q,M,1), regM);
   3776                break;
   3777             case ARMneon_VMAXU: /* VMAX.Uxx reg, reg, reg */
   3778                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   3779                                X0110, BITS4(N,Q,M,0), regM);
   3780                break;
   3781             case ARMneon_VMAXS: /* VMAX.Sxx reg, reg, reg */
   3782                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   3783                                X0110, BITS4(N,Q,M,0), regM);
   3784                break;
   3785             case ARMneon_VRHADDS: /* VRHADD.Sxx reg, reg, reg */
   3786                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   3787                                X0001, BITS4(N,Q,M,0), regM);
   3788                break;
   3789             case ARMneon_VRHADDU: /* VRHADD.Uxx reg, reg, reg */
   3790                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   3791                                X0001, BITS4(N,Q,M,0), regM);
   3792                break;
   3793             case ARMneon_VQADDU: /* VQADD unsigned reg, reg, reg */
   3794                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   3795                                X0000, BITS4(N,Q,M,1), regM);
   3796                break;
   3797             case ARMneon_VQADDS: /* VQADD signed reg, reg, reg */
   3798                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   3799                                X0000, BITS4(N,Q,M,1), regM);
   3800                break;
   3801             case ARMneon_VQSUBU: /* VQSUB unsigned reg, reg, reg */
   3802                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   3803                                X0010, BITS4(N,Q,M,1), regM);
   3804                break;
   3805             case ARMneon_VQSUBS: /* VQSUB signed reg, reg, reg */
   3806                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   3807                                X0010, BITS4(N,Q,M,1), regM);
   3808                break;
   3809             case ARMneon_VCGTU: /* VCGT unsigned reg, reg, reg */
   3810                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   3811                                X0011, BITS4(N,Q,M,0), regM);
   3812                break;
   3813             case ARMneon_VCGTS: /* VCGT signed reg, reg, reg */
   3814                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   3815                                X0011, BITS4(N,Q,M,0), regM);
   3816                break;
   3817             case ARMneon_VCGEU: /* VCGE unsigned reg, reg, reg */
   3818                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   3819                                X0011, BITS4(N,Q,M,1), regM);
   3820                break;
   3821             case ARMneon_VCGES: /* VCGE signed reg, reg, reg */
   3822                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   3823                                X0011, BITS4(N,Q,M,1), regM);
   3824                break;
   3825             case ARMneon_VCEQ: /* VCEQ reg, reg, reg */
   3826                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   3827                                X1000, BITS4(N,Q,M,1), regM);
   3828                break;
   3829             case ARMneon_VEXT: /* VEXT.8 reg, reg, reg, #imm4 */
   3830                if (i->ARMin.NBinary.size >= 16)
   3831                   goto bad;
   3832                insn = XXXXXXXX(0xF, X0010, BITS4(1,D,1,1), regN, regD,
   3833                                i->ARMin.NBinary.size & 0xf, BITS4(N,Q,M,0),
   3834                                regM);
   3835                break;
   3836             case ARMneon_VMUL:
   3837                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   3838                                X1001, BITS4(N,Q,M,1), regM);
   3839                break;
   3840             case ARMneon_VMULLU:
   3841                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,sz1,sz2), regN, regD,
   3842                                X1100, BITS4(N,0,M,0), regM);
   3843                break;
   3844             case ARMneon_VMULLS:
   3845                insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
   3846                                X1100, BITS4(N,0,M,0), regM);
   3847                break;
   3848             case ARMneon_VMULP:
   3849                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   3850                                X1001, BITS4(N,Q,M,1), regM);
   3851                break;
   3852             case ARMneon_VMULFP:
   3853                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
   3854                                X1101, BITS4(N,Q,M,1), regM);
   3855                break;
   3856             case ARMneon_VMULLP:
   3857                insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
   3858                                X1110, BITS4(N,0,M,0), regM);
   3859                break;
   3860             case ARMneon_VQDMULH:
   3861                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   3862                                X1011, BITS4(N,Q,M,0), regM);
   3863                break;
   3864             case ARMneon_VQRDMULH:
   3865                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   3866                                X1011, BITS4(N,Q,M,0), regM);
   3867                break;
   3868             case ARMneon_VQDMULL:
   3869                insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
   3870                                X1101, BITS4(N,0,M,0), regM);
   3871                break;
   3872             case ARMneon_VTBL:
   3873                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), regN, regD,
   3874                                X1000, BITS4(N,0,M,0), regM);
   3875                break;
   3876             case ARMneon_VPADD:
   3877                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   3878                                X1011, BITS4(N,Q,M,1), regM);
   3879                break;
   3880             case ARMneon_VPADDFP:
   3881                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
   3882                                X1101, BITS4(N,Q,M,0), regM);
   3883                break;
   3884             case ARMneon_VPMINU:
   3885                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   3886                                X1010, BITS4(N,Q,M,1), regM);
   3887                break;
   3888             case ARMneon_VPMINS:
   3889                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   3890                                X1010, BITS4(N,Q,M,1), regM);
   3891                break;
   3892             case ARMneon_VPMAXU:
   3893                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   3894                                X1010, BITS4(N,Q,M,0), regM);
   3895                break;
   3896             case ARMneon_VPMAXS:
   3897                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   3898                                X1010, BITS4(N,Q,M,0), regM);
   3899                break;
   3900             case ARMneon_VADDFP: /* VADD reg, reg, reg */
   3901                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
   3902                                X1101, BITS4(N,Q,M,0), regM);
   3903                break;
   3904             case ARMneon_VSUBFP: /* VSUB reg, reg, reg */
   3905                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
   3906                                X1101, BITS4(N,Q,M,0), regM);
   3907                break;
   3908             case ARMneon_VABDFP: /* VABD reg, reg, reg */
   3909                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
   3910                                X1101, BITS4(N,Q,M,0), regM);
   3911                break;
   3912             case ARMneon_VMINF:
   3913                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
   3914                                X1111, BITS4(N,Q,M,0), regM);
   3915                break;
   3916             case ARMneon_VMAXF:
   3917                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
   3918                                X1111, BITS4(N,Q,M,0), regM);
   3919                break;
   3920             case ARMneon_VPMINF:
   3921                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
   3922                                X1111, BITS4(N,Q,M,0), regM);
   3923                break;
   3924             case ARMneon_VPMAXF:
   3925                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
   3926                                X1111, BITS4(N,Q,M,0), regM);
   3927                break;
   3928             case ARMneon_VRECPS:
   3929                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1111,
   3930                                BITS4(N,Q,M,1), regM);
   3931                break;
   3932             case ARMneon_VCGTF:
   3933                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD, X1110,
   3934                                BITS4(N,Q,M,0), regM);
   3935                break;
   3936             case ARMneon_VCGEF:
   3937                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X1110,
   3938                                BITS4(N,Q,M,0), regM);
   3939                break;
   3940             case ARMneon_VCEQF:
   3941                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1110,
   3942                                BITS4(N,Q,M,0), regM);
   3943                break;
   3944             case ARMneon_VRSQRTS:
   3945                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X1111,
   3946                                BITS4(N,Q,M,1), regM);
   3947                break;
   3948             default:
   3949                goto bad;
   3950          }
   3951          *p++ = insn;
   3952          goto done;
   3953       }
   3954       case ARMin_NShift: {
   3955          UInt Q = i->ARMin.NShift.Q ? 1 : 0;
   3956          UInt regD = (hregClass(i->ARMin.NShift.dst) == HRcVec128)
   3957                        ? (qregNo(i->ARMin.NShift.dst) << 1)
   3958                        : dregNo(i->ARMin.NShift.dst);
   3959          UInt regM = (hregClass(i->ARMin.NShift.argL) == HRcVec128)
   3960                        ? (qregNo(i->ARMin.NShift.argL) << 1)
   3961                        : dregNo(i->ARMin.NShift.argL);
   3962          UInt regN = (hregClass(i->ARMin.NShift.argR) == HRcVec128)
   3963                        ? (qregNo(i->ARMin.NShift.argR) << 1)
   3964                        : dregNo(i->ARMin.NShift.argR);
   3965          UInt sz1 = i->ARMin.NShift.size >> 1;
   3966          UInt sz2 = i->ARMin.NShift.size & 1;
   3967          UInt D = regD >> 4;
   3968          UInt N = regN >> 4;
   3969          UInt M = regM >> 4;
   3970          UInt insn;
   3971          regD &= 0xF;
   3972          regM &= 0xF;
   3973          regN &= 0xF;
   3974          switch (i->ARMin.NShift.op) {
   3975             case ARMneon_VSHL:
   3976                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   3977                                X0100, BITS4(N,Q,M,0), regM);
   3978                break;
   3979             case ARMneon_VSAL:
   3980                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   3981                                X0100, BITS4(N,Q,M,0), regM);
   3982                break;
   3983             case ARMneon_VQSHL:
   3984                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   3985                                X0100, BITS4(N,Q,M,1), regM);
   3986                break;
   3987             case ARMneon_VQSAL:
   3988                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   3989                                X0100, BITS4(N,Q,M,1), regM);
   3990                break;
   3991             default:
   3992                goto bad;
   3993          }
   3994          *p++ = insn;
   3995          goto done;
   3996       }
   3997       case ARMin_NeonImm: {
   3998          UInt Q = (hregClass(i->ARMin.NeonImm.dst) == HRcVec128) ? 1 : 0;
   3999          UInt regD = Q ? (qregNo(i->ARMin.NeonImm.dst) << 1) :
   4000                           dregNo(i->ARMin.NeonImm.dst);
   4001          UInt D = regD >> 4;
   4002          UInt imm = i->ARMin.NeonImm.imm->imm8;
   4003          UInt tp = i->ARMin.NeonImm.imm->type;
   4004          UInt j = imm >> 7;
   4005          UInt imm3 = (imm >> 4) & 0x7;
   4006          UInt imm4 = imm & 0xF;
   4007          UInt cmode, op;
   4008          UInt insn;
   4009          regD &= 0xF;
   4010          if (tp == 9)
   4011             op = 1;
   4012          else
   4013             op = 0;
   4014          switch (tp) {
   4015             case 0:
   4016             case 1:
   4017             case 2:
   4018             case 3:
   4019             case 4:
   4020             case 5:
   4021                cmode = tp << 1;
   4022                break;
   4023             case 9:
   4024             case 6:
   4025                cmode = 14;
   4026                break;
   4027             case 7:
   4028                cmode = 12;
   4029                break;
   4030             case 8:
   4031                cmode = 13;
   4032                break;
   4033             case 10:
   4034                cmode = 15;
   4035                break;
   4036             default:
   4037                vpanic("ARMin_NeonImm");
   4038 
   4039          }
   4040          insn = XXXXXXXX(0xF, BITS4(0,0,1,j), BITS4(1,D,0,0), imm3, regD,
   4041                          cmode, BITS4(0,Q,op,1), imm4);
   4042          *p++ = insn;
   4043          goto done;
   4044       }
   4045       case ARMin_NCMovQ: {
   4046          UInt cc = (UInt)i->ARMin.NCMovQ.cond;
   4047          UInt qM = qregNo(i->ARMin.NCMovQ.src) << 1;
   4048          UInt qD = qregNo(i->ARMin.NCMovQ.dst) << 1;
   4049          UInt vM = qM & 0xF;
   4050          UInt vD = qD & 0xF;
   4051          UInt M  = (qM >> 4) & 1;
   4052          UInt D  = (qD >> 4) & 1;
   4053          vassert(cc < 16 && cc != ARMcc_AL && cc != ARMcc_NV);
   4054          /* b!cc here+8: !cc A00 0000 */
   4055          UInt insn = XXXXXXXX(cc ^ 1, 0xA, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0);
   4056          *p++ = insn;
   4057          /* vmov qD, qM */
   4058          insn = XXXXXXXX(0xF, 0x2, BITS4(0,D,1,0),
   4059                          vM, vD, BITS4(0,0,0,1), BITS4(M,1,M,1), vM);
   4060          *p++ = insn;
   4061          goto done;
   4062       }
   4063       case ARMin_Add32: {
   4064          UInt regD = iregNo(i->ARMin.Add32.rD);
   4065          UInt regN = iregNo(i->ARMin.Add32.rN);
   4066          UInt imm32 = i->ARMin.Add32.imm32;
   4067          vassert(regD != regN);
   4068          /* MOV regD, imm32 */
   4069          p = imm32_to_iregNo((UInt *)p, regD, imm32);
   4070          /* ADD regD, regN, regD */
   4071          UInt insn = XXXXXXXX(0xE, 0, X1000, regN, regD, 0, 0, regD);
   4072          *p++ = insn;
   4073          goto done;
   4074       }
   4075       /* ... */
   4076       default:
   4077          goto bad;
   4078     }
   4079 
   4080   bad:
   4081    ppARMInstr(i);
   4082    vpanic("emit_ARMInstr");
   4083    /*NOTREACHED*/
   4084 
   4085   done:
   4086    vassert(((UChar*)p) - &buf[0] <= 32);
   4087    return ((UChar*)p) - &buf[0];
   4088 }
   4089 /* Undefine the instruction-encoding helper macros used by the emitter above. */
   4090 #undef BITS4
   4091 #undef X0000
   4092 #undef X0001
   4093 #undef X0010
   4094 #undef X0011
   4095 #undef X0100
   4096 #undef X0101
   4097 #undef X0110
   4098 #undef X0111
   4099 #undef X1000
   4100 #undef X1001
   4101 #undef X1010
   4102 #undef X1011
   4103 #undef X1100
   4104 #undef X1101
   4105 #undef X1110
   4106 #undef X1111
   4107 #undef XXXXX___
   4108 #undef XXXXXX__
   4109 #undef XXX___XX
   4110 #undef XXXXX__X
   4111 #undef XXXXXXXX
   4112 
   4113 /*---------------------------------------------------------------*/
   4114 /*--- end                                     host_arm_defs.c ---*/
   4115 /*---------------------------------------------------------------*/
   4116