Home | History | Annotate | Download | only in priv
      1 
      2 /*---------------------------------------------------------------*/
      3 /*--- begin                                   host_arm_defs.c ---*/
      4 /*---------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2012 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    NEON support is
     14    Copyright (C) 2010-2012 Samsung Electronics
     15    contributed by Dmitry Zhurikhin <zhur (at) ispras.ru>
     16               and Kirill Batuzov <batuzovk (at) ispras.ru>
     17 
     18    This program is free software; you can redistribute it and/or
     19    modify it under the terms of the GNU General Public License as
     20    published by the Free Software Foundation; either version 2 of the
     21    License, or (at your option) any later version.
     22 
     23    This program is distributed in the hope that it will be useful, but
     24    WITHOUT ANY WARRANTY; without even the implied warranty of
     25    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     26    General Public License for more details.
     27 
     28    You should have received a copy of the GNU General Public License
     29    along with this program; if not, write to the Free Software
     30    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     31    02110-1301, USA.
     32 
     33    The GNU General Public License is contained in the file COPYING.
     34 */
     35 
     36 #include "libvex_basictypes.h"
     37 #include "libvex.h"
     38 #include "libvex_trc_values.h"
     39 
     40 #include "main_util.h"
     41 #include "host_generic_regs.h"
     42 #include "host_arm_defs.h"
     43 
/* Host hardware-capability bits for ARM.  Zero here; presumably set
   elsewhere during VEX initialisation -- TODO confirm against caller. */
UInt arm_hwcaps = 0;
     45 
     46 
     47 /* --------- Registers. --------- */
     48 
     49 /* The usual HReg abstraction.
     50    There are 16 general purpose regs.
     51 */
     52 
     53 void ppHRegARM ( HReg reg )  {
     54    Int r;
     55    /* Be generic for all virtual regs. */
     56    if (hregIsVirtual(reg)) {
     57       ppHReg(reg);
     58       return;
     59    }
     60    /* But specific for real regs. */
     61    switch (hregClass(reg)) {
     62       case HRcInt32:
     63          r = hregNumber(reg);
     64          vassert(r >= 0 && r < 16);
     65          vex_printf("r%d", r);
     66          return;
     67       case HRcFlt64:
     68          r = hregNumber(reg);
     69          vassert(r >= 0 && r < 32);
     70          vex_printf("d%d", r);
     71          return;
     72       case HRcFlt32:
     73          r = hregNumber(reg);
     74          vassert(r >= 0 && r < 32);
     75          vex_printf("s%d", r);
     76          return;
     77       case HRcVec128:
     78          r = hregNumber(reg);
     79          vassert(r >= 0 && r < 16);
     80          vex_printf("q%d", r);
     81          return;
     82       default:
     83          vpanic("ppHRegARM");
     84    }
     85 }
     86 
/* Constructors for the real (non-virtual) ARM registers the backend
   refers to by name: the 16 GPRs, the FP/vector registers the
   allocator uses (d8-d12, s26-s30, q8-q15). */
HReg hregARM_R0  ( void ) { return mkHReg(0,  HRcInt32, False); }
HReg hregARM_R1  ( void ) { return mkHReg(1,  HRcInt32, False); }
HReg hregARM_R2  ( void ) { return mkHReg(2,  HRcInt32, False); }
HReg hregARM_R3  ( void ) { return mkHReg(3,  HRcInt32, False); }
HReg hregARM_R4  ( void ) { return mkHReg(4,  HRcInt32, False); }
HReg hregARM_R5  ( void ) { return mkHReg(5,  HRcInt32, False); }
HReg hregARM_R6  ( void ) { return mkHReg(6,  HRcInt32, False); }
HReg hregARM_R7  ( void ) { return mkHReg(7,  HRcInt32, False); }
HReg hregARM_R8  ( void ) { return mkHReg(8,  HRcInt32, False); }
HReg hregARM_R9  ( void ) { return mkHReg(9,  HRcInt32, False); }
HReg hregARM_R10 ( void ) { return mkHReg(10, HRcInt32, False); }
HReg hregARM_R11 ( void ) { return mkHReg(11, HRcInt32, False); }
HReg hregARM_R12 ( void ) { return mkHReg(12, HRcInt32, False); }
HReg hregARM_R13 ( void ) { return mkHReg(13, HRcInt32, False); }
HReg hregARM_R14 ( void ) { return mkHReg(14, HRcInt32, False); }
HReg hregARM_R15 ( void ) { return mkHReg(15, HRcInt32, False); }
HReg hregARM_D8  ( void ) { return mkHReg(8,  HRcFlt64, False); }
HReg hregARM_D9  ( void ) { return mkHReg(9,  HRcFlt64, False); }
HReg hregARM_D10 ( void ) { return mkHReg(10, HRcFlt64, False); }
HReg hregARM_D11 ( void ) { return mkHReg(11, HRcFlt64, False); }
HReg hregARM_D12 ( void ) { return mkHReg(12, HRcFlt64, False); }
HReg hregARM_S26 ( void ) { return mkHReg(26, HRcFlt32, False); }
HReg hregARM_S27 ( void ) { return mkHReg(27, HRcFlt32, False); }
HReg hregARM_S28 ( void ) { return mkHReg(28, HRcFlt32, False); }
HReg hregARM_S29 ( void ) { return mkHReg(29, HRcFlt32, False); }
HReg hregARM_S30 ( void ) { return mkHReg(30, HRcFlt32, False); }
HReg hregARM_Q8  ( void ) { return mkHReg(8,  HRcVec128, False); }
HReg hregARM_Q9  ( void ) { return mkHReg(9,  HRcVec128, False); }
HReg hregARM_Q10 ( void ) { return mkHReg(10, HRcVec128, False); }
HReg hregARM_Q11 ( void ) { return mkHReg(11, HRcVec128, False); }
HReg hregARM_Q12 ( void ) { return mkHReg(12, HRcVec128, False); }
HReg hregARM_Q13 ( void ) { return mkHReg(13, HRcVec128, False); }
HReg hregARM_Q14 ( void ) { return mkHReg(14, HRcVec128, False); }
HReg hregARM_Q15 ( void ) { return mkHReg(15, HRcVec128, False); }
    121 
    122 void getAllocableRegs_ARM ( Int* nregs, HReg** arr )
    123 {
    124    Int i = 0;
    125    *nregs = 26;
    126    *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
    127    // callee saves ones are listed first, since we prefer them
    128    // if they're available
    129    (*arr)[i++] = hregARM_R4();
    130    (*arr)[i++] = hregARM_R5();
    131    (*arr)[i++] = hregARM_R6();
    132    (*arr)[i++] = hregARM_R7();
    133    (*arr)[i++] = hregARM_R10();
    134    (*arr)[i++] = hregARM_R11();
    135    // otherwise we'll have to slum it out with caller-saves ones
    136    (*arr)[i++] = hregARM_R0();
    137    (*arr)[i++] = hregARM_R1();
    138    (*arr)[i++] = hregARM_R2();
    139    (*arr)[i++] = hregARM_R3();
    140    (*arr)[i++] = hregARM_R9();
    141    // FP hreegisters.  Note: these are all callee-save.  Yay!
    142    // Hence we don't need to mention them as trashed in
    143    // getHRegUsage for ARMInstr_Call.
    144    (*arr)[i++] = hregARM_D8();
    145    (*arr)[i++] = hregARM_D9();
    146    (*arr)[i++] = hregARM_D10();
    147    (*arr)[i++] = hregARM_D11();
    148    (*arr)[i++] = hregARM_D12();
    149    (*arr)[i++] = hregARM_S26();
    150    (*arr)[i++] = hregARM_S27();
    151    (*arr)[i++] = hregARM_S28();
    152    (*arr)[i++] = hregARM_S29();
    153    (*arr)[i++] = hregARM_S30();
    154 
    155    (*arr)[i++] = hregARM_Q8();
    156    (*arr)[i++] = hregARM_Q9();
    157    (*arr)[i++] = hregARM_Q10();
    158    (*arr)[i++] = hregARM_Q11();
    159    (*arr)[i++] = hregARM_Q12();
    160 
    161    //(*arr)[i++] = hregARM_Q13();
    162    //(*arr)[i++] = hregARM_Q14();
    163    //(*arr)[i++] = hregARM_Q15();
    164 
    165    // unavail: r8 as GSP
    166    // r12 is used as a spill/reload temporary
    167    // r13 as SP
    168    // r14 as LR
    169    // r15 as PC
    170    //
    171    // All in all, we have 11 allocatable integer registers:
    172    // 0 1 2 3 4 5 6 7 9 10 11, with r8 dedicated as GSP
    173    // and r12 dedicated as a spill temporary.
    174    // 13 14 and 15 are not under the allocator's control.
    175    //
    176    // Hence for the allocatable registers we have:
    177    //
    178    // callee-saved: 4 5 6 7 (8) 9 10 11
    179    // caller-saved: 0 1 2 3
    180    // Note 9 is ambiguous: the base EABI does not give an e/r-saved
    181    // designation for it, but the Linux instantiation of the ABI
    182    // specifies it as callee-saved.
    183    //
    184    // If the set of available registers changes or if the e/r status
    185    // changes, be sure to re-check/sync the definition of
    186    // getHRegUsage for ARMInstr_Call too.
    187    vassert(i == *nregs);
    188 }
    189 
    190 
    191 
    192 /* --------- Condition codes, ARM encoding. --------- */
    193 
    194 HChar* showARMCondCode ( ARMCondCode cond ) {
    195    switch (cond) {
    196        case ARMcc_EQ:  return "eq";
    197        case ARMcc_NE:  return "ne";
    198        case ARMcc_HS:  return "hs";
    199        case ARMcc_LO:  return "lo";
    200        case ARMcc_MI:  return "mi";
    201        case ARMcc_PL:  return "pl";
    202        case ARMcc_VS:  return "vs";
    203        case ARMcc_VC:  return "vc";
    204        case ARMcc_HI:  return "hi";
    205        case ARMcc_LS:  return "ls";
    206        case ARMcc_GE:  return "ge";
    207        case ARMcc_LT:  return "lt";
    208        case ARMcc_GT:  return "gt";
    209        case ARMcc_LE:  return "le";
    210        case ARMcc_AL:  return "al"; // default
    211        case ARMcc_NV:  return "nv";
    212        default: vpanic("showARMCondCode");
    213    }
    214 }
    215 
    216 
    217 /* --------- Mem AModes: Addressing Mode 1 --------- */
    218 
    219 ARMAMode1* ARMAMode1_RI  ( HReg reg, Int simm13 ) {
    220    ARMAMode1* am        = LibVEX_Alloc(sizeof(ARMAMode1));
    221    am->tag              = ARMam1_RI;
    222    am->ARMam1.RI.reg    = reg;
    223    am->ARMam1.RI.simm13 = simm13;
    224    vassert(-4095 <= simm13 && simm13 <= 4095);
    225    return am;
    226 }
    227 ARMAMode1* ARMAMode1_RRS ( HReg base, HReg index, UInt shift ) {
    228    ARMAMode1* am        = LibVEX_Alloc(sizeof(ARMAMode1));
    229    am->tag              = ARMam1_RRS;
    230    am->ARMam1.RRS.base  = base;
    231    am->ARMam1.RRS.index = index;
    232    am->ARMam1.RRS.shift = shift;
    233    vassert(0 <= shift && shift <= 3);
    234    return am;
    235 }
    236 
    237 void ppARMAMode1 ( ARMAMode1* am ) {
    238    switch (am->tag) {
    239       case ARMam1_RI:
    240          vex_printf("%d(", am->ARMam1.RI.simm13);
    241          ppHRegARM(am->ARMam1.RI.reg);
    242          vex_printf(")");
    243          break;
    244       case ARMam1_RRS:
    245          vex_printf("(");
    246          ppHRegARM(am->ARMam1.RRS.base);
    247          vex_printf(",");
    248          ppHRegARM(am->ARMam1.RRS.index);
    249          vex_printf(",%u)", am->ARMam1.RRS.shift);
    250          break;
    251       default:
    252          vassert(0);
    253    }
    254 }
    255 
/* Record in |u| the registers read by address mode |am|. */
static void addRegUsage_ARMAMode1 ( HRegUsage* u, ARMAMode1* am ) {
   switch (am->tag) {
      case ARMam1_RI:
         addHRegUse(u, HRmRead, am->ARMam1.RI.reg);
         return;
      case ARMam1_RRS:
         /* The RRS form is deliberately unhandled (code left disabled
            below); if it ever reaches here we panic.  Falls through. */
         //    addHRegUse(u, HRmRead, am->ARMam1.RRS.base);
         //    addHRegUse(u, HRmRead, am->ARMam1.RRS.index);
         //   return;
      default:
         vpanic("addRegUsage_ARMAmode1");
   }
}
    269 
/* Apply the register remapping |m| to the registers in |am|. */
static void mapRegs_ARMAMode1 ( HRegRemap* m, ARMAMode1* am ) {
   switch (am->tag) {
      case ARMam1_RI:
         am->ARMam1.RI.reg = lookupHRegRemap(m, am->ARMam1.RI.reg);
         return;
      case ARMam1_RRS:
         /* RRS deliberately unhandled -- see addRegUsage_ARMAMode1.
            Falls through to the panic. */
         //am->ARMam1.RR.base =lookupHRegRemap(m, am->ARMam1.RR.base);
         //am->ARMam1.RR.index = lookupHRegRemap(m, am->ARMam1.RR.index);
         //return;
      default:
         vpanic("mapRegs_ARMAmode1");
   }
}
    283 
    284 
    285 /* --------- Mem AModes: Addressing Mode 2 --------- */
    286 
    287 ARMAMode2* ARMAMode2_RI ( HReg reg, Int simm9 ) {
    288    ARMAMode2* am       = LibVEX_Alloc(sizeof(ARMAMode2));
    289    am->tag             = ARMam2_RI;
    290    am->ARMam2.RI.reg   = reg;
    291    am->ARMam2.RI.simm9 = simm9;
    292    vassert(-255 <= simm9 && simm9 <= 255);
    293    return am;
    294 }
    295 ARMAMode2* ARMAMode2_RR ( HReg base, HReg index ) {
    296    ARMAMode2* am       = LibVEX_Alloc(sizeof(ARMAMode2));
    297    am->tag             = ARMam2_RR;
    298    am->ARMam2.RR.base  = base;
    299    am->ARMam2.RR.index = index;
    300    return am;
    301 }
    302 
    303 void ppARMAMode2 ( ARMAMode2* am ) {
    304    switch (am->tag) {
    305       case ARMam2_RI:
    306          vex_printf("%d(", am->ARMam2.RI.simm9);
    307          ppHRegARM(am->ARMam2.RI.reg);
    308          vex_printf(")");
    309          break;
    310       case ARMam2_RR:
    311          vex_printf("(");
    312          ppHRegARM(am->ARMam2.RR.base);
    313          vex_printf(",");
    314          ppHRegARM(am->ARMam2.RR.index);
    315          vex_printf(")");
    316          break;
    317       default:
    318          vassert(0);
    319    }
    320 }
    321 
/* Record in |u| the registers read by address mode |am|. */
static void addRegUsage_ARMAMode2 ( HRegUsage* u, ARMAMode2* am ) {
   switch (am->tag) {
      case ARMam2_RI:
         addHRegUse(u, HRmRead, am->ARMam2.RI.reg);
         return;
      case ARMam2_RR:
         /* The RR form is deliberately unhandled (code left disabled
            below); panic if it ever shows up.  Falls through. */
         //    addHRegUse(u, HRmRead, am->ARMam2.RR.base);
         //    addHRegUse(u, HRmRead, am->ARMam2.RR.index);
         //   return;
      default:
         vpanic("addRegUsage_ARMAmode2");
   }
}
    335 
/* Apply the register remapping |m| to the registers in |am|. */
static void mapRegs_ARMAMode2 ( HRegRemap* m, ARMAMode2* am ) {
   switch (am->tag) {
      case ARMam2_RI:
         am->ARMam2.RI.reg = lookupHRegRemap(m, am->ARMam2.RI.reg);
         return;
      case ARMam2_RR:
         /* RR deliberately unhandled -- see addRegUsage_ARMAMode2.
            Falls through to the panic. */
         //am->ARMam2.RR.base =lookupHRegRemap(m, am->ARMam2.RR.base);
         //am->ARMam2.RR.index = lookupHRegRemap(m, am->ARMam2.RR.index);
         //return;
      default:
         vpanic("mapRegs_ARMAmode2");
   }
}
    349 
    350 
    351 /* --------- Mem AModes: Addressing Mode VFP --------- */
    352 
    353 ARMAModeV* mkARMAModeV ( HReg reg, Int simm11 ) {
    354    ARMAModeV* am = LibVEX_Alloc(sizeof(ARMAModeV));
    355    vassert(simm11 >= -1020 && simm11 <= 1020);
    356    vassert(0 == (simm11 & 3));
    357    am->reg    = reg;
    358    am->simm11 = simm11;
    359    return am;
    360 }
    361 
/* Print a VFP address mode as "imm(reg)". */
void ppARMAModeV ( ARMAModeV* am ) {
   vex_printf("%d(", am->simm11);
   ppHRegARM(am->reg);
   vex_printf(")");
}
    367 
/* Record that |am|'s base register is read. */
static void addRegUsage_ARMAModeV ( HRegUsage* u, ARMAModeV* am ) {
   addHRegUse(u, HRmRead, am->reg);
}
    371 
/* Apply the register remapping |m| to |am|'s base register. */
static void mapRegs_ARMAModeV ( HRegRemap* m, ARMAModeV* am ) {
   am->reg = lookupHRegRemap(m, am->reg);
}
    375 
    376 
    377 /* --------- Mem AModes: Addressing Mode Neon ------- */
    378 
    379 ARMAModeN *mkARMAModeN_RR ( HReg rN, HReg rM ) {
    380    ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN));
    381    am->tag = ARMamN_RR;
    382    am->ARMamN.RR.rN = rN;
    383    am->ARMamN.RR.rM = rM;
    384    return am;
    385 }
    386 
    387 ARMAModeN *mkARMAModeN_R ( HReg rN ) {
    388    ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN));
    389    am->tag = ARMamN_R;
    390    am->ARMamN.R.rN = rN;
    391    return am;
    392 }
    393 
    394 static void addRegUsage_ARMAModeN ( HRegUsage* u, ARMAModeN* am ) {
    395    if (am->tag == ARMamN_R) {
    396       addHRegUse(u, HRmRead, am->ARMamN.R.rN);
    397    } else {
    398       addHRegUse(u, HRmRead, am->ARMamN.RR.rN);
    399       addHRegUse(u, HRmRead, am->ARMamN.RR.rM);
    400    }
    401 }
    402 
    403 static void mapRegs_ARMAModeN ( HRegRemap* m, ARMAModeN* am ) {
    404    if (am->tag == ARMamN_R) {
    405       am->ARMamN.R.rN = lookupHRegRemap(m, am->ARMamN.R.rN);
    406    } else {
    407       am->ARMamN.RR.rN = lookupHRegRemap(m, am->ARMamN.RR.rN);
    408       am->ARMamN.RR.rM = lookupHRegRemap(m, am->ARMamN.RR.rM);
    409    }
    410 }
    411 
    412 void ppARMAModeN ( ARMAModeN* am ) {
    413    vex_printf("[");
    414    if (am->tag == ARMamN_R) {
    415       ppHRegARM(am->ARMamN.R.rN);
    416    } else {
    417       ppHRegARM(am->ARMamN.RR.rN);
    418    }
    419    vex_printf("]");
    420    if (am->tag == ARMamN_RR) {
    421       vex_printf(", ");
    422       ppHRegARM(am->ARMamN.RR.rM);
    423    }
    424 }
    425 
    426 
    427 /* --------- Reg or imm-8x4 operands --------- */
    428 
    429 static UInt ROR32 ( UInt x, UInt sh ) {
    430    vassert(sh >= 0 && sh < 32);
    431    if (sh == 0)
    432       return x;
    433    else
    434       return (x << (32-sh)) | (x >> sh);
    435 }
    436 
    437 ARMRI84* ARMRI84_I84 ( UShort imm8, UShort imm4 ) {
    438    ARMRI84* ri84          = LibVEX_Alloc(sizeof(ARMRI84));
    439    ri84->tag              = ARMri84_I84;
    440    ri84->ARMri84.I84.imm8 = imm8;
    441    ri84->ARMri84.I84.imm4 = imm4;
    442    vassert(imm8 >= 0 && imm8 <= 255);
    443    vassert(imm4 >= 0 && imm4 <= 15);
    444    return ri84;
    445 }
    446 ARMRI84* ARMRI84_R ( HReg reg ) {
    447    ARMRI84* ri84       = LibVEX_Alloc(sizeof(ARMRI84));
    448    ri84->tag           = ARMri84_R;
    449    ri84->ARMri84.R.reg = reg;
    450    return ri84;
    451 }
    452 
    453 void ppARMRI84 ( ARMRI84* ri84 ) {
    454    switch (ri84->tag) {
    455       case ARMri84_I84:
    456          vex_printf("0x%x", ROR32(ri84->ARMri84.I84.imm8,
    457                                   2 * ri84->ARMri84.I84.imm4));
    458          break;
    459       case ARMri84_R:
    460          ppHRegARM(ri84->ARMri84.R.reg);
    461          break;
    462       default:
    463          vassert(0);
    464    }
    465 }
    466 
    467 static void addRegUsage_ARMRI84 ( HRegUsage* u, ARMRI84* ri84 ) {
    468    switch (ri84->tag) {
    469       case ARMri84_I84:
    470          return;
    471       case ARMri84_R:
    472          addHRegUse(u, HRmRead, ri84->ARMri84.R.reg);
    473          return;
    474       default:
    475          vpanic("addRegUsage_ARMRI84");
    476    }
    477 }
    478 
    479 static void mapRegs_ARMRI84 ( HRegRemap* m, ARMRI84* ri84 ) {
    480    switch (ri84->tag) {
    481       case ARMri84_I84:
    482          return;
    483       case ARMri84_R:
    484          ri84->ARMri84.R.reg = lookupHRegRemap(m, ri84->ARMri84.R.reg);
    485          return;
    486       default:
    487          vpanic("mapRegs_ARMRI84");
    488    }
    489 }
    490 
    491 
    492 /* --------- Reg or imm5 operands --------- */
    493 
    494 ARMRI5* ARMRI5_I5 ( UInt imm5 ) {
    495    ARMRI5* ri5         = LibVEX_Alloc(sizeof(ARMRI5));
    496    ri5->tag            = ARMri5_I5;
    497    ri5->ARMri5.I5.imm5 = imm5;
    498    vassert(imm5 > 0 && imm5 <= 31); // zero is not allowed
    499    return ri5;
    500 }
    501 ARMRI5* ARMRI5_R ( HReg reg ) {
    502    ARMRI5* ri5       = LibVEX_Alloc(sizeof(ARMRI5));
    503    ri5->tag          = ARMri5_R;
    504    ri5->ARMri5.R.reg = reg;
    505    return ri5;
    506 }
    507 
    508 void ppARMRI5 ( ARMRI5* ri5 ) {
    509    switch (ri5->tag) {
    510       case ARMri5_I5:
    511          vex_printf("%u", ri5->ARMri5.I5.imm5);
    512          break;
    513       case ARMri5_R:
    514          ppHRegARM(ri5->ARMri5.R.reg);
    515          break;
    516       default:
    517          vassert(0);
    518    }
    519 }
    520 
    521 static void addRegUsage_ARMRI5 ( HRegUsage* u, ARMRI5* ri5 ) {
    522    switch (ri5->tag) {
    523       case ARMri5_I5:
    524          return;
    525       case ARMri5_R:
    526          addHRegUse(u, HRmRead, ri5->ARMri5.R.reg);
    527          return;
    528       default:
    529          vpanic("addRegUsage_ARMRI5");
    530    }
    531 }
    532 
    533 static void mapRegs_ARMRI5 ( HRegRemap* m, ARMRI5* ri5 ) {
    534    switch (ri5->tag) {
    535       case ARMri5_I5:
    536          return;
    537       case ARMri5_R:
    538          ri5->ARMri5.R.reg = lookupHRegRemap(m, ri5->ARMri5.R.reg);
    539          return;
    540       default:
    541          vpanic("mapRegs_ARMRI5");
    542    }
    543 }
    544 
    545 /* -------- Neon Immediate operatnd --------- */
    546 
    547 ARMNImm* ARMNImm_TI ( UInt type, UInt imm8 ) {
    548    ARMNImm* i = LibVEX_Alloc(sizeof(ARMNImm));
    549    i->type = type;
    550    i->imm8 = imm8;
    551    return i;
    552 }
    553 
/* Expand a (type, imm8) Neon immediate encoding into the full 64-bit
   value it denotes.  The case fallthroughs below are deliberate: each
   type builds on the shifting done by the cases beneath it. */
ULong ARMNImm_to_Imm64 ( ARMNImm* imm ) {
   int i, j;
   ULong y, x = imm->imm8;
   switch (imm->type) {
      /* Types 0-3: imm8 placed in byte lane 0/1/2/3 of each 32-bit
         half.  Fallthroughs accumulate one extra byte shift each. */
      case 3:
         x = x << 8;
         /* fallthrough */
      case 2:
         x = x << 8;
         /* fallthrough */
      case 1:
         x = x << 8;
         /* fallthrough */
      case 0:
         return (x << 32) | x;
      /* Types 4-6: imm8 replicated into each 16-bit lane, either in
         the low byte (4), high byte (5) or both bytes (6). */
      case 5:
      case 6:
         if (imm->type == 5)
            x = x << 8;
         else
            x = (x << 8) | x;
         /* fallthrough */
      case 4:
         x = (x << 16) | x;
         return (x << 32) | x;
      /* Types 7-8: imm8 shifted left by one or two bytes with the
         vacated low bytes set to 0xFF, replicated per 32 bits. */
      case 8:
         x = (x << 8) | 0xFF;
         /* fallthrough */
      case 7:
         x = (x << 8) | 0xFF;
         return (x << 32) | x;
      /* Type 9: each bit of imm8 is smeared across one byte of the
         result, MSB first. */
      case 9:
         x = 0;
         for (i = 7; i >= 0; i--) {
            y = ((ULong)imm->imm8 >> i) & 1;
            for (j = 0; j < 8; j++) {
               x = (x << 1) | y;
            }
         }
         return x;
      /* Type 10: expand the 8-bit encoding into a 32-bit FP-style
         pattern (sign, duplicated exponent bit, mantissa), then
         replicate it into both halves. */
      case 10:
         x |= (x & 0x80) << 5;
         x |= (~x & 0x40) << 5;
         x &= 0x187F; /* 0001 1000 0111 1111 */
         x |= (x & 0x40) << 4;
         x |= (x & 0x40) << 3;
         x |= (x & 0x40) << 2;
         x |= (x & 0x40) << 1;
         x = x << 19;
         x = (x << 32) | x;
         return x;
      default:
         vpanic("ARMNImm_to_Imm64");
   }
}
    604 
/* Try to express the 64-bit value |x| as a Neon (type, imm8)
   immediate.  Returns NULL if no encoding exists.  This is the inverse
   of ARMNImm_to_Imm64; candidate encodings are verified by
   round-tripping where the match is not exact. */
ARMNImm* Imm64_to_ARMNImm ( ULong x ) {
   ARMNImm tmp;
   if ((x & 0xFFFFFFFF) == (x >> 32)) {
      /* Both 32-bit halves equal: try the per-32-bit patterns
         (types 0-8 and the FP-style type 10). */
      if ((x & 0xFFFFFF00) == 0)
         return ARMNImm_TI(0, x & 0xFF);
      if ((x & 0xFFFF00FF) == 0)
         return ARMNImm_TI(1, (x >> 8) & 0xFF);
      if ((x & 0xFF00FFFF) == 0)
         return ARMNImm_TI(2, (x >> 16) & 0xFF);
      if ((x & 0x00FFFFFF) == 0)
         return ARMNImm_TI(3, (x >> 24) & 0xFF);
      if ((x & 0xFFFF00FF) == 0xFF)
         return ARMNImm_TI(7, (x >> 8) & 0xFF);
      if ((x & 0xFF00FFFF) == 0xFFFF)
         return ARMNImm_TI(8, (x >> 16) & 0xFF);
      if ((x & 0xFFFF) == ((x >> 16) & 0xFFFF)) {
         /* Both 16-bit lanes equal too: per-16-bit patterns. */
         if ((x & 0xFF00) == 0)
            return ARMNImm_TI(4, x & 0xFF);
         if ((x & 0x00FF) == 0)
            return ARMNImm_TI(5, (x >> 8) & 0xFF);
         if ((x & 0xFF) == ((x >> 8) & 0xFF))
            return ARMNImm_TI(6, x & 0xFF);
      }
      if ((x & 0x7FFFF) == 0) {
         /* Possibly a type-10 (FP-style) immediate; check by
            re-expanding the candidate and comparing. */
         tmp.type = 10;
         tmp.imm8 = ((x >> 19) & 0x7F) | ((x >> 24) & 0x80);
         if (ARMNImm_to_Imm64(&tmp) == x)
            return ARMNImm_TI(tmp.type, tmp.imm8);
      }
   } else {
      /* This can only be type 9: one imm8 bit per result byte.
         Gather the top bit of each byte and verify by round-trip. */
      tmp.imm8 = (((x >> 56) & 1) << 7)
               | (((x >> 48) & 1) << 6)
               | (((x >> 40) & 1) << 5)
               | (((x >> 32) & 1) << 4)
               | (((x >> 24) & 1) << 3)
               | (((x >> 16) & 1) << 2)
               | (((x >>  8) & 1) << 1)
               | (((x >>  0) & 1) << 0);
      tmp.type = 9;
      if (ARMNImm_to_Imm64 (&tmp) == x)
         return ARMNImm_TI(tmp.type, tmp.imm8);
   }
   return NULL;
}
    650 
/* Print the 128-bit expansion of |i|: the same 64-bit pattern is
   printed twice, hence the deliberate repetition of x. */
void ppARMNImm (ARMNImm* i) {
   ULong x = ARMNImm_to_Imm64(i);
   vex_printf("0x%llX%llX", x, x);
}
    655 
    656 /* -- Register or scalar operand --- */
    657 
    658 ARMNRS* mkARMNRS(ARMNRS_tag tag, HReg reg, UInt index)
    659 {
    660    ARMNRS *p = LibVEX_Alloc(sizeof(ARMNRS));
    661    p->tag = tag;
    662    p->reg = reg;
    663    p->index = index;
    664    return p;
    665 }
    666 
    667 void ppARMNRS(ARMNRS *p)
    668 {
    669    ppHRegARM(p->reg);
    670    if (p->tag == ARMNRS_Scalar) {
    671       vex_printf("[%d]", p->index);
    672    }
    673 }
    674 
    675 /* --------- Instructions. --------- */
    676 
    677 HChar* showARMAluOp ( ARMAluOp op ) {
    678    switch (op) {
    679       case ARMalu_ADD:  return "add";
    680       case ARMalu_ADDS: return "adds";
    681       case ARMalu_ADC:  return "adc";
    682       case ARMalu_SUB:  return "sub";
    683       case ARMalu_SUBS: return "subs";
    684       case ARMalu_SBC:  return "sbc";
    685       case ARMalu_AND:  return "and";
    686       case ARMalu_BIC:  return "bic";
    687       case ARMalu_OR:   return "orr";
    688       case ARMalu_XOR:  return "xor";
    689       default: vpanic("showARMAluOp");
    690    }
    691 }
    692 
    693 HChar* showARMShiftOp ( ARMShiftOp op ) {
    694    switch (op) {
    695       case ARMsh_SHL: return "shl";
    696       case ARMsh_SHR: return "shr";
    697       case ARMsh_SAR: return "sar";
    698       default: vpanic("showARMShiftOp");
    699    }
    700 }
    701 
    702 HChar* showARMUnaryOp ( ARMUnaryOp op ) {
    703    switch (op) {
    704       case ARMun_NEG: return "neg";
    705       case ARMun_NOT: return "not";
    706       case ARMun_CLZ: return "clz";
    707       default: vpanic("showARMUnaryOp");
    708    }
    709 }
    710 
    711 HChar* showARMMulOp ( ARMMulDivOp op ) {
    712    switch (op) {
    713       case ARMmul_PLAIN: return "mul";
    714       case ARMmul_ZX:    return "umull";
    715       case ARMmul_SX:    return "smull";
    716       default: vpanic("showARMMulOp");
    717    }
    718 }
    719 
    720 HChar* showARMDivOp ( ARMMulDivOp op ) {
    721    switch (op) {
    722       case ARMdiv_S:     return "sdiv";
    723       case ARMdiv_U:     return "udiv";
    724       default: vpanic("showARMDivOp");
    725    }
    726 }
    727 
    728 HChar* showARMVfpOp ( ARMVfpOp op ) {
    729    switch (op) {
    730       case ARMvfp_ADD: return "add";
    731       case ARMvfp_SUB: return "sub";
    732       case ARMvfp_MUL: return "mul";
    733       case ARMvfp_DIV: return "div";
    734       default: vpanic("showARMVfpOp");
    735    }
    736 }
    737 
    738 HChar* showARMVfpUnaryOp ( ARMVfpUnaryOp op ) {
    739    switch (op) {
    740       case ARMvfpu_COPY: return "cpy";
    741       case ARMvfpu_NEG:  return "neg";
    742       case ARMvfpu_ABS:  return "abs";
    743       case ARMvfpu_SQRT: return "sqrt";
    744       default: vpanic("showARMVfpUnaryOp");
    745    }
    746 }
    747 
    748 HChar* showARMNeonBinOp ( ARMNeonBinOp op ) {
    749    switch (op) {
    750       case ARMneon_VAND: return "vand";
    751       case ARMneon_VORR: return "vorr";
    752       case ARMneon_VXOR: return "veor";
    753       case ARMneon_VADD: return "vadd";
    754       case ARMneon_VRHADDS: return "vrhadd";
    755       case ARMneon_VRHADDU: return "vrhadd";
    756       case ARMneon_VADDFP: return "vadd";
    757       case ARMneon_VPADDFP: return "vpadd";
    758       case ARMneon_VABDFP: return "vabd";
    759       case ARMneon_VSUB: return "vsub";
    760       case ARMneon_VSUBFP: return "vsub";
    761       case ARMneon_VMINU: return "vmin";
    762       case ARMneon_VMINS: return "vmin";
    763       case ARMneon_VMINF: return "vmin";
    764       case ARMneon_VMAXU: return "vmax";
    765       case ARMneon_VMAXS: return "vmax";
    766       case ARMneon_VMAXF: return "vmax";
    767       case ARMneon_VQADDU: return "vqadd";
    768       case ARMneon_VQADDS: return "vqadd";
    769       case ARMneon_VQSUBU: return "vqsub";
    770       case ARMneon_VQSUBS: return "vqsub";
    771       case ARMneon_VCGTU:  return "vcgt";
    772       case ARMneon_VCGTS:  return "vcgt";
    773       case ARMneon_VCGTF:  return "vcgt";
    774       case ARMneon_VCGEF:  return "vcgt";
    775       case ARMneon_VCGEU:  return "vcge";
    776       case ARMneon_VCGES:  return "vcge";
    777       case ARMneon_VCEQ:  return "vceq";
    778       case ARMneon_VCEQF:  return "vceq";
    779       case ARMneon_VPADD:   return "vpadd";
    780       case ARMneon_VPMINU:   return "vpmin";
    781       case ARMneon_VPMINS:   return "vpmin";
    782       case ARMneon_VPMINF:   return "vpmin";
    783       case ARMneon_VPMAXU:   return "vpmax";
    784       case ARMneon_VPMAXS:   return "vpmax";
    785       case ARMneon_VPMAXF:   return "vpmax";
    786       case ARMneon_VEXT:   return "vext";
    787       case ARMneon_VMUL:   return "vmuli";
    788       case ARMneon_VMULLU:   return "vmull";
    789       case ARMneon_VMULLS:   return "vmull";
    790       case ARMneon_VMULP:  return "vmul";
    791       case ARMneon_VMULFP:  return "vmul";
    792       case ARMneon_VMULLP:  return "vmul";
    793       case ARMneon_VQDMULH: return "vqdmulh";
    794       case ARMneon_VQRDMULH: return "vqrdmulh";
    795       case ARMneon_VQDMULL: return "vqdmull";
    796       case ARMneon_VTBL: return "vtbl";
    797       case ARMneon_VRECPS: return "vrecps";
    798       case ARMneon_VRSQRTS: return "vrecps";
    799       /* ... */
    800       default: vpanic("showARMNeonBinOp");
    801    }
    802 }
    803 
/* Lane-type suffix for a Neon binary op: "" (bitwise), ".i" (integer,
   signedness-agnostic), ".u"/".s" (un/signed integer), ".p"
   (polynomial) or ".f" (float).  Paired with showARMNeonBinOp when
   printing. */
HChar* showARMNeonBinOpDataType ( ARMNeonBinOp op ) {
   switch (op) {
      /* Bitwise ops: no lane type. */
      case ARMneon_VAND:
      case ARMneon_VORR:
      case ARMneon_VXOR:
         return "";
      /* Ops where signedness is irrelevant. */
      case ARMneon_VADD:
      case ARMneon_VSUB:
      case ARMneon_VEXT:
      case ARMneon_VMUL:
      case ARMneon_VPADD:
      case ARMneon_VTBL:
      case ARMneon_VCEQ:
         return ".i";
      /* Unsigned integer ops. */
      case ARMneon_VRHADDU:
      case ARMneon_VMINU:
      case ARMneon_VMAXU:
      case ARMneon_VQADDU:
      case ARMneon_VQSUBU:
      case ARMneon_VCGTU:
      case ARMneon_VCGEU:
      case ARMneon_VMULLU:
      case ARMneon_VPMINU:
      case ARMneon_VPMAXU:
         return ".u";
      /* Signed integer ops. */
      case ARMneon_VRHADDS:
      case ARMneon_VMINS:
      case ARMneon_VMAXS:
      case ARMneon_VQADDS:
      case ARMneon_VQSUBS:
      case ARMneon_VCGTS:
      case ARMneon_VCGES:
      case ARMneon_VQDMULL:
      case ARMneon_VMULLS:
      case ARMneon_VPMINS:
      case ARMneon_VPMAXS:
      case ARMneon_VQDMULH:
      case ARMneon_VQRDMULH:
         return ".s";
      /* Polynomial multiplies. */
      case ARMneon_VMULP:
      case ARMneon_VMULLP:
         return ".p";
      /* Floating-point ops. */
      case ARMneon_VADDFP:
      case ARMneon_VABDFP:
      case ARMneon_VPADDFP:
      case ARMneon_VSUBFP:
      case ARMneon_VMULFP:
      case ARMneon_VMINF:
      case ARMneon_VMAXF:
      case ARMneon_VPMINF:
      case ARMneon_VPMAXF:
      case ARMneon_VCGTF:
      case ARMneon_VCGEF:
      case ARMneon_VCEQF:
      case ARMneon_VRECPS:
      case ARMneon_VRSQRTS:
         return ".f";
      /* ... */
      default: vpanic("showARMNeonBinOpDataType");
   }
}
    865 
/* Return the base mnemonic for a Neon unary op; type and size
   decoration is added by showARMNeonUnOpDataType and
   showARMNeonDataSize. */
HChar* showARMNeonUnOp ( ARMNeonUnOp op ) {
   switch (op) {
      /* The COPY* family all print as (possibly narrowing) moves. */
      case ARMneon_COPY: return "vmov";
      case ARMneon_COPYLS: return "vmov";
      case ARMneon_COPYLU: return "vmov";
      case ARMneon_COPYN: return "vmov";
      case ARMneon_COPYQNSS: return "vqmovn";
      case ARMneon_COPYQNUS: return "vqmovun";
      case ARMneon_COPYQNUU: return "vqmovn";
      case ARMneon_NOT: return "vmvn";
      case ARMneon_EQZ: return "vceq";
      case ARMneon_CNT: return "vcnt";
      case ARMneon_CLS: return "vcls";
      case ARMneon_CLZ: return "vclz";
      case ARMneon_DUP: return "vdup";
      case ARMneon_PADDLS: return "vpaddl";
      case ARMneon_PADDLU: return "vpaddl";
      case ARMneon_VQSHLNSS: return "vqshl";
      case ARMneon_VQSHLNUU: return "vqshl";
      case ARMneon_VQSHLNUS: return "vqshlu";
      case ARMneon_REV16: return "vrev16";
      case ARMneon_REV32: return "vrev32";
      case ARMneon_REV64: return "vrev64";
      /* All conversions share the "vcvt" mnemonic; the operand types
         are shown via showARMNeonUnOpDataType. */
      case ARMneon_VCVTFtoU: return "vcvt";
      case ARMneon_VCVTFtoS: return "vcvt";
      case ARMneon_VCVTUtoF: return "vcvt";
      case ARMneon_VCVTStoF: return "vcvt";
      case ARMneon_VCVTFtoFixedU: return "vcvt";
      case ARMneon_VCVTFtoFixedS: return "vcvt";
      case ARMneon_VCVTFixedUtoF: return "vcvt";
      case ARMneon_VCVTFixedStoF: return "vcvt";
      case ARMneon_VCVTF32toF16: return "vcvt";
      case ARMneon_VCVTF16toF32: return "vcvt";
      case ARMneon_VRECIP: return "vrecip";
      case ARMneon_VRECIPF: return "vrecipf";
      case ARMneon_VNEGF: return "vneg";
      case ARMneon_ABS: return "vabs";
      case ARMneon_VABSFP: return "vabsfp";
      case ARMneon_VRSQRTEFP: return "vrsqrtefp";
      case ARMneon_VRSQRTE: return "vrsqrte";
      /* ... */
      default: vpanic("showARMNeonUnOp");
   }
}
    910 
/* Return the type suffix for a Neon unary op.  Conversions return the
   full ".dst.src" pair since both element types appear in the printed
   form; everything else gets "", ".i", ".u", ".s" or ".f". */
HChar* showARMNeonUnOpDataType ( ARMNeonUnOp op ) {
   switch (op) {
      case ARMneon_COPY:
      case ARMneon_NOT:
         return "";
      /* Width-only (sign-agnostic) ops. */
      case ARMneon_COPYN:
      case ARMneon_EQZ:
      case ARMneon_CNT:
      case ARMneon_DUP:
      case ARMneon_REV16:
      case ARMneon_REV32:
      case ARMneon_REV64:
         return ".i";
      /* Unsigned variants. */
      case ARMneon_COPYLU:
      case ARMneon_PADDLU:
      case ARMneon_COPYQNUU:
      case ARMneon_VQSHLNUU:
      case ARMneon_VRECIP:
      case ARMneon_VRSQRTE:
         return ".u";
      /* Signed variants. */
      case ARMneon_CLS:
      case ARMneon_CLZ:
      case ARMneon_COPYLS:
      case ARMneon_PADDLS:
      case ARMneon_COPYQNSS:
      case ARMneon_COPYQNUS:
      case ARMneon_VQSHLNSS:
      case ARMneon_VQSHLNUS:
      case ARMneon_ABS:
         return ".s";
      /* Floating-point variants. */
      case ARMneon_VRECIPF:
      case ARMneon_VNEGF:
      case ARMneon_VABSFP:
      case ARMneon_VRSQRTEFP:
         return ".f";
      case ARMneon_VCVTFtoU: return ".u32.f32";
      case ARMneon_VCVTFtoS: return ".s32.f32";
      case ARMneon_VCVTUtoF: return ".f32.u32";
      case ARMneon_VCVTStoF: return ".f32.s32";
      case ARMneon_VCVTF16toF32: return ".f32.f16";
      case ARMneon_VCVTF32toF16: return ".f16.f32";
      case ARMneon_VCVTFtoFixedU: return ".u32.f32";
      case ARMneon_VCVTFtoFixedS: return ".s32.f32";
      case ARMneon_VCVTFixedUtoF: return ".f32.u32";
      case ARMneon_VCVTFixedStoF: return ".f32.s32";
      /* ... */
      default: vpanic("showARMNeonUnOpDataType");
   }
}
    960 
    961 HChar* showARMNeonUnOpS ( ARMNeonUnOpS op ) {
    962    switch (op) {
    963       case ARMneon_SETELEM: return "vmov";
    964       case ARMneon_GETELEMU: return "vmov";
    965       case ARMneon_GETELEMS: return "vmov";
    966       case ARMneon_VDUP: return "vdup";
    967       /* ... */
    968       default: vpanic("showARMNeonUnarySOp");
    969    }
    970 }
    971 
    972 HChar* showARMNeonUnOpSDataType ( ARMNeonUnOpS op ) {
    973    switch (op) {
    974       case ARMneon_SETELEM:
    975       case ARMneon_VDUP:
    976          return ".i";
    977       case ARMneon_GETELEMS:
    978          return ".s";
    979       case ARMneon_GETELEMU:
    980          return ".u";
    981       /* ... */
    982       default: vpanic("showARMNeonUnarySOp");
    983    }
    984 }
    985 
    986 HChar* showARMNeonShiftOp ( ARMNeonShiftOp op ) {
    987    switch (op) {
    988       case ARMneon_VSHL: return "vshl";
    989       case ARMneon_VSAL: return "vshl";
    990       case ARMneon_VQSHL: return "vqshl";
    991       case ARMneon_VQSAL: return "vqshl";
    992       /* ... */
    993       default: vpanic("showARMNeonShiftOp");
    994    }
    995 }
    996 
    997 HChar* showARMNeonShiftOpDataType ( ARMNeonShiftOp op ) {
    998    switch (op) {
    999       case ARMneon_VSHL:
   1000       case ARMneon_VQSHL:
   1001          return ".u";
   1002       case ARMneon_VSAL:
   1003       case ARMneon_VQSAL:
   1004          return ".s";
   1005       /* ... */
   1006       default: vpanic("showARMNeonShiftOpDataType");
   1007    }
   1008 }
   1009 
   1010 HChar* showARMNeonDualOp ( ARMNeonDualOp op ) {
   1011    switch (op) {
   1012       case ARMneon_TRN: return "vtrn";
   1013       case ARMneon_ZIP: return "vzip";
   1014       case ARMneon_UZP: return "vuzp";
   1015       /* ... */
   1016       default: vpanic("showARMNeonDualOp");
   1017    }
   1018 }
   1019 
   1020 HChar* showARMNeonDualOpDataType ( ARMNeonDualOp op ) {
   1021    switch (op) {
   1022       case ARMneon_TRN:
   1023       case ARMneon_ZIP:
   1024       case ARMneon_UZP:
   1025          return "i";
   1026       /* ... */
   1027       default: vpanic("showARMNeonDualOp");
   1028    }
   1029 }
   1030 
   1031 static HChar* showARMNeonDataSize_wrk ( UInt size )
   1032 {
   1033    switch (size) {
   1034       case 0: return "8";
   1035       case 1: return "16";
   1036       case 2: return "32";
   1037       case 3: return "64";
   1038       default: vpanic("showARMNeonDataSize");
   1039    }
   1040 }
   1041 
/* Produce the printable lane-size string for instruction i: "8".."64",
   or "" for ops printed without a size.  Handles the ops whose size
   field uses a non-standard encoding. */
static HChar* showARMNeonDataSize ( ARMInstr* i )
{
   switch (i->tag) {
      case ARMin_NBinary:
         /* VEXT is always printed with .8 regardless of size field. */
         if (i->ARMin.NBinary.op == ARMneon_VEXT)
            return "8";
         /* Pure bitwise ops carry no size decoration. */
         if (i->ARMin.NBinary.op == ARMneon_VAND ||
             i->ARMin.NBinary.op == ARMneon_VORR ||
             i->ARMin.NBinary.op == ARMneon_VXOR)
            return "";
         return showARMNeonDataSize_wrk(i->ARMin.NBinary.size);
      case ARMin_NUnary:
         /* Moves and conversions print no separate size (conversions
            already show both element types via the data-type suffix). */
         if (i->ARMin.NUnary.op == ARMneon_COPY ||
             i->ARMin.NUnary.op == ARMneon_NOT ||
             i->ARMin.NUnary.op == ARMneon_VCVTF32toF16||
             i->ARMin.NUnary.op == ARMneon_VCVTF16toF32||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
             i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
             i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF ||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoS ||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoU ||
             i->ARMin.NUnary.op == ARMneon_VCVTStoF ||
             i->ARMin.NUnary.op == ARMneon_VCVTUtoF)
            return "";
         if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
             i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
             i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
            /* For VQSHL #imm, 'size' packs both shift amount and lane
               width; the highest marker bit (0x40/0x20/0x10/0x08)
               selects 64/32/16/8-bit lanes, so these tests must stay
               in descending order. */
            UInt size;
            size = i->ARMin.NUnary.size;
            if (size & 0x40)
               return "64";
            if (size & 0x20)
               return "32";
            if (size & 0x10)
               return "16";
            if (size & 0x08)
               return "8";
            vpanic("showARMNeonDataSize");
         }
         return showARMNeonDataSize_wrk(i->ARMin.NUnary.size);
      case ARMin_NUnaryS:
         if (i->ARMin.NUnaryS.op == ARMneon_VDUP) {
            /* VDUP (scalar): the lane width is encoded by the position
               of the lowest set bit of 'size' (1/2/4 -> 8/16/32); the
               remaining high bits hold the lane index. */
            int size;
            size = i->ARMin.NUnaryS.size;
            if ((size & 1) == 1)
               return "8";
            if ((size & 3) == 2)
               return "16";
            if ((size & 7) == 4)
               return "32";
            vpanic("showARMNeonDataSize");
         }
         return showARMNeonDataSize_wrk(i->ARMin.NUnaryS.size);
      case ARMin_NShift:
         return showARMNeonDataSize_wrk(i->ARMin.NShift.size);
      case ARMin_NDual:
         return showARMNeonDataSize_wrk(i->ARMin.NDual.size);
      default:
         vpanic("showARMNeonDataSize");
   }
}
   1104 
   1105 ARMInstr* ARMInstr_Alu ( ARMAluOp op,
   1106                          HReg dst, HReg argL, ARMRI84* argR ) {
   1107    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1108    i->tag            = ARMin_Alu;
   1109    i->ARMin.Alu.op   = op;
   1110    i->ARMin.Alu.dst  = dst;
   1111    i->ARMin.Alu.argL = argL;
   1112    i->ARMin.Alu.argR = argR;
   1113    return i;
   1114 }
   1115 ARMInstr* ARMInstr_Shift  ( ARMShiftOp op,
   1116                             HReg dst, HReg argL, ARMRI5* argR ) {
   1117    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1118    i->tag              = ARMin_Shift;
   1119    i->ARMin.Shift.op   = op;
   1120    i->ARMin.Shift.dst  = dst;
   1121    i->ARMin.Shift.argL = argL;
   1122    i->ARMin.Shift.argR = argR;
   1123    return i;
   1124 }
   1125 ARMInstr* ARMInstr_Unary ( ARMUnaryOp op, HReg dst, HReg src ) {
   1126    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1127    i->tag             = ARMin_Unary;
   1128    i->ARMin.Unary.op  = op;
   1129    i->ARMin.Unary.dst = dst;
   1130    i->ARMin.Unary.src = src;
   1131    return i;
   1132 }
   1133 ARMInstr* ARMInstr_CmpOrTst ( Bool isCmp, HReg argL, ARMRI84* argR ) {
   1134    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1135    i->tag                  = ARMin_CmpOrTst;
   1136    i->ARMin.CmpOrTst.isCmp = isCmp;
   1137    i->ARMin.CmpOrTst.argL  = argL;
   1138    i->ARMin.CmpOrTst.argR  = argR;
   1139    return i;
   1140 }
   1141 ARMInstr* ARMInstr_Mov ( HReg dst, ARMRI84* src ) {
   1142    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1143    i->tag           = ARMin_Mov;
   1144    i->ARMin.Mov.dst = dst;
   1145    i->ARMin.Mov.src = src;
   1146    return i;
   1147 }
   1148 ARMInstr* ARMInstr_Imm32  ( HReg dst, UInt imm32 ) {
   1149    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1150    i->tag               = ARMin_Imm32;
   1151    i->ARMin.Imm32.dst   = dst;
   1152    i->ARMin.Imm32.imm32 = imm32;
   1153    return i;
   1154 }
   1155 ARMInstr* ARMInstr_LdSt32 ( Bool isLoad, HReg rD, ARMAMode1* amode ) {
   1156    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1157    i->tag                 = ARMin_LdSt32;
   1158    i->ARMin.LdSt32.isLoad = isLoad;
   1159    i->ARMin.LdSt32.rD     = rD;
   1160    i->ARMin.LdSt32.amode  = amode;
   1161    return i;
   1162 }
   1163 ARMInstr* ARMInstr_LdSt16 ( Bool isLoad, Bool signedLoad,
   1164                             HReg rD, ARMAMode2* amode ) {
   1165    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1166    i->tag                     = ARMin_LdSt16;
   1167    i->ARMin.LdSt16.isLoad     = isLoad;
   1168    i->ARMin.LdSt16.signedLoad = signedLoad;
   1169    i->ARMin.LdSt16.rD         = rD;
   1170    i->ARMin.LdSt16.amode      = amode;
   1171    return i;
   1172 }
   1173 ARMInstr* ARMInstr_LdSt8U ( Bool isLoad, HReg rD, ARMAMode1* amode ) {
   1174    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1175    i->tag                 = ARMin_LdSt8U;
   1176    i->ARMin.LdSt8U.isLoad = isLoad;
   1177    i->ARMin.LdSt8U.rD     = rD;
   1178    i->ARMin.LdSt8U.amode  = amode;
   1179    return i;
   1180 }
   1181 ARMInstr* ARMInstr_XDirect ( Addr32 dstGA, ARMAMode1* amR15T,
   1182                              ARMCondCode cond, Bool toFastEP ) {
   1183    ARMInstr* i               = LibVEX_Alloc(sizeof(ARMInstr));
   1184    i->tag                    = ARMin_XDirect;
   1185    i->ARMin.XDirect.dstGA    = dstGA;
   1186    i->ARMin.XDirect.amR15T   = amR15T;
   1187    i->ARMin.XDirect.cond     = cond;
   1188    i->ARMin.XDirect.toFastEP = toFastEP;
   1189    return i;
   1190 }
   1191 ARMInstr* ARMInstr_XIndir ( HReg dstGA, ARMAMode1* amR15T,
   1192                             ARMCondCode cond ) {
   1193    ARMInstr* i            = LibVEX_Alloc(sizeof(ARMInstr));
   1194    i->tag                 = ARMin_XIndir;
   1195    i->ARMin.XIndir.dstGA  = dstGA;
   1196    i->ARMin.XIndir.amR15T = amR15T;
   1197    i->ARMin.XIndir.cond   = cond;
   1198    return i;
   1199 }
   1200 ARMInstr* ARMInstr_XAssisted ( HReg dstGA, ARMAMode1* amR15T,
   1201                                ARMCondCode cond, IRJumpKind jk ) {
   1202    ARMInstr* i               = LibVEX_Alloc(sizeof(ARMInstr));
   1203    i->tag                    = ARMin_XAssisted;
   1204    i->ARMin.XAssisted.dstGA  = dstGA;
   1205    i->ARMin.XAssisted.amR15T = amR15T;
   1206    i->ARMin.XAssisted.cond   = cond;
   1207    i->ARMin.XAssisted.jk     = jk;
   1208    return i;
   1209 }
   1210 ARMInstr* ARMInstr_CMov ( ARMCondCode cond, HReg dst, ARMRI84* src ) {
   1211    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1212    i->tag             = ARMin_CMov;
   1213    i->ARMin.CMov.cond = cond;
   1214    i->ARMin.CMov.dst  = dst;
   1215    i->ARMin.CMov.src  = src;
   1216    vassert(cond != ARMcc_AL);
   1217    return i;
   1218 }
   1219 ARMInstr* ARMInstr_Call ( ARMCondCode cond, HWord target, Int nArgRegs ) {
   1220    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1221    i->tag                 = ARMin_Call;
   1222    i->ARMin.Call.cond     = cond;
   1223    i->ARMin.Call.target   = target;
   1224    i->ARMin.Call.nArgRegs = nArgRegs;
   1225    return i;
   1226 }
   1227 ARMInstr* ARMInstr_Mul ( ARMMulDivOp op ) {
   1228    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1229    i->tag          = ARMin_Mul;
   1230    i->ARMin.Mul.op = op;
   1231    return i;
   1232 }
   1233 ARMInstr* ARMInstr_Div ( ARMMulDivOp op, HReg dst, HReg argL, HReg argR ) {
   1234    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1235    i->tag          = ARMin_Div;
   1236    i->ARMin.Div.op = op;
   1237    i->ARMin.Div.dst  = dst;
   1238    i->ARMin.Div.argL = argL;
   1239    i->ARMin.Div.argR = argR;
   1240    return i;
   1241 }
   1242 ARMInstr* ARMInstr_LdrEX ( Int szB ) {
   1243    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1244    i->tag             = ARMin_LdrEX;
   1245    i->ARMin.LdrEX.szB = szB;
   1246    vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
   1247    return i;
   1248 }
   1249 ARMInstr* ARMInstr_StrEX ( Int szB ) {
   1250    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1251    i->tag             = ARMin_StrEX;
   1252    i->ARMin.StrEX.szB = szB;
   1253    vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
   1254    return i;
   1255 }
   1256 ARMInstr* ARMInstr_VLdStD ( Bool isLoad, HReg dD, ARMAModeV* am ) {
   1257    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1258    i->tag                 = ARMin_VLdStD;
   1259    i->ARMin.VLdStD.isLoad = isLoad;
   1260    i->ARMin.VLdStD.dD     = dD;
   1261    i->ARMin.VLdStD.amode  = am;
   1262    return i;
   1263 }
   1264 ARMInstr* ARMInstr_VLdStS ( Bool isLoad, HReg fD, ARMAModeV* am ) {
   1265    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1266    i->tag                 = ARMin_VLdStS;
   1267    i->ARMin.VLdStS.isLoad = isLoad;
   1268    i->ARMin.VLdStS.fD     = fD;
   1269    i->ARMin.VLdStS.amode  = am;
   1270    return i;
   1271 }
   1272 ARMInstr* ARMInstr_VAluD ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
   1273    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1274    i->tag              = ARMin_VAluD;
   1275    i->ARMin.VAluD.op   = op;
   1276    i->ARMin.VAluD.dst  = dst;
   1277    i->ARMin.VAluD.argL = argL;
   1278    i->ARMin.VAluD.argR = argR;
   1279    return i;
   1280 }
   1281 ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
   1282    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1283    i->tag              = ARMin_VAluS;
   1284    i->ARMin.VAluS.op   = op;
   1285    i->ARMin.VAluS.dst  = dst;
   1286    i->ARMin.VAluS.argL = argL;
   1287    i->ARMin.VAluS.argR = argR;
   1288    return i;
   1289 }
   1290 ARMInstr* ARMInstr_VUnaryD ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
   1291    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1292    i->tag               = ARMin_VUnaryD;
   1293    i->ARMin.VUnaryD.op  = op;
   1294    i->ARMin.VUnaryD.dst = dst;
   1295    i->ARMin.VUnaryD.src = src;
   1296    return i;
   1297 }
   1298 ARMInstr* ARMInstr_VUnaryS ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
   1299    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1300    i->tag               = ARMin_VUnaryS;
   1301    i->ARMin.VUnaryS.op  = op;
   1302    i->ARMin.VUnaryS.dst = dst;
   1303    i->ARMin.VUnaryS.src = src;
   1304    return i;
   1305 }
   1306 ARMInstr* ARMInstr_VCmpD ( HReg argL, HReg argR ) {
   1307    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1308    i->tag              = ARMin_VCmpD;
   1309    i->ARMin.VCmpD.argL = argL;
   1310    i->ARMin.VCmpD.argR = argR;
   1311    return i;
   1312 }
   1313 ARMInstr* ARMInstr_VCMovD ( ARMCondCode cond, HReg dst, HReg src ) {
   1314    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1315    i->tag               = ARMin_VCMovD;
   1316    i->ARMin.VCMovD.cond = cond;
   1317    i->ARMin.VCMovD.dst  = dst;
   1318    i->ARMin.VCMovD.src  = src;
   1319    vassert(cond != ARMcc_AL);
   1320    return i;
   1321 }
   1322 ARMInstr* ARMInstr_VCMovS ( ARMCondCode cond, HReg dst, HReg src ) {
   1323    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1324    i->tag               = ARMin_VCMovS;
   1325    i->ARMin.VCMovS.cond = cond;
   1326    i->ARMin.VCMovS.dst  = dst;
   1327    i->ARMin.VCMovS.src  = src;
   1328    vassert(cond != ARMcc_AL);
   1329    return i;
   1330 }
   1331 ARMInstr* ARMInstr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
   1332    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1333    i->tag               = ARMin_VCvtSD;
   1334    i->ARMin.VCvtSD.sToD = sToD;
   1335    i->ARMin.VCvtSD.dst  = dst;
   1336    i->ARMin.VCvtSD.src  = src;
   1337    return i;
   1338 }
   1339 ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo ) {
   1340    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1341    i->tag              = ARMin_VXferD;
   1342    i->ARMin.VXferD.toD = toD;
   1343    i->ARMin.VXferD.dD  = dD;
   1344    i->ARMin.VXferD.rHi = rHi;
   1345    i->ARMin.VXferD.rLo = rLo;
   1346    return i;
   1347 }
   1348 ARMInstr* ARMInstr_VXferS ( Bool toS, HReg fD, HReg rLo ) {
   1349    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1350    i->tag              = ARMin_VXferS;
   1351    i->ARMin.VXferS.toS = toS;
   1352    i->ARMin.VXferS.fD  = fD;
   1353    i->ARMin.VXferS.rLo = rLo;
   1354    return i;
   1355 }
   1356 ARMInstr* ARMInstr_VCvtID ( Bool iToD, Bool syned,
   1357                             HReg dst, HReg src ) {
   1358    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1359    i->tag                = ARMin_VCvtID;
   1360    i->ARMin.VCvtID.iToD  = iToD;
   1361    i->ARMin.VCvtID.syned = syned;
   1362    i->ARMin.VCvtID.dst   = dst;
   1363    i->ARMin.VCvtID.src   = src;
   1364    return i;
   1365 }
   1366 ARMInstr* ARMInstr_FPSCR ( Bool toFPSCR, HReg iReg ) {
   1367    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1368    i->tag                 = ARMin_FPSCR;
   1369    i->ARMin.FPSCR.toFPSCR = toFPSCR;
   1370    i->ARMin.FPSCR.iReg    = iReg;
   1371    return i;
   1372 }
   1373 ARMInstr* ARMInstr_MFence ( void ) {
   1374    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1375    i->tag      = ARMin_MFence;
   1376    return i;
   1377 }
   1378 ARMInstr* ARMInstr_CLREX( void ) {
   1379    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1380    i->tag      = ARMin_CLREX;
   1381    return i;
   1382 }
   1383 
   1384 ARMInstr* ARMInstr_NLdStQ ( Bool isLoad, HReg dQ, ARMAModeN *amode ) {
   1385    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1386    i->tag                  = ARMin_NLdStQ;
   1387    i->ARMin.NLdStQ.isLoad  = isLoad;
   1388    i->ARMin.NLdStQ.dQ      = dQ;
   1389    i->ARMin.NLdStQ.amode   = amode;
   1390    return i;
   1391 }
   1392 
   1393 ARMInstr* ARMInstr_NLdStD ( Bool isLoad, HReg dD, ARMAModeN *amode ) {
   1394    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1395    i->tag                  = ARMin_NLdStD;
   1396    i->ARMin.NLdStD.isLoad  = isLoad;
   1397    i->ARMin.NLdStD.dD      = dD;
   1398    i->ARMin.NLdStD.amode   = amode;
   1399    return i;
   1400 }
   1401 
   1402 ARMInstr* ARMInstr_NUnary ( ARMNeonUnOp op, HReg dQ, HReg nQ,
   1403                             UInt size, Bool Q ) {
   1404    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1405    i->tag                = ARMin_NUnary;
   1406    i->ARMin.NUnary.op   = op;
   1407    i->ARMin.NUnary.src  = nQ;
   1408    i->ARMin.NUnary.dst  = dQ;
   1409    i->ARMin.NUnary.size = size;
   1410    i->ARMin.NUnary.Q    = Q;
   1411    return i;
   1412 }
   1413 
   1414 ARMInstr* ARMInstr_NUnaryS ( ARMNeonUnOpS op, ARMNRS* dst, ARMNRS* src,
   1415                              UInt size, Bool Q ) {
   1416    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1417    i->tag                = ARMin_NUnaryS;
   1418    i->ARMin.NUnaryS.op   = op;
   1419    i->ARMin.NUnaryS.src  = src;
   1420    i->ARMin.NUnaryS.dst  = dst;
   1421    i->ARMin.NUnaryS.size = size;
   1422    i->ARMin.NUnaryS.Q    = Q;
   1423    return i;
   1424 }
   1425 
   1426 ARMInstr* ARMInstr_NDual ( ARMNeonDualOp op, HReg nQ, HReg mQ,
   1427                            UInt size, Bool Q ) {
   1428    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1429    i->tag                = ARMin_NDual;
   1430    i->ARMin.NDual.op   = op;
   1431    i->ARMin.NDual.arg1 = nQ;
   1432    i->ARMin.NDual.arg2 = mQ;
   1433    i->ARMin.NDual.size = size;
   1434    i->ARMin.NDual.Q    = Q;
   1435    return i;
   1436 }
   1437 
   1438 ARMInstr* ARMInstr_NBinary ( ARMNeonBinOp op,
   1439                              HReg dst, HReg argL, HReg argR,
   1440                              UInt size, Bool Q ) {
   1441    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1442    i->tag                = ARMin_NBinary;
   1443    i->ARMin.NBinary.op   = op;
   1444    i->ARMin.NBinary.argL = argL;
   1445    i->ARMin.NBinary.argR = argR;
   1446    i->ARMin.NBinary.dst  = dst;
   1447    i->ARMin.NBinary.size = size;
   1448    i->ARMin.NBinary.Q    = Q;
   1449    return i;
   1450 }
   1451 
   1452 ARMInstr* ARMInstr_NeonImm (HReg dst, ARMNImm* imm ) {
   1453    ARMInstr *i = LibVEX_Alloc(sizeof(ARMInstr));
   1454    i->tag         = ARMin_NeonImm;
   1455    i->ARMin.NeonImm.dst = dst;
   1456    i->ARMin.NeonImm.imm = imm;
   1457    return i;
   1458 }
   1459 
   1460 ARMInstr* ARMInstr_NCMovQ ( ARMCondCode cond, HReg dst, HReg src ) {
   1461    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1462    i->tag               = ARMin_NCMovQ;
   1463    i->ARMin.NCMovQ.cond = cond;
   1464    i->ARMin.NCMovQ.dst  = dst;
   1465    i->ARMin.NCMovQ.src  = src;
   1466    vassert(cond != ARMcc_AL);
   1467    return i;
   1468 }
   1469 
   1470 ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp op,
   1471                             HReg dst, HReg argL, HReg argR,
   1472                             UInt size, Bool Q ) {
   1473    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1474    i->tag                = ARMin_NShift;
   1475    i->ARMin.NShift.op   = op;
   1476    i->ARMin.NShift.argL = argL;
   1477    i->ARMin.NShift.argR = argR;
   1478    i->ARMin.NShift.dst  = dst;
   1479    i->ARMin.NShift.size = size;
   1480    i->ARMin.NShift.Q    = Q;
   1481    return i;
   1482 }
   1483 
/* Helper copy-pasted from isel.c */
/* Test whether u is representable as an ARM "modified immediate": an
   8-bit value (*u8) rotated right by twice a 4-bit count (*u4).  Each
   iteration un-rotates u by a further 2 bits (ROR by 30 == ROL by 2)
   and succeeds once only the low 8 bits remain set.  On failure
   returns False with *u8/*u4 unwritten. */
static Bool fitsIn8x4 ( UInt* u8, UInt* u4, UInt u )
{
   UInt i;
   for (i = 0; i < 16; i++) {
      if (0 == (u & 0xFFFFFF00)) {
         *u8 = u;
         *u4 = i;
         return True;
      }
      u = ROR32(u, 30);  /* rotate left by 2: undo one unit of encoding ROR */
   }
   vassert(i == 16);
   return False;
}
   1499 
   1500 ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 ) {
   1501    UInt u8, u4;
   1502    ARMInstr *i = LibVEX_Alloc(sizeof(ARMInstr));
   1503    /* Try to generate single ADD if possible */
   1504    if (fitsIn8x4(&u8, &u4, imm32)) {
   1505       i->tag            = ARMin_Alu;
   1506       i->ARMin.Alu.op   = ARMalu_ADD;
   1507       i->ARMin.Alu.dst  = rD;
   1508       i->ARMin.Alu.argL = rN;
   1509       i->ARMin.Alu.argR = ARMRI84_I84(u8, u4);
   1510    } else {
   1511       i->tag               = ARMin_Add32;
   1512       i->ARMin.Add32.rD    = rD;
   1513       i->ARMin.Add32.rN    = rN;
   1514       i->ARMin.Add32.imm32 = imm32;
   1515    }
   1516    return i;
   1517 }
   1518 
   1519 ARMInstr* ARMInstr_EvCheck ( ARMAMode1* amCounter,
   1520                              ARMAMode1* amFailAddr ) {
   1521    ARMInstr* i                 = LibVEX_Alloc(sizeof(ARMInstr));
   1522    i->tag                      = ARMin_EvCheck;
   1523    i->ARMin.EvCheck.amCounter  = amCounter;
   1524    i->ARMin.EvCheck.amFailAddr = amFailAddr;
   1525    return i;
   1526 }
   1527 
   1528 ARMInstr* ARMInstr_ProfInc ( void ) {
   1529    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1530    i->tag      = ARMin_ProfInc;
   1531    return i;
   1532 }
   1533 
   1534 /* ... */
   1535 
   1536 void ppARMInstr ( ARMInstr* i ) {
   1537    switch (i->tag) {
   1538       case ARMin_Alu:
   1539          vex_printf("%-4s  ", showARMAluOp(i->ARMin.Alu.op));
   1540          ppHRegARM(i->ARMin.Alu.dst);
   1541          vex_printf(", ");
   1542          ppHRegARM(i->ARMin.Alu.argL);
   1543          vex_printf(", ");
   1544          ppARMRI84(i->ARMin.Alu.argR);
   1545          return;
   1546       case ARMin_Shift:
   1547          vex_printf("%s   ", showARMShiftOp(i->ARMin.Shift.op));
   1548          ppHRegARM(i->ARMin.Shift.dst);
   1549          vex_printf(", ");
   1550          ppHRegARM(i->ARMin.Shift.argL);
   1551          vex_printf(", ");
   1552          ppARMRI5(i->ARMin.Shift.argR);
   1553          return;
   1554       case ARMin_Unary:
   1555          vex_printf("%s   ", showARMUnaryOp(i->ARMin.Unary.op));
   1556          ppHRegARM(i->ARMin.Unary.dst);
   1557          vex_printf(", ");
   1558          ppHRegARM(i->ARMin.Unary.src);
   1559          return;
   1560       case ARMin_CmpOrTst:
   1561          vex_printf("%s   ", i->ARMin.CmpOrTst.isCmp ? "cmp" : "tst");
   1562          ppHRegARM(i->ARMin.CmpOrTst.argL);
   1563          vex_printf(", ");
   1564          ppARMRI84(i->ARMin.CmpOrTst.argR);
   1565          return;
   1566       case ARMin_Mov:
   1567          vex_printf("mov   ");
   1568          ppHRegARM(i->ARMin.Mov.dst);
   1569          vex_printf(", ");
   1570          ppARMRI84(i->ARMin.Mov.src);
   1571          return;
   1572       case ARMin_Imm32:
   1573          vex_printf("imm   ");
   1574          ppHRegARM(i->ARMin.Imm32.dst);
   1575          vex_printf(", 0x%x", i->ARMin.Imm32.imm32);
   1576          return;
   1577       case ARMin_LdSt32:
   1578          if (i->ARMin.LdSt32.isLoad) {
   1579             vex_printf("ldr   ");
   1580             ppHRegARM(i->ARMin.LdSt32.rD);
   1581             vex_printf(", ");
   1582             ppARMAMode1(i->ARMin.LdSt32.amode);
   1583          } else {
   1584             vex_printf("str   ");
   1585             ppARMAMode1(i->ARMin.LdSt32.amode);
   1586             vex_printf(", ");
   1587             ppHRegARM(i->ARMin.LdSt32.rD);
   1588          }
   1589          return;
   1590       case ARMin_LdSt16:
   1591          if (i->ARMin.LdSt16.isLoad) {
   1592             vex_printf("%s", i->ARMin.LdSt16.signedLoad
   1593                                 ? "ldrsh " : "ldrh  " );
   1594             ppHRegARM(i->ARMin.LdSt16.rD);
   1595             vex_printf(", ");
   1596             ppARMAMode2(i->ARMin.LdSt16.amode);
   1597          } else {
   1598             vex_printf("strh  ");
   1599             ppARMAMode2(i->ARMin.LdSt16.amode);
   1600             vex_printf(", ");
   1601             ppHRegARM(i->ARMin.LdSt16.rD);
   1602          }
   1603          return;
   1604       case ARMin_LdSt8U:
   1605          if (i->ARMin.LdSt8U.isLoad) {
   1606             vex_printf("ldrb  ");
   1607             ppHRegARM(i->ARMin.LdSt8U.rD);
   1608             vex_printf(", ");
   1609             ppARMAMode1(i->ARMin.LdSt8U.amode);
   1610          } else {
   1611             vex_printf("strb  ");
   1612             ppARMAMode1(i->ARMin.LdSt8U.amode);
   1613             vex_printf(", ");
   1614             ppHRegARM(i->ARMin.LdSt8U.rD);
   1615          }
   1616          return;
   1617       case ARMin_Ld8S:
   1618          goto unhandled;
   1619       case ARMin_XDirect:
   1620          vex_printf("(xDirect) ");
   1621          vex_printf("if (%%cpsr.%s) { ",
   1622                     showARMCondCode(i->ARMin.XDirect.cond));
   1623          vex_printf("movw r12,0x%x; ",
   1624                     (UInt)(i->ARMin.XDirect.dstGA & 0xFFFF));
   1625          vex_printf("movt r12,0x%x; ",
   1626                     (UInt)((i->ARMin.XDirect.dstGA >> 16) & 0xFFFF));
   1627          vex_printf("str r12,");
   1628          ppARMAMode1(i->ARMin.XDirect.amR15T);
   1629          vex_printf("; movw r12,LO16($disp_cp_chain_me_to_%sEP); ",
   1630                     i->ARMin.XDirect.toFastEP ? "fast" : "slow");
   1631          vex_printf("movt r12,HI16($disp_cp_chain_me_to_%sEP); ",
   1632                     i->ARMin.XDirect.toFastEP ? "fast" : "slow");
   1633          vex_printf("blx r12 }");
   1634          return;
   1635       case ARMin_XIndir:
   1636          vex_printf("(xIndir) ");
   1637          vex_printf("if (%%cpsr.%s) { ",
   1638                     showARMCondCode(i->ARMin.XIndir.cond));
   1639          vex_printf("str ");
   1640          ppHRegARM(i->ARMin.XIndir.dstGA);
   1641          vex_printf(",");
   1642          ppARMAMode1(i->ARMin.XIndir.amR15T);
   1643          vex_printf("; movw r12,LO16($disp_cp_xindir); ");
   1644          vex_printf("movt r12,HI16($disp_cp_xindir); ");
   1645          vex_printf("blx r12 }");
   1646          return;
   1647       case ARMin_XAssisted:
   1648          vex_printf("(xAssisted) ");
   1649          vex_printf("if (%%cpsr.%s) { ",
   1650                     showARMCondCode(i->ARMin.XAssisted.cond));
   1651          vex_printf("str ");
   1652          ppHRegARM(i->ARMin.XAssisted.dstGA);
   1653          vex_printf(",");
   1654          ppARMAMode1(i->ARMin.XAssisted.amR15T);
   1655          vex_printf("movw r8,$IRJumpKind_to_TRCVAL(%d); ",
   1656                     (Int)i->ARMin.XAssisted.jk);
   1657          vex_printf("movw r12,LO16($disp_cp_xassisted); ");
   1658          vex_printf("movt r12,HI16($disp_cp_xassisted); ");
   1659          vex_printf("blx r12 }");
   1660          return;
   1661       case ARMin_CMov:
   1662          vex_printf("mov%s ", showARMCondCode(i->ARMin.CMov.cond));
   1663          ppHRegARM(i->ARMin.CMov.dst);
   1664          vex_printf(", ");
   1665          ppARMRI84(i->ARMin.CMov.src);
   1666          return;
   1667       case ARMin_Call:
   1668          vex_printf("call%s  ",
   1669                     i->ARMin.Call.cond==ARMcc_AL
   1670                        ? "" : showARMCondCode(i->ARMin.Call.cond));
   1671          vex_printf("0x%lx [nArgRegs=%d]",
   1672                     i->ARMin.Call.target, i->ARMin.Call.nArgRegs);
   1673          return;
   1674       case ARMin_Mul:
   1675          vex_printf("%-5s ", showARMMulOp(i->ARMin.Mul.op));
   1676          if (i->ARMin.Mul.op == ARMmul_PLAIN) {
   1677             vex_printf("r0, r2, r3");
   1678          } else {
   1679             vex_printf("r1:r0, r2, r3");
   1680          }
   1681          return;
   1682       case ARMin_Div:
   1683          vex_printf("%-5s ", showARMDivOp(i->ARMin.Div.op));
   1684          ppHRegARM(i->ARMin.Div.dst);
   1685          vex_printf(", ");
   1686          ppHRegARM(i->ARMin.Div.argL);
   1687          vex_printf(", ");
   1688          ppHRegARM(i->ARMin.Div.argR);
   1689          return;
   1690       case ARMin_LdrEX: {
   1691          HChar* sz = "";
   1692          switch (i->ARMin.LdrEX.szB) {
   1693             case 1: sz = "b"; break; case 2: sz = "h"; break;
   1694             case 8: sz = "d"; break; case 4: break;
   1695             default: vassert(0);
   1696          }
   1697          vex_printf("ldrex%s %sr2, [r4]",
   1698                     sz, i->ARMin.LdrEX.szB == 8 ? "r3:" : "");
   1699          return;
   1700       }
   1701       case ARMin_StrEX: {
   1702          HChar* sz = "";
   1703          switch (i->ARMin.StrEX.szB) {
   1704             case 1: sz = "b"; break; case 2: sz = "h"; break;
   1705             case 8: sz = "d"; break; case 4: break;
   1706             default: vassert(0);
   1707          }
   1708          vex_printf("strex%s r0, %sr2, [r4]",
   1709                     sz, i->ARMin.StrEX.szB == 8 ? "r3:" : "");
   1710          return;
   1711       }
   1712       case ARMin_VLdStD:
   1713          if (i->ARMin.VLdStD.isLoad) {
   1714             vex_printf("fldd  ");
   1715             ppHRegARM(i->ARMin.VLdStD.dD);
   1716             vex_printf(", ");
   1717             ppARMAModeV(i->ARMin.VLdStD.amode);
   1718          } else {
   1719             vex_printf("fstd  ");
   1720             ppARMAModeV(i->ARMin.VLdStD.amode);
   1721             vex_printf(", ");
   1722             ppHRegARM(i->ARMin.VLdStD.dD);
   1723          }
   1724          return;
   1725       case ARMin_VLdStS:
   1726          if (i->ARMin.VLdStS.isLoad) {
   1727             vex_printf("flds  ");
   1728             ppHRegARM(i->ARMin.VLdStS.fD);
   1729             vex_printf(", ");
   1730             ppARMAModeV(i->ARMin.VLdStS.amode);
   1731          } else {
   1732             vex_printf("fsts  ");
   1733             ppARMAModeV(i->ARMin.VLdStS.amode);
   1734             vex_printf(", ");
   1735             ppHRegARM(i->ARMin.VLdStS.fD);
   1736          }
   1737          return;
   1738       case ARMin_VAluD:
   1739          vex_printf("f%-3sd ", showARMVfpOp(i->ARMin.VAluD.op));
   1740          ppHRegARM(i->ARMin.VAluD.dst);
   1741          vex_printf(", ");
   1742          ppHRegARM(i->ARMin.VAluD.argL);
   1743          vex_printf(", ");
   1744          ppHRegARM(i->ARMin.VAluD.argR);
   1745          return;
   1746       case ARMin_VAluS:
   1747          vex_printf("f%-3ss ", showARMVfpOp(i->ARMin.VAluS.op));
   1748          ppHRegARM(i->ARMin.VAluS.dst);
   1749          vex_printf(", ");
   1750          ppHRegARM(i->ARMin.VAluS.argL);
   1751          vex_printf(", ");
   1752          ppHRegARM(i->ARMin.VAluS.argR);
   1753          return;
   1754       case ARMin_VUnaryD:
   1755          vex_printf("f%-3sd ", showARMVfpUnaryOp(i->ARMin.VUnaryD.op));
   1756          ppHRegARM(i->ARMin.VUnaryD.dst);
   1757          vex_printf(", ");
   1758          ppHRegARM(i->ARMin.VUnaryD.src);
   1759          return;
   1760       case ARMin_VUnaryS:
   1761          vex_printf("f%-3ss ", showARMVfpUnaryOp(i->ARMin.VUnaryS.op));
   1762          ppHRegARM(i->ARMin.VUnaryS.dst);
   1763          vex_printf(", ");
   1764          ppHRegARM(i->ARMin.VUnaryS.src);
   1765          return;
   1766       case ARMin_VCmpD:
   1767          vex_printf("fcmpd ");
   1768          ppHRegARM(i->ARMin.VCmpD.argL);
   1769          vex_printf(", ");
   1770          ppHRegARM(i->ARMin.VCmpD.argR);
   1771          vex_printf(" ; fmstat");
   1772          return;
   1773       case ARMin_VCMovD:
   1774          vex_printf("fcpyd%s ", showARMCondCode(i->ARMin.VCMovD.cond));
   1775          ppHRegARM(i->ARMin.VCMovD.dst);
   1776          vex_printf(", ");
   1777          ppHRegARM(i->ARMin.VCMovD.src);
   1778          return;
   1779       case ARMin_VCMovS:
   1780          vex_printf("fcpys%s ", showARMCondCode(i->ARMin.VCMovS.cond));
   1781          ppHRegARM(i->ARMin.VCMovS.dst);
   1782          vex_printf(", ");
   1783          ppHRegARM(i->ARMin.VCMovS.src);
   1784          return;
   1785       case ARMin_VCvtSD:
   1786          vex_printf("fcvt%s ", i->ARMin.VCvtSD.sToD ? "ds" : "sd");
   1787          ppHRegARM(i->ARMin.VCvtSD.dst);
   1788          vex_printf(", ");
   1789          ppHRegARM(i->ARMin.VCvtSD.src);
   1790          return;
   1791       case ARMin_VXferD:
   1792          vex_printf("vmov  ");
   1793          if (i->ARMin.VXferD.toD) {
   1794             ppHRegARM(i->ARMin.VXferD.dD);
   1795             vex_printf(", ");
   1796             ppHRegARM(i->ARMin.VXferD.rLo);
   1797             vex_printf(", ");
   1798             ppHRegARM(i->ARMin.VXferD.rHi);
   1799          } else {
   1800             ppHRegARM(i->ARMin.VXferD.rLo);
   1801             vex_printf(", ");
   1802             ppHRegARM(i->ARMin.VXferD.rHi);
   1803             vex_printf(", ");
   1804             ppHRegARM(i->ARMin.VXferD.dD);
   1805          }
   1806          return;
   1807       case ARMin_VXferS:
   1808          vex_printf("vmov  ");
   1809          if (i->ARMin.VXferS.toS) {
   1810             ppHRegARM(i->ARMin.VXferS.fD);
   1811             vex_printf(", ");
   1812             ppHRegARM(i->ARMin.VXferS.rLo);
   1813          } else {
   1814             ppHRegARM(i->ARMin.VXferS.rLo);
   1815             vex_printf(", ");
   1816             ppHRegARM(i->ARMin.VXferS.fD);
   1817          }
   1818          return;
   1819       case ARMin_VCvtID: {
   1820          HChar* nm = "?";
   1821          if (i->ARMin.VCvtID.iToD) {
   1822             nm = i->ARMin.VCvtID.syned ? "fsitod" : "fuitod";
   1823          } else {
   1824             nm = i->ARMin.VCvtID.syned ? "ftosid" : "ftouid";
   1825          }
   1826          vex_printf("%s ", nm);
   1827          ppHRegARM(i->ARMin.VCvtID.dst);
   1828          vex_printf(", ");
   1829          ppHRegARM(i->ARMin.VCvtID.src);
   1830          return;
   1831       }
   1832       case ARMin_FPSCR:
   1833          if (i->ARMin.FPSCR.toFPSCR) {
   1834             vex_printf("fmxr  fpscr, ");
   1835             ppHRegARM(i->ARMin.FPSCR.iReg);
   1836          } else {
   1837             vex_printf("fmrx  ");
   1838             ppHRegARM(i->ARMin.FPSCR.iReg);
   1839             vex_printf(", fpscr");
   1840          }
   1841          return;
   1842       case ARMin_MFence:
   1843          vex_printf("(mfence) dsb sy; dmb sy; isb");
   1844          return;
   1845       case ARMin_CLREX:
   1846          vex_printf("clrex");
   1847          return;
   1848       case ARMin_NLdStQ:
   1849          if (i->ARMin.NLdStQ.isLoad)
   1850             vex_printf("vld1.32 {");
   1851          else
   1852             vex_printf("vst1.32 {");
   1853          ppHRegARM(i->ARMin.NLdStQ.dQ);
   1854          vex_printf("} ");
   1855          ppARMAModeN(i->ARMin.NLdStQ.amode);
   1856          return;
   1857       case ARMin_NLdStD:
   1858          if (i->ARMin.NLdStD.isLoad)
   1859             vex_printf("vld1.32 {");
   1860          else
   1861             vex_printf("vst1.32 {");
   1862          ppHRegARM(i->ARMin.NLdStD.dD);
   1863          vex_printf("} ");
   1864          ppARMAModeN(i->ARMin.NLdStD.amode);
   1865          return;
   1866       case ARMin_NUnary:
   1867          vex_printf("%s%s%s  ",
   1868                     showARMNeonUnOp(i->ARMin.NUnary.op),
   1869                     showARMNeonUnOpDataType(i->ARMin.NUnary.op),
   1870                     showARMNeonDataSize(i));
   1871          ppHRegARM(i->ARMin.NUnary.dst);
   1872          vex_printf(", ");
   1873          ppHRegARM(i->ARMin.NUnary.src);
   1874          if (i->ARMin.NUnary.op == ARMneon_EQZ)
   1875             vex_printf(", #0");
   1876          if (i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
   1877              i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
   1878              i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
   1879              i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF) {
   1880             vex_printf(", #%d", i->ARMin.NUnary.size);
   1881          }
   1882          if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
   1883              i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
   1884              i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
   1885             UInt size;
   1886             size = i->ARMin.NUnary.size;
   1887             if (size & 0x40) {
   1888                vex_printf(", #%d", size - 64);
   1889             } else if (size & 0x20) {
   1890                vex_printf(", #%d", size - 32);
   1891             } else if (size & 0x10) {
   1892                vex_printf(", #%d", size - 16);
   1893             } else if (size & 0x08) {
   1894                vex_printf(", #%d", size - 8);
   1895             }
   1896          }
   1897          return;
   1898       case ARMin_NUnaryS:
   1899          vex_printf("%s%s%s  ",
   1900                     showARMNeonUnOpS(i->ARMin.NUnaryS.op),
   1901                     showARMNeonUnOpSDataType(i->ARMin.NUnaryS.op),
   1902                     showARMNeonDataSize(i));
   1903          ppARMNRS(i->ARMin.NUnaryS.dst);
   1904          vex_printf(", ");
   1905          ppARMNRS(i->ARMin.NUnaryS.src);
   1906          return;
   1907       case ARMin_NShift:
   1908          vex_printf("%s%s%s  ",
   1909                     showARMNeonShiftOp(i->ARMin.NShift.op),
   1910                     showARMNeonShiftOpDataType(i->ARMin.NShift.op),
   1911                     showARMNeonDataSize(i));
   1912          ppHRegARM(i->ARMin.NShift.dst);
   1913          vex_printf(", ");
   1914          ppHRegARM(i->ARMin.NShift.argL);
   1915          vex_printf(", ");
   1916          ppHRegARM(i->ARMin.NShift.argR);
   1917          return;
   1918       case ARMin_NDual:
   1919          vex_printf("%s%s%s  ",
   1920                     showARMNeonDualOp(i->ARMin.NDual.op),
   1921                     showARMNeonDualOpDataType(i->ARMin.NDual.op),
   1922                     showARMNeonDataSize(i));
   1923          ppHRegARM(i->ARMin.NDual.arg1);
   1924          vex_printf(", ");
   1925          ppHRegARM(i->ARMin.NDual.arg2);
   1926          return;
   1927       case ARMin_NBinary:
   1928          vex_printf("%s%s%s",
   1929                     showARMNeonBinOp(i->ARMin.NBinary.op),
   1930                     showARMNeonBinOpDataType(i->ARMin.NBinary.op),
   1931                     showARMNeonDataSize(i));
   1932          vex_printf("  ");
   1933          ppHRegARM(i->ARMin.NBinary.dst);
   1934          vex_printf(", ");
   1935          ppHRegARM(i->ARMin.NBinary.argL);
   1936          vex_printf(", ");
   1937          ppHRegARM(i->ARMin.NBinary.argR);
   1938          return;
   1939       case ARMin_NeonImm:
   1940          vex_printf("vmov  ");
   1941          ppHRegARM(i->ARMin.NeonImm.dst);
   1942          vex_printf(", ");
   1943          ppARMNImm(i->ARMin.NeonImm.imm);
   1944          return;
   1945       case ARMin_NCMovQ:
   1946          vex_printf("vmov%s ", showARMCondCode(i->ARMin.NCMovQ.cond));
   1947          ppHRegARM(i->ARMin.NCMovQ.dst);
   1948          vex_printf(", ");
   1949          ppHRegARM(i->ARMin.NCMovQ.src);
   1950          return;
   1951       case ARMin_Add32:
   1952          vex_printf("add32 ");
   1953          ppHRegARM(i->ARMin.Add32.rD);
   1954          vex_printf(", ");
   1955          ppHRegARM(i->ARMin.Add32.rN);
   1956          vex_printf(", ");
   1957          vex_printf("%d", i->ARMin.Add32.imm32);
   1958          return;
   1959       case ARMin_EvCheck:
   1960          vex_printf("(evCheck) ldr r12,");
   1961          ppARMAMode1(i->ARMin.EvCheck.amCounter);
   1962          vex_printf("; subs r12,r12,$1; str r12,");
   1963          ppARMAMode1(i->ARMin.EvCheck.amCounter);
   1964          vex_printf("; bpl nofail; ldr r12,");
   1965          ppARMAMode1(i->ARMin.EvCheck.amFailAddr);
   1966          vex_printf("; bx r12; nofail:");
   1967          return;
   1968       case ARMin_ProfInc:
   1969          vex_printf("(profInc) movw r12,LO16($NotKnownYet); "
   1970                     "movw r12,HI16($NotKnownYet); "
   1971                     "ldr r11,[r12]; "
   1972                     "adds r11,r11,$1; "
   1973                     "str r11,[r12]; "
   1974                     "ldr r11,[r12+4]; "
   1975                     "adc r11,r11,$0; "
   1976                     "str r11,[r12+4]");
   1977          return;
   1978       default:
   1979       unhandled:
   1980          vex_printf("ppARMInstr: unhandled case (tag %d)", (Int)i->tag);
   1981          vpanic("ppARMInstr(1)");
   1982          return;
   1983    }
   1984 }
   1985 
   1986 
   1987 /* --------- Helpers for register allocation. --------- */
   1988 
/* Describe, for the register allocator, how instruction |i| uses
   registers: |u| is (re)initialised and then, for every register the
   instruction touches, a Read/Write/Modify fact is recorded.  This
   includes fixed (real) registers implied by an instruction's
   encoding conventions -- e.g. r0..r3 for calls, r2/r3/r4 for
   ldrex/strex -- not just the virtual registers named in |i|.
   mode64 must be False: this is a 32-bit-only ARM host. */
void getRegUsage_ARMInstr ( HRegUsage* u, ARMInstr* i, Bool mode64 )
{
   vassert(mode64 == False);
   initHRegUsage(u);
   switch (i->tag) {
      case ARMin_Alu:
         /* dst = argL <op> argR */
         addHRegUse(u, HRmWrite, i->ARMin.Alu.dst);
         addHRegUse(u, HRmRead, i->ARMin.Alu.argL);
         addRegUsage_ARMRI84(u, i->ARMin.Alu.argR);
         return;
      case ARMin_Shift:
         addHRegUse(u, HRmWrite, i->ARMin.Shift.dst);
         addHRegUse(u, HRmRead, i->ARMin.Shift.argL);
         /* shift amount may itself be a register */
         addRegUsage_ARMRI5(u, i->ARMin.Shift.argR);
         return;
      case ARMin_Unary:
         addHRegUse(u, HRmWrite, i->ARMin.Unary.dst);
         addHRegUse(u, HRmRead, i->ARMin.Unary.src);
         return;
      case ARMin_CmpOrTst:
         /* sets condition codes only; no GPR is written */
         addHRegUse(u, HRmRead, i->ARMin.CmpOrTst.argL);
         addRegUsage_ARMRI84(u, i->ARMin.CmpOrTst.argR);
         return;
      case ARMin_Mov:
         addHRegUse(u, HRmWrite, i->ARMin.Mov.dst);
         addRegUsage_ARMRI84(u, i->ARMin.Mov.src);
         return;
      case ARMin_Imm32:
         addHRegUse(u, HRmWrite, i->ARMin.Imm32.dst);
         return;
      case ARMin_LdSt32:
         /* address registers are read in both directions; rD is
            written by a load, read by a store */
         addRegUsage_ARMAMode1(u, i->ARMin.LdSt32.amode);
         if (i->ARMin.LdSt32.isLoad) {
            addHRegUse(u, HRmWrite, i->ARMin.LdSt32.rD);
         } else {
            addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
         }
         return;
      case ARMin_LdSt16:
         /* 16-bit accesses use the AMode2 (reg +/- small imm) form */
         addRegUsage_ARMAMode2(u, i->ARMin.LdSt16.amode);
         if (i->ARMin.LdSt16.isLoad) {
            addHRegUse(u, HRmWrite, i->ARMin.LdSt16.rD);
         } else {
            addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
         }
         return;
      case ARMin_LdSt8U:
         addRegUsage_ARMAMode1(u, i->ARMin.LdSt8U.amode);
         if (i->ARMin.LdSt8U.isLoad) {
            addHRegUse(u, HRmWrite, i->ARMin.LdSt8U.rD);
         } else {
            addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
         }
         return;
      case ARMin_Ld8S:
         /* signed 8-bit load: not implemented anywhere in this file */
         goto unhandled;
      /* XDirect/XIndir/XAssisted are also a bit subtle.  They
         conditionally exit the block.  Hence we only need to list (1)
         the registers that they read, and (2) the registers that they
         write in the case where the block is not exited.  (2) is
         empty, hence only (1) is relevant here. */
      case ARMin_XDirect:
         addRegUsage_ARMAMode1(u, i->ARMin.XDirect.amR15T);
         return;
      case ARMin_XIndir:
         addHRegUse(u, HRmRead, i->ARMin.XIndir.dstGA);
         addRegUsage_ARMAMode1(u, i->ARMin.XIndir.amR15T);
         return;
      case ARMin_XAssisted:
         addHRegUse(u, HRmRead, i->ARMin.XAssisted.dstGA);
         addRegUsage_ARMAMode1(u, i->ARMin.XAssisted.amR15T);
         return;
      case ARMin_CMov:
         /* conditional move: dst is read as well as written, since it
            keeps its old value when the condition is false */
         addHRegUse(u, HRmWrite, i->ARMin.CMov.dst);
         addHRegUse(u, HRmRead,  i->ARMin.CMov.dst);
         addRegUsage_ARMRI84(u, i->ARMin.CMov.src);
         return;
      case ARMin_Call:
         /* logic and comments copied/modified from x86 back end */
         /* This is a bit subtle. */
         /* First off, claim it trashes all the caller-saved regs
            which fall within the register allocator's jurisdiction.
            These I believe to be r0,1,2,3.  If it turns out that r9
            is also caller-saved, then we'll have to add that here
            too. */
         addHRegUse(u, HRmWrite, hregARM_R0());
         addHRegUse(u, HRmWrite, hregARM_R1());
         addHRegUse(u, HRmWrite, hregARM_R2());
         addHRegUse(u, HRmWrite, hregARM_R3());
         /* Now we have to state any parameter-carrying registers
            which might be read.  This depends on nArgRegs. */
         switch (i->ARMin.Call.nArgRegs) {
            case 4: addHRegUse(u, HRmRead, hregARM_R3()); /*fallthru*/
            case 3: addHRegUse(u, HRmRead, hregARM_R2()); /*fallthru*/
            case 2: addHRegUse(u, HRmRead, hregARM_R1()); /*fallthru*/
            case 1: addHRegUse(u, HRmRead, hregARM_R0()); break;
            case 0: break;
            default: vpanic("getRegUsage_ARM:Call:regparms");
         }
         /* Finally, there is the issue that the insn trashes a
            register because the literal target address has to be
            loaded into a register.  Fortunately, for the nArgRegs=
            0/1/2/3 case, we can use r0, r1, r2 or r3 respectively, so
            this does not cause any further damage.  For the
            nArgRegs=4 case, we'll have to choose another register
            arbitrarily since all the caller saved regs are used for
            parameters, and so we might as well choose r11.
            */
         if (i->ARMin.Call.nArgRegs == 4)
            addHRegUse(u, HRmWrite, hregARM_R11());
         /* Upshot of this is that the assembler really must observe
            the here-stated convention of which register to use as an
            address temporary, depending on nArgRegs: 0==r0,
            1==r1, 2==r2, 3==r3, 4==r11 */
         return;
      case ARMin_Mul:
         /* fixed-register convention: operands in r2/r3, result in
            r0 (and r1 for the widening 64-bit variants) */
         addHRegUse(u, HRmRead, hregARM_R2());
         addHRegUse(u, HRmRead, hregARM_R3());
         addHRegUse(u, HRmWrite, hregARM_R0());
         if (i->ARMin.Mul.op != ARMmul_PLAIN)
            addHRegUse(u, HRmWrite, hregARM_R1());
         return;
      case ARMin_Div:
         addHRegUse(u, HRmWrite, i->ARMin.Div.dst);
         addHRegUse(u, HRmRead, i->ARMin.Div.argL);
         addHRegUse(u, HRmRead, i->ARMin.Div.argR);
         return;
      case ARMin_LdrEX:
         /* fixed-register convention: address in r4, result in r2
            (r3:r2 for the 8-byte doubleword case) */
         addHRegUse(u, HRmRead, hregARM_R4());
         addHRegUse(u, HRmWrite, hregARM_R2());
         if (i->ARMin.LdrEX.szB == 8)
            addHRegUse(u, HRmWrite, hregARM_R3());
         return;
      case ARMin_StrEX:
         /* fixed-register convention: address in r4, data in r2
            (r3:r2 for 8 bytes), success/fail status written to r0 */
         addHRegUse(u, HRmRead, hregARM_R4());
         addHRegUse(u, HRmWrite, hregARM_R0());
         addHRegUse(u, HRmRead, hregARM_R2());
         if (i->ARMin.StrEX.szB == 8)
            addHRegUse(u, HRmRead, hregARM_R3());
         return;
      case ARMin_VLdStD:
         addRegUsage_ARMAModeV(u, i->ARMin.VLdStD.amode);
         if (i->ARMin.VLdStD.isLoad) {
            addHRegUse(u, HRmWrite, i->ARMin.VLdStD.dD);
         } else {
            addHRegUse(u, HRmRead, i->ARMin.VLdStD.dD);
         }
         return;
      case ARMin_VLdStS:
         addRegUsage_ARMAModeV(u, i->ARMin.VLdStS.amode);
         if (i->ARMin.VLdStS.isLoad) {
            addHRegUse(u, HRmWrite, i->ARMin.VLdStS.fD);
         } else {
            addHRegUse(u, HRmRead, i->ARMin.VLdStS.fD);
         }
         return;
      case ARMin_VAluD:
         addHRegUse(u, HRmWrite, i->ARMin.VAluD.dst);
         addHRegUse(u, HRmRead, i->ARMin.VAluD.argL);
         addHRegUse(u, HRmRead, i->ARMin.VAluD.argR);
         return;
      case ARMin_VAluS:
         addHRegUse(u, HRmWrite, i->ARMin.VAluS.dst);
         addHRegUse(u, HRmRead, i->ARMin.VAluS.argL);
         addHRegUse(u, HRmRead, i->ARMin.VAluS.argR);
         return;
      case ARMin_VUnaryD:
         addHRegUse(u, HRmWrite, i->ARMin.VUnaryD.dst);
         addHRegUse(u, HRmRead, i->ARMin.VUnaryD.src);
         return;
      case ARMin_VUnaryS:
         addHRegUse(u, HRmWrite, i->ARMin.VUnaryS.dst);
         addHRegUse(u, HRmRead, i->ARMin.VUnaryS.src);
         return;
      case ARMin_VCmpD:
         /* result goes to FPSCR flags, not a register */
         addHRegUse(u, HRmRead, i->ARMin.VCmpD.argL);
         addHRegUse(u, HRmRead, i->ARMin.VCmpD.argR);
         return;
      case ARMin_VCMovD:
         /* conditional: dst read as well as written (see ARMin_CMov) */
         addHRegUse(u, HRmWrite, i->ARMin.VCMovD.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCMovD.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCMovD.src);
         return;
      case ARMin_VCMovS:
         addHRegUse(u, HRmWrite, i->ARMin.VCMovS.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCMovS.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCMovS.src);
         return;
      case ARMin_VCvtSD:
         addHRegUse(u, HRmWrite, i->ARMin.VCvtSD.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCvtSD.src);
         return;
      case ARMin_VXferD:
         /* GPR pair <-> D register transfer; direction decides which
            side is read and which is written */
         if (i->ARMin.VXferD.toD) {
            addHRegUse(u, HRmWrite, i->ARMin.VXferD.dD);
            addHRegUse(u, HRmRead,  i->ARMin.VXferD.rHi);
            addHRegUse(u, HRmRead,  i->ARMin.VXferD.rLo);
         } else {
            addHRegUse(u, HRmRead,  i->ARMin.VXferD.dD);
            addHRegUse(u, HRmWrite, i->ARMin.VXferD.rHi);
            addHRegUse(u, HRmWrite, i->ARMin.VXferD.rLo);
         }
         return;
      case ARMin_VXferS:
         if (i->ARMin.VXferS.toS) {
            addHRegUse(u, HRmWrite, i->ARMin.VXferS.fD);
            addHRegUse(u, HRmRead,  i->ARMin.VXferS.rLo);
         } else {
            addHRegUse(u, HRmRead,  i->ARMin.VXferS.fD);
            addHRegUse(u, HRmWrite, i->ARMin.VXferS.rLo);
         }
         return;
      case ARMin_VCvtID:
         addHRegUse(u, HRmWrite, i->ARMin.VCvtID.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCvtID.src);
         return;
      case ARMin_FPSCR:
         /* iReg is the source when writing FPSCR, the destination
            when reading it */
         if (i->ARMin.FPSCR.toFPSCR)
            addHRegUse(u, HRmRead, i->ARMin.FPSCR.iReg);
         else
            addHRegUse(u, HRmWrite, i->ARMin.FPSCR.iReg);
         return;
      case ARMin_MFence:
         /* no register usage */
         return;
      case ARMin_CLREX:
         /* no register usage */
         return;
      case ARMin_NLdStQ:
         if (i->ARMin.NLdStQ.isLoad)
            addHRegUse(u, HRmWrite, i->ARMin.NLdStQ.dQ);
         else
            addHRegUse(u, HRmRead, i->ARMin.NLdStQ.dQ);
         addRegUsage_ARMAModeN(u, i->ARMin.NLdStQ.amode);
         return;
      case ARMin_NLdStD:
         if (i->ARMin.NLdStD.isLoad)
            addHRegUse(u, HRmWrite, i->ARMin.NLdStD.dD);
         else
            addHRegUse(u, HRmRead, i->ARMin.NLdStD.dD);
         addRegUsage_ARMAModeN(u, i->ARMin.NLdStD.amode);
         return;
      case ARMin_NUnary:
         addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst);
         addHRegUse(u, HRmRead, i->ARMin.NUnary.src);
         return;
      case ARMin_NUnaryS:
         /* operands are ARMNRS wrappers; only the underlying regs
            matter for allocation */
         addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg);
         addHRegUse(u, HRmRead, i->ARMin.NUnaryS.src->reg);
         return;
      case ARMin_NShift:
         addHRegUse(u, HRmWrite, i->ARMin.NShift.dst);
         addHRegUse(u, HRmRead, i->ARMin.NShift.argL);
         addHRegUse(u, HRmRead, i->ARMin.NShift.argR);
         return;
      case ARMin_NDual:
         /* a dual op (e.g. vzip/vtrn) updates both operands in
            place, so each is both read and written */
         addHRegUse(u, HRmWrite, i->ARMin.NDual.arg1);
         addHRegUse(u, HRmWrite, i->ARMin.NDual.arg2);
         addHRegUse(u, HRmRead, i->ARMin.NDual.arg1);
         addHRegUse(u, HRmRead, i->ARMin.NDual.arg2);
         return;
      case ARMin_NBinary:
         addHRegUse(u, HRmWrite, i->ARMin.NBinary.dst);
         /* TODO: sometimes dst is also being read! */
         // XXX fix this
         addHRegUse(u, HRmRead, i->ARMin.NBinary.argL);
         addHRegUse(u, HRmRead, i->ARMin.NBinary.argR);
         return;
      case ARMin_NeonImm:
         addHRegUse(u, HRmWrite, i->ARMin.NeonImm.dst);
         return;
      case ARMin_NCMovQ:
         /* conditional: dst read as well as written (see ARMin_CMov) */
         addHRegUse(u, HRmWrite, i->ARMin.NCMovQ.dst);
         addHRegUse(u, HRmRead,  i->ARMin.NCMovQ.dst);
         addHRegUse(u, HRmRead,  i->ARMin.NCMovQ.src);
         return;
      case ARMin_Add32:
         addHRegUse(u, HRmWrite, i->ARMin.Add32.rD);
         addHRegUse(u, HRmRead, i->ARMin.Add32.rN);
         return;
      case ARMin_EvCheck:
         /* We expect both amodes only to mention r8, so this is in
            fact pointless, since r8 isn't allocatable, but
            anyway.. */
         addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amCounter);
         addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amFailAddr);
         addHRegUse(u, HRmWrite, hregARM_R12()); /* also unavail to RA */
         return;
      case ARMin_ProfInc:
         /* trashes r11 and r12 per the emitted counter-bump code */
         addHRegUse(u, HRmWrite, hregARM_R12());
         addHRegUse(u, HRmWrite, hregARM_R11());
         return;
      unhandled:
      default:
         ppARMInstr(i);
         vpanic("getRegUsage_ARMInstr");
   }
}
   2285 
   2286 
   2287 void mapRegs_ARMInstr ( HRegRemap* m, ARMInstr* i, Bool mode64 )
   2288 {
   2289    vassert(mode64 == False);
   2290    switch (i->tag) {
   2291       case ARMin_Alu:
   2292          i->ARMin.Alu.dst = lookupHRegRemap(m, i->ARMin.Alu.dst);
   2293          i->ARMin.Alu.argL = lookupHRegRemap(m, i->ARMin.Alu.argL);
   2294          mapRegs_ARMRI84(m, i->ARMin.Alu.argR);
   2295          return;
   2296       case ARMin_Shift:
   2297          i->ARMin.Shift.dst = lookupHRegRemap(m, i->ARMin.Shift.dst);
   2298          i->ARMin.Shift.argL = lookupHRegRemap(m, i->ARMin.Shift.argL);
   2299          mapRegs_ARMRI5(m, i->ARMin.Shift.argR);
   2300          return;
   2301       case ARMin_Unary:
   2302          i->ARMin.Unary.dst = lookupHRegRemap(m, i->ARMin.Unary.dst);
   2303          i->ARMin.Unary.src = lookupHRegRemap(m, i->ARMin.Unary.src);
   2304          return;
   2305       case ARMin_CmpOrTst:
   2306          i->ARMin.CmpOrTst.argL = lookupHRegRemap(m, i->ARMin.CmpOrTst.argL);
   2307          mapRegs_ARMRI84(m, i->ARMin.CmpOrTst.argR);
   2308          return;
   2309       case ARMin_Mov:
   2310          i->ARMin.Mov.dst = lookupHRegRemap(m, i->ARMin.Mov.dst);
   2311          mapRegs_ARMRI84(m, i->ARMin.Mov.src);
   2312          return;
   2313       case ARMin_Imm32:
   2314          i->ARMin.Imm32.dst = lookupHRegRemap(m, i->ARMin.Imm32.dst);
   2315          return;
   2316       case ARMin_LdSt32:
   2317          i->ARMin.LdSt32.rD = lookupHRegRemap(m, i->ARMin.LdSt32.rD);
   2318          mapRegs_ARMAMode1(m, i->ARMin.LdSt32.amode);
   2319          return;
   2320       case ARMin_LdSt16:
   2321          i->ARMin.LdSt16.rD = lookupHRegRemap(m, i->ARMin.LdSt16.rD);
   2322          mapRegs_ARMAMode2(m, i->ARMin.LdSt16.amode);
   2323          return;
   2324       case ARMin_LdSt8U:
   2325          i->ARMin.LdSt8U.rD = lookupHRegRemap(m, i->ARMin.LdSt8U.rD);
   2326          mapRegs_ARMAMode1(m, i->ARMin.LdSt8U.amode);
   2327          return;
   2328       case ARMin_Ld8S:
   2329          goto unhandled;
   2330       case ARMin_XDirect:
   2331          mapRegs_ARMAMode1(m, i->ARMin.XDirect.amR15T);
   2332          return;
   2333       case ARMin_XIndir:
   2334          i->ARMin.XIndir.dstGA
   2335             = lookupHRegRemap(m, i->ARMin.XIndir.dstGA);
   2336          mapRegs_ARMAMode1(m, i->ARMin.XIndir.amR15T);
   2337          return;
   2338       case ARMin_XAssisted:
   2339          i->ARMin.XAssisted.dstGA
   2340             = lookupHRegRemap(m, i->ARMin.XAssisted.dstGA);
   2341          mapRegs_ARMAMode1(m, i->ARMin.XAssisted.amR15T);
   2342          return;
   2343       case ARMin_CMov:
   2344          i->ARMin.CMov.dst = lookupHRegRemap(m, i->ARMin.CMov.dst);
   2345          mapRegs_ARMRI84(m, i->ARMin.CMov.src);
   2346          return;
   2347       case ARMin_Call:
   2348          return;
   2349       case ARMin_Mul:
   2350          return;
   2351       case ARMin_Div:
   2352          i->ARMin.Div.dst = lookupHRegRemap(m, i->ARMin.Div.dst);
   2353          i->ARMin.Div.argL = lookupHRegRemap(m, i->ARMin.Div.argL);
   2354          i->ARMin.Div.argR = lookupHRegRemap(m, i->ARMin.Div.argR);
   2355          return;
   2356       case ARMin_LdrEX:
   2357          return;
   2358       case ARMin_StrEX:
   2359          return;
   2360       case ARMin_VLdStD:
   2361          i->ARMin.VLdStD.dD = lookupHRegRemap(m, i->ARMin.VLdStD.dD);
   2362          mapRegs_ARMAModeV(m, i->ARMin.VLdStD.amode);
   2363          return;
   2364       case ARMin_VLdStS:
   2365          i->ARMin.VLdStS.fD = lookupHRegRemap(m, i->ARMin.VLdStS.fD);
   2366          mapRegs_ARMAModeV(m, i->ARMin.VLdStS.amode);
   2367          return;
   2368       case ARMin_VAluD:
   2369          i->ARMin.VAluD.dst  = lookupHRegRemap(m, i->ARMin.VAluD.dst);
   2370          i->ARMin.VAluD.argL = lookupHRegRemap(m, i->ARMin.VAluD.argL);
   2371          i->ARMin.VAluD.argR = lookupHRegRemap(m, i->ARMin.VAluD.argR);
   2372          return;
   2373       case ARMin_VAluS:
   2374          i->ARMin.VAluS.dst  = lookupHRegRemap(m, i->ARMin.VAluS.dst);
   2375          i->ARMin.VAluS.argL = lookupHRegRemap(m, i->ARMin.VAluS.argL);
   2376          i->ARMin.VAluS.argR = lookupHRegRemap(m, i->ARMin.VAluS.argR);
   2377          return;
   2378       case ARMin_VUnaryD:
   2379          i->ARMin.VUnaryD.dst = lookupHRegRemap(m, i->ARMin.VUnaryD.dst);
   2380          i->ARMin.VUnaryD.src = lookupHRegRemap(m, i->ARMin.VUnaryD.src);
   2381          return;
   2382       case ARMin_VUnaryS:
   2383          i->ARMin.VUnaryS.dst = lookupHRegRemap(m, i->ARMin.VUnaryS.dst);
   2384          i->ARMin.VUnaryS.src = lookupHRegRemap(m, i->ARMin.VUnaryS.src);
   2385          return;
   2386       case ARMin_VCmpD:
   2387          i->ARMin.VCmpD.argL = lookupHRegRemap(m, i->ARMin.VCmpD.argL);
   2388          i->ARMin.VCmpD.argR = lookupHRegRemap(m, i->ARMin.VCmpD.argR);
   2389          return;
   2390       case ARMin_VCMovD:
   2391          i->ARMin.VCMovD.dst = lookupHRegRemap(m, i->ARMin.VCMovD.dst);
   2392          i->ARMin.VCMovD.src = lookupHRegRemap(m, i->ARMin.VCMovD.src);
   2393          return;
   2394       case ARMin_VCMovS:
   2395          i->ARMin.VCMovS.dst = lookupHRegRemap(m, i->ARMin.VCMovS.dst);
   2396          i->ARMin.VCMovS.src = lookupHRegRemap(m, i->ARMin.VCMovS.src);
   2397          return;
   2398       case ARMin_VCvtSD:
   2399          i->ARMin.VCvtSD.dst = lookupHRegRemap(m, i->ARMin.VCvtSD.dst);
   2400          i->ARMin.VCvtSD.src = lookupHRegRemap(m, i->ARMin.VCvtSD.src);
   2401          return;
   2402       case ARMin_VXferD:
   2403          i->ARMin.VXferD.dD  = lookupHRegRemap(m, i->ARMin.VXferD.dD);
   2404          i->ARMin.VXferD.rHi = lookupHRegRemap(m, i->ARMin.VXferD.rHi);
   2405          i->ARMin.VXferD.rLo = lookupHRegRemap(m, i->ARMin.VXferD.rLo);
   2406          return;
   2407       case ARMin_VXferS:
   2408          i->ARMin.VXferS.fD  = lookupHRegRemap(m, i->ARMin.VXferS.fD);
   2409          i->ARMin.VXferS.rLo = lookupHRegRemap(m, i->ARMin.VXferS.rLo);
   2410          return;
   2411       case ARMin_VCvtID:
   2412          i->ARMin.VCvtID.dst = lookupHRegRemap(m, i->ARMin.VCvtID.dst);
   2413          i->ARMin.VCvtID.src = lookupHRegRemap(m, i->ARMin.VCvtID.src);
   2414          return;
   2415       case ARMin_FPSCR:
   2416          i->ARMin.FPSCR.iReg = lookupHRegRemap(m, i->ARMin.FPSCR.iReg);
   2417          return;
   2418       case ARMin_MFence:
   2419          return;
   2420       case ARMin_CLREX:
   2421          return;
   2422       case ARMin_NLdStQ:
   2423          i->ARMin.NLdStQ.dQ = lookupHRegRemap(m, i->ARMin.NLdStQ.dQ);
   2424          mapRegs_ARMAModeN(m, i->ARMin.NLdStQ.amode);
   2425          return;
   2426       case ARMin_NLdStD:
   2427          i->ARMin.NLdStD.dD = lookupHRegRemap(m, i->ARMin.NLdStD.dD);
   2428          mapRegs_ARMAModeN(m, i->ARMin.NLdStD.amode);
   2429          return;
   2430       case ARMin_NUnary:
   2431          i->ARMin.NUnary.src = lookupHRegRemap(m, i->ARMin.NUnary.src);
   2432          i->ARMin.NUnary.dst = lookupHRegRemap(m, i->ARMin.NUnary.dst);
   2433          return;
   2434       case ARMin_NUnaryS:
   2435          i->ARMin.NUnaryS.src->reg
   2436             = lookupHRegRemap(m, i->ARMin.NUnaryS.src->reg);
   2437          i->ARMin.NUnaryS.dst->reg
   2438             = lookupHRegRemap(m, i->ARMin.NUnaryS.dst->reg);
   2439          return;
   2440       case ARMin_NShift:
   2441          i->ARMin.NShift.dst = lookupHRegRemap(m, i->ARMin.NShift.dst);
   2442          i->ARMin.NShift.argL = lookupHRegRemap(m, i->ARMin.NShift.argL);
   2443          i->ARMin.NShift.argR = lookupHRegRemap(m, i->ARMin.NShift.argR);
   2444          return;
   2445       case ARMin_NDual:
   2446          i->ARMin.NDual.arg1 = lookupHRegRemap(m, i->ARMin.NDual.arg1);
   2447          i->ARMin.NDual.arg2 = lookupHRegRemap(m, i->ARMin.NDual.arg2);
   2448          return;
   2449       case ARMin_NBinary:
   2450          i->ARMin.NBinary.argL = lookupHRegRemap(m, i->ARMin.NBinary.argL);
   2451          i->ARMin.NBinary.argR = lookupHRegRemap(m, i->ARMin.NBinary.argR);
   2452          i->ARMin.NBinary.dst  = lookupHRegRemap(m, i->ARMin.NBinary.dst);
   2453          return;
   2454       case ARMin_NeonImm:
   2455          i->ARMin.NeonImm.dst = lookupHRegRemap(m, i->ARMin.NeonImm.dst);
   2456          return;
   2457       case ARMin_NCMovQ:
   2458          i->ARMin.NCMovQ.dst = lookupHRegRemap(m, i->ARMin.NCMovQ.dst);
   2459          i->ARMin.NCMovQ.src = lookupHRegRemap(m, i->ARMin.NCMovQ.src);
   2460          return;
   2461       case ARMin_Add32:
   2462          i->ARMin.Add32.rD = lookupHRegRemap(m, i->ARMin.Add32.rD);
   2463          i->ARMin.Add32.rN = lookupHRegRemap(m, i->ARMin.Add32.rN);
   2464          return;
   2465       case ARMin_EvCheck:
   2466          /* We expect both amodes only to mention r8, so this is in
   2467             fact pointless, since r8 isn't allocatable, but
   2468             anyway.. */
   2469          mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amCounter);
   2470          mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amFailAddr);
   2471          return;
   2472       case ARMin_ProfInc:
   2473          /* hardwires r11 and r12 -- nothing to modify. */
   2474          return;
   2475       unhandled:
   2476       default:
   2477          ppARMInstr(i);
   2478          vpanic("mapRegs_ARMInstr");
   2479    }
   2480 }
   2481 
   2482 /* Figure out if i represents a reg-reg move, and if so assign the
   2483    source and destination to *src and *dst.  If in doubt say No.  Used
   2484    by the register allocator to do move coalescing.
   2485 */
   2486 Bool isMove_ARMInstr ( ARMInstr* i, HReg* src, HReg* dst )
   2487 {
   2488    /* Moves between integer regs */
   2489    switch (i->tag) {
   2490       case ARMin_Mov:
   2491          if (i->ARMin.Mov.src->tag == ARMri84_R) {
   2492             *src = i->ARMin.Mov.src->ARMri84.R.reg;
   2493             *dst = i->ARMin.Mov.dst;
   2494             return True;
   2495          }
   2496          break;
   2497       case ARMin_VUnaryD:
   2498          if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) {
   2499             *src = i->ARMin.VUnaryD.src;
   2500             *dst = i->ARMin.VUnaryD.dst;
   2501             return True;
   2502          }
   2503          break;
   2504       case ARMin_VUnaryS:
   2505          if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) {
   2506             *src = i->ARMin.VUnaryS.src;
   2507             *dst = i->ARMin.VUnaryS.dst;
   2508             return True;
   2509          }
   2510          break;
   2511       case ARMin_NUnary:
   2512          if (i->ARMin.NUnary.op == ARMneon_COPY) {
   2513             *src = i->ARMin.NUnary.src;
   2514             *dst = i->ARMin.NUnary.dst;
   2515             return True;
   2516          }
   2517          break;
   2518       default:
   2519          break;
   2520    }
   2521 
   2522    return False;
   2523 }
   2524 
   2525 
   2526 /* Generate arm spill/reload instructions under the direction of the
   2527    register allocator.  Note it's critical these don't write the
   2528    condition codes. */
   2529 
   2530 void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
   2531                     HReg rreg, Int offsetB, Bool mode64 )
   2532 {
   2533    HRegClass rclass;
   2534    vassert(offsetB >= 0);
   2535    vassert(!hregIsVirtual(rreg));
   2536    vassert(mode64 == False);
   2537    *i1 = *i2 = NULL;
   2538    rclass = hregClass(rreg);
   2539    switch (rclass) {
   2540       case HRcInt32:
   2541          vassert(offsetB <= 4095);
   2542          *i1 = ARMInstr_LdSt32( False/*!isLoad*/,
   2543                                 rreg,
   2544                                 ARMAMode1_RI(hregARM_R8(), offsetB) );
   2545          return;
   2546       case HRcFlt32:
   2547       case HRcFlt64: {
   2548          HReg r8   = hregARM_R8();  /* baseblock */
   2549          HReg r12  = hregARM_R12(); /* spill temp */
   2550          HReg base = r8;
   2551          vassert(0 == (offsetB & 3));
   2552          if (offsetB >= 1024) {
   2553             Int offsetKB = offsetB / 1024;
   2554             /* r12 = r8 + (1024 * offsetKB) */
   2555             *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
   2556                                ARMRI84_I84(offsetKB, 11));
   2557             offsetB -= (1024 * offsetKB);
   2558             base = r12;
   2559          }
   2560          vassert(offsetB <= 1020);
   2561          if (rclass == HRcFlt32) {
   2562             *i2 = ARMInstr_VLdStS( False/*!isLoad*/,
   2563                                    rreg,
   2564                                    mkARMAModeV(base, offsetB) );
   2565          } else {
   2566             *i2 = ARMInstr_VLdStD( False/*!isLoad*/,
   2567                                    rreg,
   2568                                    mkARMAModeV(base, offsetB) );
   2569          }
   2570          return;
   2571       }
   2572       case HRcVec128: {
   2573          HReg r8  = hregARM_R8();
   2574          HReg r12 = hregARM_R12();
   2575          *i1 = ARMInstr_Add32(r12, r8, offsetB);
   2576          *i2 = ARMInstr_NLdStQ(False, rreg, mkARMAModeN_R(r12));
   2577          return;
   2578       }
   2579       default:
   2580          ppHRegClass(rclass);
   2581          vpanic("genSpill_ARM: unimplemented regclass");
   2582    }
   2583 }
   2584 
   2585 void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
   2586                      HReg rreg, Int offsetB, Bool mode64 )
   2587 {
   2588    HRegClass rclass;
   2589    vassert(offsetB >= 0);
   2590    vassert(!hregIsVirtual(rreg));
   2591    vassert(mode64 == False);
   2592    *i1 = *i2 = NULL;
   2593    rclass = hregClass(rreg);
   2594    switch (rclass) {
   2595       case HRcInt32:
   2596          vassert(offsetB <= 4095);
   2597          *i1 = ARMInstr_LdSt32( True/*isLoad*/,
   2598                                 rreg,
   2599                                 ARMAMode1_RI(hregARM_R8(), offsetB) );
   2600          return;
   2601       case HRcFlt32:
   2602       case HRcFlt64: {
   2603          HReg r8   = hregARM_R8();  /* baseblock */
   2604          HReg r12  = hregARM_R12(); /* spill temp */
   2605          HReg base = r8;
   2606          vassert(0 == (offsetB & 3));
   2607          if (offsetB >= 1024) {
   2608             Int offsetKB = offsetB / 1024;
   2609             /* r12 = r8 + (1024 * offsetKB) */
   2610             *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
   2611                                ARMRI84_I84(offsetKB, 11));
   2612             offsetB -= (1024 * offsetKB);
   2613             base = r12;
   2614          }
   2615          vassert(offsetB <= 1020);
   2616          if (rclass == HRcFlt32) {
   2617             *i2 = ARMInstr_VLdStS( True/*isLoad*/,
   2618                                    rreg,
   2619                                    mkARMAModeV(base, offsetB) );
   2620          } else {
   2621             *i2 = ARMInstr_VLdStD( True/*isLoad*/,
   2622                                    rreg,
   2623                                    mkARMAModeV(base, offsetB) );
   2624          }
   2625          return;
   2626       }
   2627       case HRcVec128: {
   2628          HReg r8  = hregARM_R8();
   2629          HReg r12 = hregARM_R12();
   2630          *i1 = ARMInstr_Add32(r12, r8, offsetB);
   2631          *i2 = ARMInstr_NLdStQ(True, rreg, mkARMAModeN_R(r12));
   2632          return;
   2633       }
   2634       default:
   2635          ppHRegClass(rclass);
   2636          vpanic("genReload_ARM: unimplemented regclass");
   2637    }
   2638 }
   2639 
   2640 
   2641 /* Emit an instruction into buf and return the number of bytes used.
   2642    Note that buf is not the insn's final place, and therefore it is
   2643    imperative to emit position-independent code. */
   2644 
   2645 static inline UChar iregNo ( HReg r )
   2646 {
   2647    UInt n;
   2648    vassert(hregClass(r) == HRcInt32);
   2649    vassert(!hregIsVirtual(r));
   2650    n = hregNumber(r);
   2651    vassert(n <= 15);
   2652    return toUChar(n);
   2653 }
   2654 
   2655 static inline UChar dregNo ( HReg r )
   2656 {
   2657    UInt n;
   2658    if (hregClass(r) != HRcFlt64)
   2659       ppHRegClass(hregClass(r));
   2660    vassert(hregClass(r) == HRcFlt64);
   2661    vassert(!hregIsVirtual(r));
   2662    n = hregNumber(r);
   2663    vassert(n <= 31);
   2664    return toUChar(n);
   2665 }
   2666 
   2667 static inline UChar fregNo ( HReg r )
   2668 {
   2669    UInt n;
   2670    vassert(hregClass(r) == HRcFlt32);
   2671    vassert(!hregIsVirtual(r));
   2672    n = hregNumber(r);
   2673    vassert(n <= 31);
   2674    return toUChar(n);
   2675 }
   2676 
   2677 static inline UChar qregNo ( HReg r )
   2678 {
   2679    UInt n;
   2680    vassert(hregClass(r) == HRcVec128);
   2681    vassert(!hregIsVirtual(r));
   2682    n = hregNumber(r);
   2683    vassert(n <= 15);
   2684    return toUChar(n);
   2685 }
   2686 
/* Pack four individual bits into a 4-bit field; zzb3 becomes the MSB. */
#define BITS4(zzb3,zzb2,zzb1,zzb0) \
   (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
/* All sixteen 4-bit constants, named by their binary value.  Used to
   make instruction-encoding expressions below read like the manual. */
#define X0000  BITS4(0,0,0,0)
#define X0001  BITS4(0,0,0,1)
#define X0010  BITS4(0,0,1,0)
#define X0011  BITS4(0,0,1,1)
#define X0100  BITS4(0,1,0,0)
#define X0101  BITS4(0,1,0,1)
#define X0110  BITS4(0,1,1,0)
#define X0111  BITS4(0,1,1,1)
#define X1000  BITS4(1,0,0,0)
#define X1001  BITS4(1,0,0,1)
#define X1010  BITS4(1,0,1,0)
#define X1011  BITS4(1,0,1,1)
#define X1100  BITS4(1,1,0,0)
#define X1101  BITS4(1,1,0,1)
#define X1110  BITS4(1,1,1,0)
#define X1111  BITS4(1,1,1,1)

/* The following macros assemble a 32-bit instruction word from 4-bit
   nibbles.  Each position in the macro name corresponds to one nibble
   of the result: position 7 is bits 31..28 (the condition field) down
   to position 0 at bits 3..0.  An underscore means that nibble is
   left as zero, for the caller to OR in later. */

/* Nibbles at positions 7..3; bits 11..0 left zero. */
#define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
    (((zzx3) & 0xF) << 12))

/* Nibbles at positions 7..2; bits 7..0 left zero. */
#define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2)        \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
    (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8))

/* Nibbles at positions 7..3 and 0; bits 11..4 left zero. */
#define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0)        \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
    (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) <<  0))

/* Nibbles at positions 7..5 and 1..0; bits 19..8 left zero. */
#define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
  ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
   (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
   (((zzx0) & 0xF) << 0))

/* All eight nibbles specified. */
#define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0)  \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
    (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8) |  \
    (((zzx1) & 0xF) <<  4) | (((zzx0) & 0xF) <<  0))

/* Only the top two nibbles; bits 23..0 left zero. */
#define XX______(zzx7,zzx6) \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24))
   2734 
   2735 /* Generate a skeletal insn that involves an a RI84 shifter operand.
   2736    Returns a word which is all zeroes apart from bits 25 and 11..0,
   2737    since it is those that encode the shifter operand (at least to the
   2738    extent that we care about it.) */
   2739 static UInt skeletal_RI84 ( ARMRI84* ri )
   2740 {
   2741    UInt instr;
   2742    if (ri->tag == ARMri84_I84) {
   2743       vassert(0 == (ri->ARMri84.I84.imm4 & ~0x0F));
   2744       vassert(0 == (ri->ARMri84.I84.imm8 & ~0xFF));
   2745       instr = 1 << 25;
   2746       instr |= (ri->ARMri84.I84.imm4 << 8);
   2747       instr |= ri->ARMri84.I84.imm8;
   2748    } else {
   2749       instr = 0 << 25;
   2750       instr |= iregNo(ri->ARMri84.R.reg);
   2751    }
   2752    return instr;
   2753 }
   2754 
   2755 /* Ditto for RI5.  Resulting word is zeroes apart from bit 4 and bits
   2756    11..7. */
   2757 static UInt skeletal_RI5 ( ARMRI5* ri )
   2758 {
   2759    UInt instr;
   2760    if (ri->tag == ARMri5_I5) {
   2761       UInt imm5 = ri->ARMri5.I5.imm5;
   2762       vassert(imm5 >= 1 && imm5 <= 31);
   2763       instr = 0 << 4;
   2764       instr |= imm5 << 7;
   2765    } else {
   2766       instr = 1 << 4;
   2767       instr |= iregNo(ri->ARMri5.R.reg) << 8;
   2768    }
   2769    return instr;
   2770 }
   2771 
   2772 
   2773 /* Get an immediate into a register, using only that
   2774    register.  (very lame..) */
static UInt* imm32_to_iregNo ( UInt* p, Int rD, UInt imm32 )
{
   UInt instr;
   vassert(rD >= 0 && rD <= 14); // r15 not good to mess with!
#if 0
   /* Disabled older scheme: inline literal pool (ldr from pc), kept
      for reference only. */
   if (0 == (imm32 & ~0xFF)) {
      /* mov with a immediate shifter operand of (0, imm32) (??) */
      instr = XXXXXX__(X1110,X0011,X1010,X0000,rD,X0000);
      instr |= imm32;
      *p++ = instr;
   } else {
      // this is very bad; causes Dcache pollution
      // ldr  rD, [pc]
      instr = XXXXX___(X1110,X0101,X1001,X1111,rD);
      *p++ = instr;
      // b .+8
      instr = 0xEA000000;
      *p++ = instr;
      // .word imm32
      *p++ = imm32;
   }
#else
   if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
      /* Generate movw rD, #low16.  Then, if the high 16 are
         nonzero, generate movt rD, #high16. */
      UInt lo16 = imm32 & 0xFFFF;
      UInt hi16 = (imm32 >> 16) & 0xFFFF;
      /* movw: cond=AL, opcode nibbles 0x3/0x0, imm4 in bits 19..16,
         imm12 in bits 11..0. */
      instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
                       (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
                       lo16 & 0xF);
      *p++ = instr;
      if (hi16 != 0) {
         /* movt: same layout, opcode nibble 0x4. */
         instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
                          (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
                          hi16 & 0xF);
         *p++ = instr;
      }
   } else {
      /* Pre-v7: no movw/movt.  Build the value one byte at a time,
         using MOV (rotated immediate) for the first emitted insn and
         ORR rD, rD, #byte for the rest.  At least one insn is always
         emitted (the imm32 == 0 case below). */
      UInt imm, rot;
      UInt op = X1010;   /* with the 0x3 nibble: opcode 1101 = MOV */
      UInt rN = 0;       /* MOV ignores Rn; becomes rD for the ORRs */
      if ((imm32 & 0xFF) || (imm32 == 0)) {
         /* byte 0, no rotation */
         imm = imm32 & 0xFF;
         rot = 0;
         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
         *p++ = instr;
         op = X1000;     /* subsequent insns: opcode 1100 = ORR */
         rN = rD;
      }
      if (imm32 & 0xFF000000) {
         /* byte 3: rotate-right by 8 places it at bits 31..24 */
         imm = (imm32 >> 24) & 0xFF;
         rot = 4;
         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
         *p++ = instr;
         op = X1000;
         rN = rD;
      }
      if (imm32 & 0xFF0000) {
         /* byte 2: rotate-right by 16 places it at bits 23..16 */
         imm = (imm32 >> 16) & 0xFF;
         rot = 8;
         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
         *p++ = instr;
         op = X1000;
         rN = rD;
      }
      if (imm32 & 0xFF00) {
         /* byte 1: rotate-right by 24 places it at bits 15..8 */
         imm = (imm32 >> 8) & 0xFF;
         rot = 12;
         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
         *p++ = instr;
         op = X1000;
         rN = rD;
      }
   }
#endif
   return p;
}
   2852 
   2853 /* Get an immediate into a register, using only that register, and
   2854    generating exactly 2 instructions, regardless of the value of the
   2855    immediate. This is used when generating sections of code that need
   2856    to be patched later, so as to guarantee a specific size. */
   2857 static UInt* imm32_to_iregNo_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
   2858 {
   2859    if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
   2860       /* Generate movw rD, #low16 ;  movt rD, #high16. */
   2861       UInt lo16 = imm32 & 0xFFFF;
   2862       UInt hi16 = (imm32 >> 16) & 0xFFFF;
   2863       UInt instr;
   2864       instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
   2865                        (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
   2866                        lo16 & 0xF);
   2867       *p++ = instr;
   2868       instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
   2869                        (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
   2870                        hi16 & 0xF);
   2871       *p++ = instr;
   2872    } else {
   2873       vassert(0); /* lose */
   2874    }
   2875    return p;
   2876 }
   2877 
   2878 /* Check whether p points at a 2-insn sequence cooked up by
   2879    imm32_to_iregNo_EXACTLY2(). */
   2880 static Bool is_imm32_to_iregNo_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
   2881 {
   2882    if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
   2883       /* Generate movw rD, #low16 ;  movt rD, #high16. */
   2884       UInt lo16 = imm32 & 0xFFFF;
   2885       UInt hi16 = (imm32 >> 16) & 0xFFFF;
   2886       UInt i0, i1;
   2887       i0 = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
   2888                     (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
   2889                     lo16 & 0xF);
   2890       i1 = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
   2891                     (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
   2892                     hi16 & 0xF);
   2893       return p[0] == i0 && p[1] == i1;
   2894    } else {
   2895       vassert(0); /* lose */
   2896    }
   2897 }
   2898 
   2899 
   2900 static UInt* do_load_or_store32 ( UInt* p,
   2901                                   Bool isLoad, UInt rD, ARMAMode1* am )
   2902 {
   2903    vassert(rD <= 12);
   2904    vassert(am->tag == ARMam1_RI); // RR case is not handled
   2905    UInt bB = 0;
   2906    UInt bL = isLoad ? 1 : 0;
   2907    Int  simm12;
   2908    UInt instr, bP;
   2909    if (am->ARMam1.RI.simm13 < 0) {
   2910       bP = 0;
   2911       simm12 = -am->ARMam1.RI.simm13;
   2912    } else {
   2913       bP = 1;
   2914       simm12 = am->ARMam1.RI.simm13;
   2915    }
   2916    vassert(simm12 >= 0 && simm12 <= 4095);
   2917    instr = XXXXX___(X1110,X0101,BITS4(bP,bB,0,bL),
   2918                     iregNo(am->ARMam1.RI.reg),
   2919                     rD);
   2920    instr |= simm12;
   2921    *p++ = instr;
   2922    return p;
   2923 }
   2924 
   2925 
   2926 /* Emit an instruction into buf and return the number of bytes used.
   2927    Note that buf is not the insn's final place, and therefore it is
   2928    imperative to emit position-independent code.  If the emitted
   2929    instruction was a profiler inc, set *is_profInc to True, else
   2930    leave it unchanged. */
   2931 
   2932 Int emit_ARMInstr ( /*MB_MOD*/Bool* is_profInc,
   2933                     UChar* buf, Int nbuf, ARMInstr* i,
   2934                     Bool mode64,
   2935                     void* disp_cp_chain_me_to_slowEP,
   2936                     void* disp_cp_chain_me_to_fastEP,
   2937                     void* disp_cp_xindir,
   2938                     void* disp_cp_xassisted )
   2939 {
   2940    UInt* p = (UInt*)buf;
   2941    vassert(nbuf >= 32);
   2942    vassert(mode64 == False);
   2943    vassert(0 == (((HWord)buf) & 3));
   2944 
   2945    switch (i->tag) {
   2946       case ARMin_Alu: {
   2947          UInt     instr, subopc;
   2948          UInt     rD   = iregNo(i->ARMin.Alu.dst);
   2949          UInt     rN   = iregNo(i->ARMin.Alu.argL);
   2950          ARMRI84* argR = i->ARMin.Alu.argR;
   2951          switch (i->ARMin.Alu.op) {
   2952             case ARMalu_ADDS: /* fallthru */
   2953             case ARMalu_ADD:  subopc = X0100; break;
   2954             case ARMalu_ADC:  subopc = X0101; break;
   2955             case ARMalu_SUBS: /* fallthru */
   2956             case ARMalu_SUB:  subopc = X0010; break;
   2957             case ARMalu_SBC:  subopc = X0110; break;
   2958             case ARMalu_AND:  subopc = X0000; break;
   2959             case ARMalu_BIC:  subopc = X1110; break;
   2960             case ARMalu_OR:   subopc = X1100; break;
   2961             case ARMalu_XOR:  subopc = X0001; break;
   2962             default: goto bad;
   2963          }
   2964          instr = skeletal_RI84(argR);
   2965          instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
   2966                            (subopc << 1) & 0xF, rN, rD);
   2967          if (i->ARMin.Alu.op == ARMalu_ADDS
   2968              || i->ARMin.Alu.op == ARMalu_SUBS) {
   2969             instr |= 1<<20;  /* set the S bit */
   2970          }
   2971          *p++ = instr;
   2972          goto done;
   2973       }
   2974       case ARMin_Shift: {
   2975          UInt    instr, subopc;
   2976          HReg    rD   = iregNo(i->ARMin.Shift.dst);
   2977          HReg    rM   = iregNo(i->ARMin.Shift.argL);
   2978          ARMRI5* argR = i->ARMin.Shift.argR;
   2979          switch (i->ARMin.Shift.op) {
   2980             case ARMsh_SHL: subopc = X0000; break;
   2981             case ARMsh_SHR: subopc = X0001; break;
   2982             case ARMsh_SAR: subopc = X0010; break;
   2983             default: goto bad;
   2984          }
   2985          instr = skeletal_RI5(argR);
   2986          instr |= XXXXX__X(X1110,X0001,X1010,X0000,rD, /* _ _ */ rM);
   2987          instr |= (subopc & 3) << 5;
   2988          *p++ = instr;
   2989          goto done;
   2990       }
   2991       case ARMin_Unary: {
   2992          UInt instr;
   2993          HReg rDst = iregNo(i->ARMin.Unary.dst);
   2994          HReg rSrc = iregNo(i->ARMin.Unary.src);
   2995          switch (i->ARMin.Unary.op) {
   2996             case ARMun_CLZ:
   2997                instr = XXXXXXXX(X1110,X0001,X0110,X1111,
   2998                                 rDst,X1111,X0001,rSrc);
   2999                *p++ = instr;
   3000                goto done;
   3001             case ARMun_NEG: /* RSB rD,rS,#0 */
   3002                instr = XXXXX___(X1110,0x2,0x6,rSrc,rDst);
   3003                *p++ = instr;
   3004                goto done;
   3005             case ARMun_NOT: {
   3006                UInt subopc = X1111; /* MVN */
   3007                instr = rSrc;
   3008                instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
   3009                                  (subopc << 1) & 0xF, 0, rDst);
   3010                *p++ = instr;
   3011                goto done;
   3012             }
   3013             default:
   3014                break;
   3015          }
   3016          goto bad;
   3017       }
   3018       case ARMin_CmpOrTst: {
   3019          UInt instr  = skeletal_RI84(i->ARMin.CmpOrTst.argR);
   3020          UInt subopc = i->ARMin.CmpOrTst.isCmp ? X1010 : X1000;
   3021          UInt SBZ    = 0;
   3022          instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
   3023                            ((subopc << 1) & 0xF) | 1,
   3024                            i->ARMin.CmpOrTst.argL, SBZ );
   3025          *p++ = instr;
   3026          goto done;
   3027       }
   3028       case ARMin_Mov: {
   3029          UInt instr  = skeletal_RI84(i->ARMin.Mov.src);
   3030          UInt subopc = X1101; /* MOV */
   3031          UInt SBZ    = 0;
   3032          instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
   3033                            (subopc << 1) & 0xF, SBZ, i->ARMin.Mov.dst);
   3034          *p++ = instr;
   3035          goto done;
   3036       }
   3037       case ARMin_Imm32: {
   3038          p = imm32_to_iregNo( (UInt*)p, iregNo(i->ARMin.Imm32.dst),
   3039                                         i->ARMin.Imm32.imm32 );
   3040          goto done;
   3041       }
   3042       case ARMin_LdSt32:
   3043       case ARMin_LdSt8U: {
   3044          UInt       bL, bB;
   3045          HReg       rD;
   3046          ARMAMode1* am;
   3047          if (i->tag == ARMin_LdSt32) {
   3048             bB = 0;
   3049             bL = i->ARMin.LdSt32.isLoad ? 1 : 0;
   3050             am = i->ARMin.LdSt32.amode;
   3051             rD = i->ARMin.LdSt32.rD;
   3052          } else {
   3053             bB = 1;
   3054             bL = i->ARMin.LdSt8U.isLoad ? 1 : 0;
   3055             am = i->ARMin.LdSt8U.amode;
   3056             rD = i->ARMin.LdSt8U.rD;
   3057          }
   3058          if (am->tag == ARMam1_RI) {
   3059             Int  simm12;
   3060             UInt instr, bP;
   3061             if (am->ARMam1.RI.simm13 < 0) {
   3062                bP = 0;
   3063                simm12 = -am->ARMam1.RI.simm13;
   3064             } else {
   3065                bP = 1;
   3066                simm12 = am->ARMam1.RI.simm13;
   3067             }
   3068             vassert(simm12 >= 0 && simm12 <= 4095);
   3069             instr = XXXXX___(X1110,X0101,BITS4(bP,bB,0,bL),
   3070                              iregNo(am->ARMam1.RI.reg),
   3071                              iregNo(rD));
   3072             instr |= simm12;
   3073             *p++ = instr;
   3074             goto done;
   3075          } else {
   3076             // RR case
   3077             goto bad;
   3078          }
   3079       }
      /* 16-bit integer load/store: strh / ldrh.  Only the reg+simm9
         (ARMam2_RI) addressing form is handled; the reg+reg form and
         the signed 16-bit load (ldrsh) both fall through to "bad". */
      case ARMin_LdSt16: {
         HReg       rD = i->ARMin.LdSt16.rD;
         UInt       bS = i->ARMin.LdSt16.signedLoad ? 1 : 0;
         UInt       bL = i->ARMin.LdSt16.isLoad ? 1 : 0;
         ARMAMode2* am = i->ARMin.LdSt16.amode;
         if (am->tag == ARMam2_RI) {
            HReg rN = am->ARMam2.RI.reg;
            Int  simm8;
            UInt bP, imm8hi, imm8lo, instr;
            /* Split the signed offset into a direction bit (bP: 1 for
               add, 0 for subtract) and an 8-bit magnitude, which the
               halfword encoding carries as two separate nibbles. */
            if (am->ARMam2.RI.simm9 < 0) {
               bP = 0;
               simm8 = -am->ARMam2.RI.simm9;
            } else {
               bP = 1;
               simm8 = am->ARMam2.RI.simm9;
            }
            vassert(simm8 >= 0 && simm8 <= 255);
            imm8hi = (simm8 >> 4) & 0xF;
            imm8lo = simm8 & 0xF;
            vassert(!(bL == 0 && bS == 1)); // "! signed store"
            /**/ if (bL == 0 && bS == 0) {
               // strh
               instr = XXXXXXXX(X1110,X0001, BITS4(bP,1,0,0), iregNo(rN),
                                iregNo(rD), imm8hi, X1011, imm8lo);
               *p++ = instr;
               goto done;
            }
            else if (bL == 1 && bS == 0) {
               // ldrh
               instr = XXXXXXXX(X1110,X0001, BITS4(bP,1,0,1), iregNo(rN),
                                iregNo(rD), imm8hi, X1011, imm8lo);
               *p++ = instr;
               goto done;
            }
            else if (bL == 1 && bS == 1) {
               // ldrsh: not emitted here; handled as a failure case
               goto bad;
            }
            else vassert(0); // ill-constructed insn
         } else {
            // RR case
            goto bad;
         }
      }
      /* 8-bit signed load: not implemented by this emitter. */
      case ARMin_Ld8S:
         goto bad;
   3125 
      /* Direct transfer to a known guest address, emitted as a
         patchable "chain-me" call into the dispatcher. */
      case ARMin_XDirect: {
         /* NB: what goes on here has to be very closely coordinated
            with the chainXDirect_ARM and unchainXDirect_ARM below. */
         /* We're generating chain-me requests here, so we need to be
            sure this is actually allowed -- no-redir translations
            can't use chain-me's.  Hence: */
         vassert(disp_cp_chain_me_to_slowEP != NULL);
         vassert(disp_cp_chain_me_to_fastEP != NULL);

         /* Use ptmp for backpatching conditional jumps. */
         UInt* ptmp = NULL;

         /* First off, if this is conditional, create a conditional
            jump over the rest of it.  Or at least, leave a space for
            it that we will shortly fill in. */
         if (i->ARMin.XDirect.cond != ARMcc_AL) {
            vassert(i->ARMin.XDirect.cond != ARMcc_NV);
            ptmp = p;
            *p++ = 0;
         }

         /* Update the guest R15T. */
         /* movw r12, lo16(dstGA) */
         /* movt r12, hi16(dstGA) */
         /* str r12, amR15T */
         p = imm32_to_iregNo(p, /*r*/12, i->ARMin.XDirect.dstGA);
         p = do_load_or_store32(p, False/*!isLoad*/,
                                /*r*/12, i->ARMin.XDirect.amR15T);

         /* --- FIRST PATCHABLE BYTE follows --- */
         /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
            calling to) backs up the return address, so as to find the
            address of the first patchable byte.  So: don't change the
            number of instructions (3) below. */
         /* movw r12, lo16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
         /* movt r12, hi16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
         /* blx  r12  (A1) */
         void* disp_cp_chain_me
                  = i->ARMin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
                                              : disp_cp_chain_me_to_slowEP;
         p = imm32_to_iregNo_EXACTLY2(p, /*r*/12,
                                      (UInt)Ptr_to_ULong(disp_cp_chain_me));
         *p++ = 0xE12FFF3C;   /* blx r12 */
         /* --- END of PATCHABLE BYTES --- */

         /* Fix up the conditional jump, if there was one. */
         if (i->ARMin.XDirect.cond != ARMcc_AL) {
            Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
            vassert(delta > 0 && delta < 40);
            vassert((delta & 3) == 0);
            /* ARM condition codes pair up such that flipping bit 0
               inverts the condition. */
            UInt notCond = 1 ^ (UInt)i->ARMin.XDirect.cond;
            vassert(notCond <= 13); /* Neither AL nor NV */
            /* Byte offset -> word count, minus 2 for the branch's
               implicit PC+8 bias. */
            delta = (delta >> 2) - 2;
            *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
         }
         goto done;
      }
   3183 
      /* Indirect transfer: guest PC is in a register; store it to the
         guest R15T slot and jump to the indirect-dispatch entry. */
      case ARMin_XIndir: {
         /* We're generating transfers that could lead indirectly to a
            chain-me, so we need to be sure this is actually allowed
            -- no-redir translations are not allowed to reach normal
            translations without going through the scheduler.  That
            means no XDirects or XIndirs out from no-redir
            translations.  Hence: */
         vassert(disp_cp_xindir != NULL);

         /* Use ptmp for backpatching conditional jumps. */
         UInt* ptmp = NULL;

         /* First off, if this is conditional, create a conditional
            jump over the rest of it.  Or at least, leave a space for
            it that we will shortly fill in. */
         if (i->ARMin.XIndir.cond != ARMcc_AL) {
            vassert(i->ARMin.XIndir.cond != ARMcc_NV);
            ptmp = p;
            *p++ = 0;
         }

         /* Update the guest R15T. */
         /* str r-dstGA, amR15T */
         p = do_load_or_store32(p, False/*!isLoad*/,
                                iregNo(i->ARMin.XIndir.dstGA),
                                i->ARMin.XIndir.amR15T);

         /* movw r12, lo16(VG_(disp_cp_xindir)) */
         /* movt r12, hi16(VG_(disp_cp_xindir)) */
         /* bx   r12  (A1) */
         p = imm32_to_iregNo(p, /*r*/12,
                             (UInt)Ptr_to_ULong(disp_cp_xindir));
         *p++ = 0xE12FFF1C;   /* bx r12 */

         /* Fix up the conditional jump, if there was one. */
         if (i->ARMin.XIndir.cond != ARMcc_AL) {
            Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
            vassert(delta > 0 && delta < 40);
            vassert((delta & 3) == 0);
            /* Flip bit 0 to invert the condition code. */
            UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
            vassert(notCond <= 13); /* Neither AL nor NV */
            delta = (delta >> 2) - 2;
            *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
         }
         goto done;
      }
   3230 
      /* Assisted transfer: store dstGA to the guest R15T slot, load a
         trap-code "magic number" describing the jump kind into r8,
         and jump to the assisted dispatcher entry point. */
      case ARMin_XAssisted: {
         /* Use ptmp for backpatching conditional jumps. */
         UInt* ptmp = NULL;

         /* First off, if this is conditional, create a conditional
            jump over the rest of it.  Or at least, leave a space for
            it that we will shortly fill in. */
         if (i->ARMin.XAssisted.cond != ARMcc_AL) {
            vassert(i->ARMin.XAssisted.cond != ARMcc_NV);
            ptmp = p;
            *p++ = 0;
         }

         /* Update the guest R15T. */
         /* str r-dstGA, amR15T */
         p = do_load_or_store32(p, False/*!isLoad*/,
                                iregNo(i->ARMin.XAssisted.dstGA),
                                i->ARMin.XAssisted.amR15T);

         /* movw r8,  $magic_number */
         UInt trcval = 0;
         switch (i->ARMin.XAssisted.jk) {
            case Ijk_ClientReq:   trcval = VEX_TRC_JMP_CLIENTREQ;   break;
            case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
            //case Ijk_Sys_int128:  trcval = VEX_TRC_JMP_SYS_INT128;  break;
            //case Ijk_Yield:       trcval = VEX_TRC_JMP_YIELD;       break;
            //case Ijk_EmWarn:      trcval = VEX_TRC_JMP_EMWARN;      break;
            //case Ijk_MapFail:     trcval = VEX_TRC_JMP_MAPFAIL;     break;
            case Ijk_NoDecode:    trcval = VEX_TRC_JMP_NODECODE;    break;
            case Ijk_TInval:      trcval = VEX_TRC_JMP_TINVAL;      break;
            case Ijk_NoRedir:     trcval = VEX_TRC_JMP_NOREDIR;     break;
            //case Ijk_SigTRAP:     trcval = VEX_TRC_JMP_SIGTRAP;     break;
            //case Ijk_SigSEGV:     trcval = VEX_TRC_JMP_SIGSEGV;     break;
            case Ijk_Boring:      trcval = VEX_TRC_JMP_BORING;      break;
            /* We don't expect to see the following being assisted. */
            //case Ijk_Ret:
            //case Ijk_Call:
            /* fallthrough */
            default:
               ppIRJumpKind(i->ARMin.XAssisted.jk);
               vpanic("emit_ARMInstr.ARMin_XAssisted: unexpected jump kind");
         }
         vassert(trcval != 0);
         p = imm32_to_iregNo(p, /*r*/8, trcval);

         /* movw r12, lo16(VG_(disp_cp_xassisted)) */
         /* movt r12, hi16(VG_(disp_cp_xassisted)) */
         /* bx   r12  (A1) */
         p = imm32_to_iregNo(p, /*r*/12,
                             (UInt)Ptr_to_ULong(disp_cp_xassisted));
         *p++ = 0xE12FFF1C;   /* bx r12 */

         /* Fix up the conditional jump, if there was one. */
         if (i->ARMin.XAssisted.cond != ARMcc_AL) {
            Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
            vassert(delta > 0 && delta < 40);
            vassert((delta & 3) == 0);
            /* Flip bit 0 to invert the condition code. */
            UInt notCond = 1 ^ (UInt)i->ARMin.XAssisted.cond;
            vassert(notCond <= 13); /* Neither AL nor NV */
            delta = (delta >> 2) - 2;
            *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
         }
         goto done;
      }
   3295 
   3296       case ARMin_CMov: {
   3297          UInt instr  = skeletal_RI84(i->ARMin.CMov.src);
   3298          UInt subopc = X1101; /* MOV */
   3299          UInt SBZ    = 0;
   3300          instr |= XXXXX___(i->ARMin.CMov.cond, (1 & (subopc >> 3)),
   3301                            (subopc << 1) & 0xF, SBZ, i->ARMin.CMov.dst);
   3302          *p++ = instr;
   3303          goto done;
   3304       }
      /* Call to a fixed address: materialise the target into a
         scratch register, then blx{cond} through it. */
      case ARMin_Call: {
         UInt instr;
         /* Decide on a scratch reg used to hold to the call address.
            This has to be done as per the comments in getRegUsage. */
         Int scratchNo;
         switch (i->ARMin.Call.nArgRegs) {
            case 0:  scratchNo = 0;  break;
            case 1:  scratchNo = 1;  break;
            case 2:  scratchNo = 2;  break;
            case 3:  scratchNo = 3;  break;
            case 4:  scratchNo = 11; break;
            default: vassert(0);
         }
         // r"scratchNo" = &target
         p = imm32_to_iregNo( (UInt*)p,
                              scratchNo, (UInt)i->ARMin.Call.target );
         // blx{cond} r"scratchNo"
         instr = XXX___XX(i->ARMin.Call.cond, X0001, X0010, /*___*/
                          X0011, scratchNo);
         instr |= 0xFFF << 8; // stick in the SBOnes
         *p++ = instr;
         goto done;
      }
      /* Multiplies.  These use hardwired registers (r0/r1 results,
         r2/r3 operands) -- presumably arranged by the instruction
         selector / register allocator; see getRegUsage for the
         contract. */
      case ARMin_Mul: {
         /* E0000392   mul     r0, r2, r3
            E0810392   umull   r0(LO), r1(HI), r2, r3
            E0C10392   smull   r0(LO), r1(HI), r2, r3
         */
         switch (i->ARMin.Mul.op) {
            case ARMmul_PLAIN: *p++ = 0xE0000392; goto done;
            case ARMmul_ZX:    *p++ = 0xE0810392; goto done;
            case ARMmul_SX:    *p++ = 0xE0C10392; goto done;
            default: vassert(0);
         }
         goto bad;
      }
      /* Integer divide: udiv/sdiv rD, rN, rM (ARMv7 division ext). */
      case ARMin_Div: {
         UInt subopc = i->ARMin.Div.op == ARMdiv_U ?
                        X0011 : X0001;
         UInt rD    = iregNo(i->ARMin.Div.dst);
         UInt rN    = iregNo(i->ARMin.Div.argL);
         UInt rM    = iregNo(i->ARMin.Div.argR);
         UInt instr = XXXXXXXX(X1110, X0111, subopc, rD, 0xF, rM, X0001, rN);
         *p++ = instr;
         goto done;
      }
      /* Load-exclusive, hardwired to r2 (r2/r3 for the 8-byte case)
         from [r4], sized 1/2/4/8 bytes. */
      case ARMin_LdrEX: {
         /* E1D42F9F   ldrexb r2, [r4]
            E1F42F9F   ldrexh r2, [r4]
            E1942F9F   ldrex  r2, [r4]
            E1B42F9F   ldrexd r2, r3, [r4]
         */
         switch (i->ARMin.LdrEX.szB) {
            case 1: *p++ = 0xE1D42F9F; goto done;
            case 2: *p++ = 0xE1F42F9F; goto done;
            case 4: *p++ = 0xE1942F9F; goto done;
            case 8: *p++ = 0xE1B42F9F; goto done;
            default: break;
         }
         goto bad;
      }
      /* Store-exclusive: status in r0, data in r2 (r2/r3 for 8 bytes),
         to [r4], sized 1/2/4/8 bytes. */
      case ARMin_StrEX: {
         /* E1C40F92   strexb r0, r2, [r4]
            E1E40F92   strexh r0, r2, [r4]
            E1840F92   strex  r0, r2, [r4]
            E1A40F92   strexd r0, r2, r3, [r4]
         */
         switch (i->ARMin.StrEX.szB) {
            case 1: *p++ = 0xE1C40F92; goto done;
            case 2: *p++ = 0xE1E40F92; goto done;
            case 4: *p++ = 0xE1840F92; goto done;
            case 8: *p++ = 0xE1A40F92; goto done;
            default: break;
         }
         goto bad;
      }
      /* VFP 64-bit load/store (vldr/vstr dD, [rN, #+/-off]).  The
         signed byte offset must be 4-aligned; it is encoded as an
         unsigned word count (imm8) plus an up/down bit (bU). */
      case ARMin_VLdStD: {
         UInt dD     = dregNo(i->ARMin.VLdStD.dD);
         UInt rN     = iregNo(i->ARMin.VLdStD.amode->reg);
         Int  simm11 = i->ARMin.VLdStD.amode->simm11;
         UInt off8   = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
         UInt bU     = simm11 >= 0 ? 1 : 0;
         UInt bL     = i->ARMin.VLdStD.isLoad ? 1 : 0;
         UInt insn;
         vassert(0 == (off8 & 3));          /* must be word-aligned */
         off8 >>= 2;                        /* bytes -> words */
         vassert(0 == (off8 & 0xFFFFFF00)); /* must fit in imm8 */
         insn = XXXXXX__(0xE,X1101,BITS4(bU,0,0,bL),rN,dD,X1011);
         insn |= off8;
         *p++ = insn;
         goto done;
      }
      /* VFP 32-bit load/store (vldr/vstr sF, [rN, #+/-off]).  Same
         offset scheme as VLdStD; the low bit of the S-reg number (bD)
         goes in the D bit of the encoding. */
      case ARMin_VLdStS: {
         UInt fD     = fregNo(i->ARMin.VLdStS.fD);
         UInt rN     = iregNo(i->ARMin.VLdStS.amode->reg);
         Int  simm11 = i->ARMin.VLdStS.amode->simm11;
         UInt off8   = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
         UInt bU     = simm11 >= 0 ? 1 : 0;
         UInt bL     = i->ARMin.VLdStS.isLoad ? 1 : 0;
         UInt bD     = fD & 1;
         UInt insn;
         vassert(0 == (off8 & 3));          /* must be word-aligned */
         off8 >>= 2;                        /* bytes -> words */
         vassert(0 == (off8 & 0xFFFFFF00)); /* must fit in imm8 */
         insn = XXXXXX__(0xE,X1101,BITS4(bU,bD,0,bL),rN, (fD >> 1), X1010);
         insn |= off8;
         *p++ = insn;
         goto done;
      }
      /* VFP F64 arithmetic: vadd/vsub/vmul/vdiv dD, dN, dM.  The op
         is expressed as the 4-bit p/q/r/s field of the VFP
         data-processing encoding. */
      case ARMin_VAluD: {
         UInt dN = dregNo(i->ARMin.VAluD.argL);
         UInt dD = dregNo(i->ARMin.VAluD.dst);
         UInt dM = dregNo(i->ARMin.VAluD.argR);
         UInt pqrs = X1111; /* undefined */
         switch (i->ARMin.VAluD.op) {
            case ARMvfp_ADD: pqrs = X0110; break;
            case ARMvfp_SUB: pqrs = X0111; break;
            case ARMvfp_MUL: pqrs = X0100; break;
            case ARMvfp_DIV: pqrs = X1000; break;
            default: goto bad;
         }
         vassert(pqrs != X1111);
         UInt bP  = (pqrs >> 3) & 1;
         UInt bQ  = (pqrs >> 2) & 1;
         UInt bR  = (pqrs >> 1) & 1;
         UInt bS  = (pqrs >> 0) & 1;
         UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,0,bQ,bR), dN, dD,
                              X1011, BITS4(0,bS,0,0), dM);
         *p++ = insn;
         goto done;
      }
      /* VFP F32 arithmetic, same scheme as VAluD; S registers split
         into a 4-bit field plus a low bit (bN/bD/bM). */
      case ARMin_VAluS: {
         UInt dN = fregNo(i->ARMin.VAluS.argL);
         UInt dD = fregNo(i->ARMin.VAluS.dst);
         UInt dM = fregNo(i->ARMin.VAluS.argR);
         UInt bN = dN & 1;
         UInt bD = dD & 1;
         UInt bM = dM & 1;
         UInt pqrs = X1111; /* undefined */
         switch (i->ARMin.VAluS.op) {
            case ARMvfp_ADD: pqrs = X0110; break;
            case ARMvfp_SUB: pqrs = X0111; break;
            case ARMvfp_MUL: pqrs = X0100; break;
            case ARMvfp_DIV: pqrs = X1000; break;
            default: goto bad;
         }
         vassert(pqrs != X1111);
         UInt bP  = (pqrs >> 3) & 1;
         UInt bQ  = (pqrs >> 2) & 1;
         UInt bR  = (pqrs >> 1) & 1;
         UInt bS  = (pqrs >> 0) & 1;
         UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,bD,bQ,bR),
                              (dN >> 1), (dD >> 1),
                              X1010, BITS4(bN,bS,bM,0), (dM >> 1));
         *p++ = insn;
         goto done;
      }
      /* VFP F64 unary ops: vmov/vabs/vneg/vsqrt dD, dM. */
      case ARMin_VUnaryD: {
         UInt dD   = dregNo(i->ARMin.VUnaryD.dst);
         UInt dM   = dregNo(i->ARMin.VUnaryD.src);
         UInt insn = 0;
         switch (i->ARMin.VUnaryD.op) {
            case ARMvfpu_COPY:
               insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X0100,dM);
               break;
            case ARMvfpu_ABS:
               insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X1100,dM);
               break;
            case ARMvfpu_NEG:
               insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X0100,dM);
               break;
            case ARMvfpu_SQRT:
               insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X1100,dM);
               break;
            default:
               goto bad;
         }
         *p++ = insn;
         goto done;
      }
      /* VFP F32 unary ops: vmov/vabs/vneg/vsqrt sD, sM; the S-reg
         number's low bit is folded into the D/M bits. */
      case ARMin_VUnaryS: {
         UInt fD   = fregNo(i->ARMin.VUnaryS.dst);
         UInt fM   = fregNo(i->ARMin.VUnaryS.src);
         UInt insn = 0;
         switch (i->ARMin.VUnaryS.op) {
            case ARMvfpu_COPY:
               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
                               (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
                               (fM >> 1));
               break;
            case ARMvfpu_ABS:
               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
                               (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
                               (fM >> 1));
               break;
            case ARMvfpu_NEG:
               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
                               (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
                               (fM >> 1));
               break;
            case ARMvfpu_SQRT:
               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
                               (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
                               (fM >> 1));
               break;
            default:
               goto bad;
         }
         *p++ = insn;
         goto done;
      }
      /* F64 compare: FCMPD then FMSTAT, so the VFP comparison result
         lands in the integer condition flags. */
      case ARMin_VCmpD: {
         UInt dD   = dregNo(i->ARMin.VCmpD.argL);
         UInt dM   = dregNo(i->ARMin.VCmpD.argR);
         UInt insn = XXXXXXXX(0xE, X1110, X1011, X0100, dD, X1011, X0100, dM);
         *p++ = insn;       /* FCMPD dD, dM */
         *p++ = 0xEEF1FA10; /* FMSTAT */
         goto done;
      }
      /* Conditional F64 move: FCPYD<cc> dD, dM.  AL is disallowed;
         unconditional copies go via ARMin_VUnaryD. */
      case ARMin_VCMovD: {
         UInt cc = (UInt)i->ARMin.VCMovD.cond;
         UInt dD = dregNo(i->ARMin.VCMovD.dst);
         UInt dM = dregNo(i->ARMin.VCMovD.src);
         vassert(cc < 16 && cc != ARMcc_AL);
         UInt insn = XXXXXXXX(cc, X1110,X1011,X0000,dD,X1011,X0100,dM);
         *p++ = insn;
         goto done;
      }
      /* Conditional F32 move: FCPYS<cc> fD, fM.  AL is disallowed. */
      case ARMin_VCMovS: {
         UInt cc = (UInt)i->ARMin.VCMovS.cond;
         UInt fD = fregNo(i->ARMin.VCMovS.dst);
         UInt fM = fregNo(i->ARMin.VCMovS.src);
         vassert(cc < 16 && cc != ARMcc_AL);
         UInt insn = XXXXXXXX(cc, X1110, BITS4(1,(fD & 1),1,1),
                              X0000,(fD >> 1),X1010,
                              BITS4(0,1,(fM & 1),0), (fM >> 1));
         *p++ = insn;
         goto done;
      }
      /* F32 <-> F64 conversion (FCVTDS / FCVTSD), direction chosen
         by the sToD flag. */
      case ARMin_VCvtSD: {
         if (i->ARMin.VCvtSD.sToD) {
            /* fcvtds dD, fM */
            UInt dD = dregNo(i->ARMin.VCvtSD.dst);
            UInt fM = fregNo(i->ARMin.VCvtSD.src);
            UInt insn = XXXXXXXX(0xE, X1110, X1011, X0111, dD, X1010,
                                 BITS4(1,1, (fM & 1), 0),
                                 (fM >> 1));
            *p++ = insn;
            goto done;
         } else {
            /* fcvtsd fD, dM */
            UInt fD = fregNo(i->ARMin.VCvtSD.dst);
            UInt dM = dregNo(i->ARMin.VCvtSD.src);
            UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1),
                                 X0111, (fD >> 1),
                                 X1011, X1100, dM);
            *p++ = insn;
            goto done;
         }
         goto bad; /* unreachable: both branches above goto done */
      }
      /* Transfer a 64-bit value between a D register and a pair of
         integer registers (vmov dD,rLo,rHi / vmov rLo,rHi,dD). */
      case ARMin_VXferD: {
         UInt dD  = dregNo(i->ARMin.VXferD.dD);
         UInt rHi = iregNo(i->ARMin.VXferD.rHi);
         UInt rLo = iregNo(i->ARMin.VXferD.rLo);
         /* vmov dD, rLo, rHi is
            E C 4 rHi rLo B (0,0,dD[4],1) dD[3:0]
            vmov rLo, rHi, dD is
            E C 5 rHi rLo B (0,0,dD[4],1) dD[3:0]
         */
         UInt insn
            = XXXXXXXX(0xE, 0xC, i->ARMin.VXferD.toD ? 4 : 5,
                       rHi, rLo, 0xB,
                       BITS4(0,0, ((dD >> 4) & 1), 1), (dD & 0xF));
         *p++ = insn;
         goto done;
      }
      /* Transfer a 32-bit value between an S register and an integer
         register (vmov fD,rLo / vmov rLo,fD). */
      case ARMin_VXferS: {
         UInt fD  = fregNo(i->ARMin.VXferS.fD);
         UInt rLo = iregNo(i->ARMin.VXferS.rLo);
         /* vmov fD, rLo is
            E E 0 fD[4:1] rLo A (fD[0],0,0,1) 0
            vmov rLo, fD is
            E E 1 fD[4:1] rLo A (fD[0],0,0,1) 0
         */
         UInt insn
            = XXXXXXXX(0xE, 0xE, i->ARMin.VXferS.toS ? 0 : 1,
                       (fD >> 1) & 0xF, rLo, 0xA,
                       BITS4((fD & 1),0,0,1), 0);
         *p++ = insn;
         goto done;
      }
      /* Int32 <-> F64 conversions, one of four forms selected by the
         (iToD, syned) pair.  The integer side always lives in an S
         (freg) register, the float side in a D (dreg) register. */
      case ARMin_VCvtID: {
         Bool iToD = i->ARMin.VCvtID.iToD;
         Bool syned = i->ARMin.VCvtID.syned;
         if (iToD && syned) {
            // FSITOD: I32S-in-freg to F64-in-dreg
            UInt regF = fregNo(i->ARMin.VCvtID.src);
            UInt regD = dregNo(i->ARMin.VCvtID.dst);
            UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
                                 X1011, BITS4(1,1,(regF & 1),0),
                                 (regF >> 1) & 0xF);
            *p++ = insn;
            goto done;
         }
         if (iToD && (!syned)) {
            // FUITOD: I32U-in-freg to F64-in-dreg
            UInt regF = fregNo(i->ARMin.VCvtID.src);
            UInt regD = dregNo(i->ARMin.VCvtID.dst);
            UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
                                 X1011, BITS4(0,1,(regF & 1),0),
                                 (regF >> 1) & 0xF);
            *p++ = insn;
            goto done;
         }
         if ((!iToD) && syned) {
            // FTOSID: F64-in-dreg to I32S-in-freg
            UInt regD = dregNo(i->ARMin.VCvtID.src);
            UInt regF = fregNo(i->ARMin.VCvtID.dst);
            UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
                                 X1101, (regF >> 1) & 0xF,
                                 X1011, X0100, regD);
            *p++ = insn;
            goto done;
         }
         if ((!iToD) && (!syned)) {
            // FTOUID: F64-in-dreg to I32U-in-freg
            UInt regD = dregNo(i->ARMin.VCvtID.src);
            UInt regF = fregNo(i->ARMin.VCvtID.dst);
            UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
                                 X1100, (regF >> 1) & 0xF,
                                 X1011, X0100, regD);
            *p++ = insn;
            goto done;
         }
         /*UNREACHED*/
         vassert(0);
      }
   3641       case ARMin_FPSCR: {
   3642          Bool toFPSCR = i->ARMin.FPSCR.toFPSCR;
   3643          HReg iReg    = iregNo(i->ARMin.FPSCR.iReg);
   3644          if (toFPSCR) {
   3645             /* fmxr fpscr, iReg is EEE1 iReg A10 */
   3646             *p++ = 0xEEE10A10 | ((iReg & 0xF) << 12);
   3647             goto done;
   3648          }
   3649          goto bad; // FPSCR -> iReg case currently ATC
   3650       }
      /* Full memory fence: emit the ARMv7 DSB, DMB and ISB barriers
         back-to-back. */
      case ARMin_MFence: {
         // It's not clear (to me) how these relate to the ARMv7
         // versions, so let's just use the v7 versions as they
         // are at least well documented.
         //*p++ = 0xEE070F9A; /* mcr 15,0,r0,c7,c10,4 (DSB) */
         //*p++ = 0xEE070FBA; /* mcr 15,0,r0,c7,c10,5 (DMB) */
         //*p++ = 0xEE070F95; /* mcr 15,0,r0,c7,c5,4  (ISB) */
         *p++ = 0xF57FF04F; /* DSB sy */
         *p++ = 0xF57FF05F; /* DMB sy */
         *p++ = 0xF57FF06F; /* ISB */
         goto done;
      }
      /* Clear the local monitor (exclusive-access state). */
      case ARMin_CLREX: {
         *p++ = 0xF57FF01F; /* clrex */
         goto done;
      }
   3667 
      /* NEON 128-bit load/store: vld1/vst1 of the Q register's two
         underlying D registers.  regM == 15 selects the
         no-writeback "[rN]" addressing form. */
      case ARMin_NLdStQ: {
         UInt regD = qregNo(i->ARMin.NLdStQ.dQ) << 1; /* Q regs map to D-reg pairs */
         UInt regN, regM;
         UInt D = regD >> 4;    /* top bit of the D-reg number */
         UInt bL = i->ARMin.NLdStQ.isLoad ? 1 : 0;
         UInt insn;
         vassert(hregClass(i->ARMin.NLdStQ.dQ) == HRcVec128);
         regD &= 0xF;
         if (i->ARMin.NLdStQ.amode->tag == ARMamN_RR) {
            regN = iregNo(i->ARMin.NLdStQ.amode->ARMamN.RR.rN);
            regM = iregNo(i->ARMin.NLdStQ.amode->ARMamN.RR.rM);
         } else {
            regN = iregNo(i->ARMin.NLdStQ.amode->ARMamN.R.rN);
            regM = 15;  /* Rm=15: no register offset, no writeback */
         }
         insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
                              regN, regD, X1010, X1000, regM);
         *p++ = insn;
         goto done;
      }
      /* NEON 64-bit load/store: vld1/vst1 of a single D register;
         same addressing scheme as NLdStQ. */
      case ARMin_NLdStD: {
         UInt regD = dregNo(i->ARMin.NLdStD.dD);
         UInt regN, regM;
         UInt D = regD >> 4;    /* top bit of the D-reg number */
         UInt bL = i->ARMin.NLdStD.isLoad ? 1 : 0;
         UInt insn;
         vassert(hregClass(i->ARMin.NLdStD.dD) == HRcFlt64);
         regD &= 0xF;
         if (i->ARMin.NLdStD.amode->tag == ARMamN_RR) {
            regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rN);
            regM = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rM);
         } else {
            regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.R.rN);
            regM = 15;  /* Rm=15: no register offset, no writeback */
         }
         insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
                              regN, regD, X0111, X1000, regM);
         *p++ = insn;
         goto done;
      }
   3708       case ARMin_NUnaryS: {
   3709          UInt Q = i->ARMin.NUnaryS.Q ? 1 : 0;
   3710          UInt regD, D;
   3711          UInt regM, M;
   3712          UInt size = i->ARMin.NUnaryS.size;
   3713          UInt insn;
   3714          UInt opc, opc1, opc2;
   3715          switch (i->ARMin.NUnaryS.op) {
   3716 	    case ARMneon_VDUP:
   3717                if (i->ARMin.NUnaryS.size >= 16)
   3718                   goto bad;
   3719                if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Reg)
   3720                   goto bad;
   3721                if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
   3722                   goto bad;
   3723                regD = (hregClass(i->ARMin.NUnaryS.dst->reg) == HRcVec128)
   3724                         ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1)
   3725                         : dregNo(i->ARMin.NUnaryS.dst->reg);
   3726                regM = (hregClass(i->ARMin.NUnaryS.src->reg) == HRcVec128)
   3727                         ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1)
   3728                         : dregNo(i->ARMin.NUnaryS.src->reg);
   3729                D = regD >> 4;
   3730                M = regM >> 4;
   3731                regD &= 0xf;
   3732                regM &= 0xf;
   3733                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1),
   3734                                (i->ARMin.NUnaryS.size & 0xf), regD,
   3735                                X1100, BITS4(0,Q,M,0), regM);
   3736                *p++ = insn;
   3737                goto done;
   3738             case ARMneon_SETELEM:
   3739                regD = Q ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1) :
   3740                                 dregNo(i->ARMin.NUnaryS.dst->reg);
   3741                regM = iregNo(i->ARMin.NUnaryS.src->reg);
   3742                M = regM >> 4;
   3743                D = regD >> 4;
   3744                regM &= 0xF;
   3745                regD &= 0xF;
   3746                if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Scalar)
   3747                   goto bad;
   3748                switch (size) {
   3749                   case 0:
   3750                      if (i->ARMin.NUnaryS.dst->index > 7)
   3751                         goto bad;
   3752                      opc = X1000 | i->ARMin.NUnaryS.dst->index;
   3753                      break;
   3754                   case 1:
   3755                      if (i->ARMin.NUnaryS.dst->index > 3)
   3756                         goto bad;
   3757                      opc = X0001 | (i->ARMin.NUnaryS.dst->index << 1);
   3758                      break;
   3759                   case 2:
   3760                      if (i->ARMin.NUnaryS.dst->index > 1)
   3761                         goto bad;
   3762                      opc = X0000 | (i->ARMin.NUnaryS.dst->index << 2);
   3763                      break;
   3764                   default:
   3765                      goto bad;
   3766                }
   3767                opc1 = (opc >> 2) & 3;
   3768                opc2 = opc & 3;
   3769                insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),0),
   3770                                regD, regM, X1011,
   3771                                BITS4(D,(opc2 >> 1),(opc2 & 1),1), X0000);
   3772                *p++ = insn;
   3773                goto done;
   3774             case ARMneon_GETELEMU:
   3775                regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) :
   3776                                 dregNo(i->ARMin.NUnaryS.src->reg);
   3777                regD = iregNo(i->ARMin.NUnaryS.dst->reg);
   3778                M = regM >> 4;
   3779                D = regD >> 4;
   3780                regM &= 0xF;
   3781                regD &= 0xF;
   3782                if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
   3783                   goto bad;
   3784                switch (size) {
   3785                   case 0:
   3786                      if (Q && i->ARMin.NUnaryS.src->index > 7) {
   3787                         regM++;
   3788                         i->ARMin.NUnaryS.src->index -= 8;
   3789                      }
   3790                      if (i->ARMin.NUnaryS.src->index > 7)
   3791                         goto bad;
   3792                      opc = X1000 | i->ARMin.NUnaryS.src->index;
   3793                      break;
   3794                   case 1:
   3795                      if (Q && i->ARMin.NUnaryS.src->index > 3) {
   3796                         regM++;
   3797                         i->ARMin.NUnaryS.src->index -= 4;
   3798                      }
   3799                      if (i->ARMin.NUnaryS.src->index > 3)
   3800                         goto bad;
   3801                      opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
   3802                      break;
   3803                   case 2:
   3804                      goto bad;
   3805                   default:
   3806                      goto bad;
   3807                }
   3808                opc1 = (opc >> 2) & 3;
   3809                opc2 = opc & 3;
   3810                insn = XXXXXXXX(0xE, X1110, BITS4(1,(opc1 >> 1),(opc1 & 1),1),
   3811                                regM, regD, X1011,
   3812                                BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
   3813                *p++ = insn;
   3814                goto done;
   3815             case ARMneon_GETELEMS:
   3816                regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) :
   3817                                 dregNo(i->ARMin.NUnaryS.src->reg);
   3818                regD = iregNo(i->ARMin.NUnaryS.dst->reg);
   3819                M = regM >> 4;
   3820                D = regD >> 4;
   3821                regM &= 0xF;
   3822                regD &= 0xF;
   3823                if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
   3824                   goto bad;
   3825                switch (size) {
   3826                   case 0:
   3827                      if (Q && i->ARMin.NUnaryS.src->index > 7) {
   3828                         regM++;
   3829                         i->ARMin.NUnaryS.src->index -= 8;
   3830                      }
   3831                      if (i->ARMin.NUnaryS.src->index > 7)
   3832                         goto bad;
   3833                      opc = X1000 | i->ARMin.NUnaryS.src->index;
   3834                      break;
   3835                   case 1:
   3836                      if (Q && i->ARMin.NUnaryS.src->index > 3) {
   3837                         regM++;
   3838                         i->ARMin.NUnaryS.src->index -= 4;
   3839                      }
   3840                      if (i->ARMin.NUnaryS.src->index > 3)
   3841                         goto bad;
   3842                      opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
   3843                      break;
   3844                   case 2:
   3845                      if (Q && i->ARMin.NUnaryS.src->index > 1) {
   3846                         regM++;
   3847                         i->ARMin.NUnaryS.src->index -= 2;
   3848                      }
   3849                      if (i->ARMin.NUnaryS.src->index > 1)
   3850                         goto bad;
   3851                      opc = X0000 | (i->ARMin.NUnaryS.src->index << 2);
   3852                      break;
   3853                   default:
   3854                      goto bad;
   3855                }
   3856                opc1 = (opc >> 2) & 3;
   3857                opc2 = opc & 3;
   3858                insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),1),
   3859                                regM, regD, X1011,
   3860                                BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
   3861                *p++ = insn;
   3862                goto done;
   3863             default:
   3864                goto bad;
   3865          }
   3866       }
      case ARMin_NUnary: {
         /* Emit a NEON unary op: one source register (or, for VDUP, an
            ARM core register) and one destination.  Q selects a 128-bit
            (Q-register) operation.  NEON encodings split each register
            number into a 4-bit field (regD/regM) plus a separate top bit
            (D/M); a Q-register number is doubled first to obtain the
            number of its low D register. */
         UInt Q = i->ARMin.NUnary.Q ? 1 : 0;
         UInt regD = (hregClass(i->ARMin.NUnary.dst) == HRcVec128)
                       ? (qregNo(i->ARMin.NUnary.dst) << 1)
                       : dregNo(i->ARMin.NUnary.dst);
         UInt regM, M;
         UInt D = regD >> 4;
         UInt sz1 = i->ARMin.NUnary.size >> 1;  /* size field, bit 1 */
         UInt sz2 = i->ARMin.NUnary.size & 1;   /* size field, bit 0 */
         UInt sz = i->ARMin.NUnary.size;
         UInt insn;
         UInt F = 0; /* TODO: floating point EQZ ??? */
         if (i->ARMin.NUnary.op != ARMneon_DUP) {
            /* Source is a D or Q vector register. */
            regM = (hregClass(i->ARMin.NUnary.src) == HRcVec128)
                     ? (qregNo(i->ARMin.NUnary.src) << 1)
                     : dregNo(i->ARMin.NUnary.src);
            M = regM >> 4;
         } else {
            /* VDUP reads its source from an integer (core) register. */
            regM = iregNo(i->ARMin.NUnary.src);
            M = regM >> 4;
         }
         regD &= 0xF;
         regM &= 0xF;
         switch (i->ARMin.NUnary.op) {
            case ARMneon_COPY: /* VMOV reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regM, regD, X0001,
                               BITS4(M,Q,M,1), regM);
               break;
            case ARMneon_COPYN: /* VMOVN regD, regQ */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0010, BITS4(0,0,M,0), regM);
               break;
            case ARMneon_COPYQNSS: /* VQMOVN regD, regQ */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0010, BITS4(1,0,M,0), regM);
               break;
            case ARMneon_COPYQNUS: /* VQMOVUN regD, regQ */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0010, BITS4(0,1,M,0), regM);
               break;
            case ARMneon_COPYQNUU: /* VQMOVN regD, regQ */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0010, BITS4(1,1,M,0), regM);
               break;
            case ARMneon_COPYLS: /* VMOVL regQ, regD */
               /* Lengthening move; 'sz' selects the lane size and must be
                  0..2 (8/16/32-bit source lanes). */
               if (sz >= 3)
                  goto bad;
               insn = XXXXXXXX(0xF, X0010,
                               BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
                               BITS4((sz == 0) ? 1 : 0,0,0,0),
                               regD, X1010, BITS4(0,0,M,1), regM);
               break;
            case ARMneon_COPYLU: /* VMOVL regQ, regD */
               if (sz >= 3)
                  goto bad;
               insn = XXXXXXXX(0xF, X0011,
                               BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
                               BITS4((sz == 0) ? 1 : 0,0,0,0),
                               regD, X1010, BITS4(0,0,M,1), regM);
               break;
            case ARMneon_NOT: /* VMVN reg, reg*/
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
                               BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_EQZ: /* VCEQ reg, reg, #0 */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
                               regD, BITS4(0,F,0,1), BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_CNT: /* VCNT reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_CLZ: /* VCLZ reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, X0100, BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_CLS: /* VCLS reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, X0100, BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_ABS: /* VABS reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
                               regD, X0011, BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_DUP: /* VDUP: core register to all lanes */
               /* For VDUP, sz1/sz2 are repurposed as the b/e lane-size
                  bits; only sizes 0..2 are representable. */
               sz1 = i->ARMin.NUnary.size == 0 ? 1 : 0;
               sz2 = i->ARMin.NUnary.size == 1 ? 1 : 0;
               vassert(sz1 + sz2 < 2);
               insn = XXXXXXXX(0xE, X1110, BITS4(1, sz1, Q, 0), regD, regM,
                               X1011, BITS4(D,0,sz2,1), X0000);
               break;
            case ARMneon_REV16: /* VREV16 reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, BITS4(0,0,0,1), BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_REV32: /* VREV32 reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, BITS4(0,0,0,0), BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_REV64: /* VREV64 reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, BITS4(0,0,0,0), BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_PADDLU: /* VPADDL unsigned */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, X0010, BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_PADDLS: /* VPADDL signed */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, X0010, BITS4(0,Q,M,0), regM);
               break;
            /* For the VQSHL-immediate forms below, 'sz' carries the
               already-encoded shift immediate: bits [3:0] and [5:4] go
               into the imm6 field and bit [6] into the L bit. */
            case ARMneon_VQSHLNUU:
               insn = XXXXXXXX(0xF, X0011,
                               (1 << 3) | (D << 2) | ((sz >> 4) & 3),
                               sz & 0xf, regD, X0111,
                               BITS4(sz >> 6,Q,M,1), regM);
               break;
            case ARMneon_VQSHLNSS:
               insn = XXXXXXXX(0xF, X0010,
                               (1 << 3) | (D << 2) | ((sz >> 4) & 3),
                               sz & 0xf, regD, X0111,
                               BITS4(sz >> 6,Q,M,1), regM);
               break;
            case ARMneon_VQSHLNUS:
               insn = XXXXXXXX(0xF, X0011,
                               (1 << 3) | (D << 2) | ((sz >> 4) & 3),
                               sz & 0xf, regD, X0110,
                               BITS4(sz >> 6,Q,M,1), regM);
               break;
            case ARMneon_VCVTFtoS: /* VCVT, F32 -> signed int */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_VCVTFtoU: /* VCVT, F32 -> unsigned int */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
                               BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_VCVTStoF: /* VCVT, signed int -> F32 */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_VCVTUtoF: /* VCVT, unsigned int -> F32 */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
                               BITS4(1,Q,M,0), regM);
               break;
            /* For the fixed-point VCVT forms, 'sz' packs the number of
               fraction bits: bits [5:4] and [3:0] form the imm6 field. */
            case ARMneon_VCVTFtoFixedU:
               sz1 = (sz >> 5) & 1;
               sz2 = (sz >> 4) & 1;
               sz &= 0xf;
               insn = XXXXXXXX(0xF, X0011,
                               BITS4(1,D,sz1,sz2), sz, regD, X1111,
                               BITS4(0,Q,M,1), regM);
               break;
            case ARMneon_VCVTFtoFixedS:
               sz1 = (sz >> 5) & 1;
               sz2 = (sz >> 4) & 1;
               sz &= 0xf;
               insn = XXXXXXXX(0xF, X0010,
                               BITS4(1,D,sz1,sz2), sz, regD, X1111,
                               BITS4(0,Q,M,1), regM);
               break;
            case ARMneon_VCVTFixedUtoF:
               sz1 = (sz >> 5) & 1;
               sz2 = (sz >> 4) & 1;
               sz &= 0xf;
               insn = XXXXXXXX(0xF, X0011,
                               BITS4(1,D,sz1,sz2), sz, regD, X1110,
                               BITS4(0,Q,M,1), regM);
               break;
            case ARMneon_VCVTFixedStoF:
               sz1 = (sz >> 5) & 1;
               sz2 = (sz >> 4) & 1;
               sz &= 0xf;
               insn = XXXXXXXX(0xF, X0010,
                               BITS4(1,D,sz1,sz2), sz, regD, X1110,
                               BITS4(0,Q,M,1), regM);
               break;
            case ARMneon_VCVTF32toF16: /* VCVT, F32 -> F16 (narrowing) */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0110,
                               BITS4(0,0,M,0), regM);
               break;
            case ARMneon_VCVTF16toF32: /* VCVT, F16 -> F32 (widening) */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0111,
                               BITS4(0,0,M,0), regM);
               break;
            case ARMneon_VRECIP: /* VRECPE, integer variant */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_VRECIPF: /* VRECPE, F32 variant */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_VABSFP: /* VABS.F32 */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_VRSQRTEFP: /* VRSQRTE, F32 variant */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
                               BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_VRSQRTE: /* VRSQRTE, integer variant */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
                               BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_VNEGF: /* VNEG.F32 */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
                               BITS4(1,Q,M,0), regM);
               break;

            default:
               goto bad;
         }
         *p++ = insn;
         goto done;
      }
      case ARMin_NDual: {
         /* Emit a NEON two-register permute op (VTRN/VZIP/VUZP), in which
            both registers are sources and destinations.  As elsewhere,
            each register number is split into a 4-bit field plus a top
            bit (D/M), with Q-register numbers doubled first. */
         UInt Q = i->ARMin.NDual.Q ? 1 : 0;
         UInt regD = (hregClass(i->ARMin.NDual.arg1) == HRcVec128)
                       ? (qregNo(i->ARMin.NDual.arg1) << 1)
                       : dregNo(i->ARMin.NDual.arg1);
         UInt regM = (hregClass(i->ARMin.NDual.arg2) == HRcVec128)
                       ? (qregNo(i->ARMin.NDual.arg2) << 1)
                       : dregNo(i->ARMin.NDual.arg2);
         UInt D = regD >> 4;
         UInt M = regM >> 4;
         UInt sz1 = i->ARMin.NDual.size >> 1;  /* size field, bit 1 */
         UInt sz2 = i->ARMin.NDual.size & 1;   /* size field, bit 0 */
         UInt insn;
         regD &= 0xF;
         regM &= 0xF;
         switch (i->ARMin.NDual.op) {
            case ARMneon_TRN: /* VTRN reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0000, BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_ZIP: /* VZIP reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0001, BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_UZP: /* VUZP reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0001, BITS4(0,Q,M,0), regM);
               break;
            default:
               goto bad;
         }
         *p++ = insn;
         goto done;
      }
      case ARMin_NBinary: {
         /* Emit a NEON three-register binary op: dst = argL `op` argR.
            argL goes in the N register field, argR in the M field.  Q
            selects 128-bit operation; register numbers are split into a
            4-bit field plus a top bit (D/N/M), with Q-register numbers
            doubled first.  For most ops sz1:sz2 is the lane-size field;
            for VEXT, 'size' is instead the byte-offset immediate. */
         UInt Q = i->ARMin.NBinary.Q ? 1 : 0;
         UInt regD = (hregClass(i->ARMin.NBinary.dst) == HRcVec128)
                       ? (qregNo(i->ARMin.NBinary.dst) << 1)
                       : dregNo(i->ARMin.NBinary.dst);
         UInt regN = (hregClass(i->ARMin.NBinary.argL) == HRcVec128)
                       ? (qregNo(i->ARMin.NBinary.argL) << 1)
                       : dregNo(i->ARMin.NBinary.argL);
         UInt regM = (hregClass(i->ARMin.NBinary.argR) == HRcVec128)
                       ? (qregNo(i->ARMin.NBinary.argR) << 1)
                       : dregNo(i->ARMin.NBinary.argR);
         UInt sz1 = i->ARMin.NBinary.size >> 1;  /* size field, bit 1 */
         UInt sz2 = i->ARMin.NBinary.size & 1;   /* size field, bit 0 */
         UInt D = regD >> 4;
         UInt N = regN >> 4;
         UInt M = regM >> 4;
         UInt insn;
         regD &= 0xF;
         regM &= 0xF;
         regN &= 0xF;
         switch (i->ARMin.NBinary.op) {
            case ARMneon_VAND: /* VAND reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X0001,
                               BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VORR: /* VORR reg, reg, reg*/
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X0001,
                               BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VXOR: /* VEOR reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X0001,
                               BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VADD: /* VADD reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1000, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VSUB: /* VSUB reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1000, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VMINU: /* VMIN.Uxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0110, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VMINS: /* VMIN.Sxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0110, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VMAXU: /* VMAX.Uxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0110, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VMAXS: /* VMAX.Sxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0110, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VRHADDS: /* VRHADD.Sxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0001, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VRHADDU: /* VRHADD.Uxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0001, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VQADDU: /* VQADD unsigned reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0000, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VQADDS: /* VQADD signed reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0000, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VQSUBU: /* VQSUB unsigned reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0010, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VQSUBS: /* VQSUB signed reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0010, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VCGTU: /* VCGT unsigned reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0011, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VCGTS: /* VCGT signed reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0011, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VCGEU: /* VCGE unsigned reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0011, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VCGES: /* VCGE signed reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0011, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VCEQ: /* VCEQ reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1000, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VEXT: /* VEXT.8 reg, reg, #imm4*/
               /* Here 'size' is the imm4 byte offset, not a lane size. */
               if (i->ARMin.NBinary.size >= 16)
                  goto bad;
               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,1,1), regN, regD,
                               i->ARMin.NBinary.size & 0xf, BITS4(N,Q,M,0),
                               regM);
               break;
            case ARMneon_VMUL: /* VMUL.Ixx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1001, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VMULLU: /* VMULL unsigned (widening) */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,sz1,sz2), regN, regD,
                               X1100, BITS4(N,0,M,0), regM);
               break;
            case ARMneon_VMULLS: /* VMULL signed (widening) */
               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
                               X1100, BITS4(N,0,M,0), regM);
               break;
            case ARMneon_VMULP: /* VMUL polynomial */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1001, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VMULFP: /* VMUL.F32 */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
                               X1101, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VMULLP: /* VMULL polynomial (widening) */
               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
                               X1110, BITS4(N,0,M,0), regM);
               break;
            case ARMneon_VQDMULH: /* VQDMULH reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1011, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VQRDMULH: /* VQRDMULH reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1011, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VQDMULL: /* VQDMULL (widening) */
               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
                               X1101, BITS4(N,0,M,0), regM);
               break;
            case ARMneon_VTBL: /* VTBL table lookup */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), regN, regD,
                               X1000, BITS4(N,0,M,0), regM);
               break;
            case ARMneon_VPADD: /* VPADD (pairwise) */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1011, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VPADDFP: /* VPADD.F32 (pairwise) */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
                               X1101, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VPMINU: /* VPMIN unsigned (pairwise) */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1010, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VPMINS: /* VPMIN signed (pairwise) */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1010, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VPMAXU: /* VPMAX unsigned (pairwise) */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1010, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VPMAXS: /* VPMAX signed (pairwise) */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1010, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VADDFP: /* VADD reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
                               X1101, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VSUBFP: /* VSUB reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
                               X1101, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VABDFP: /* VABD reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
                               X1101, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VMINF: /* VMIN.F32 */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
                               X1111, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VMAXF: /* VMAX.F32 */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
                               X1111, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VPMINF: /* VPMIN.F32 (pairwise) */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
                               X1111, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VPMAXF: /* VPMAX.F32 (pairwise) */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
                               X1111, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VRECPS: /* VRECPS.F32 */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1111,
                               BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VCGTF: /* VCGT.F32 */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD, X1110,
                               BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VCGEF: /* VCGE.F32 */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X1110,
                               BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VCEQF: /* VCEQ.F32 */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1110,
                               BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VRSQRTS: /* VRSQRTS.F32 */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X1111,
                               BITS4(N,Q,M,1), regM);
               break;
            default:
               goto bad;
         }
         *p++ = insn;
         goto done;
      }
      case ARMin_NShift: {
         /* Emit a NEON shift-by-register op: dst = argL shifted by the
            per-lane amounts in argR.  Note the operand placement: argL
            (the value shifted) goes in the M register field and argR
            (the shift amounts) in the N field.  Register numbers are
            split into a 4-bit field plus a top bit (D/N/M) as usual. */
         UInt Q = i->ARMin.NShift.Q ? 1 : 0;
         UInt regD = (hregClass(i->ARMin.NShift.dst) == HRcVec128)
                       ? (qregNo(i->ARMin.NShift.dst) << 1)
                       : dregNo(i->ARMin.NShift.dst);
         UInt regM = (hregClass(i->ARMin.NShift.argL) == HRcVec128)
                       ? (qregNo(i->ARMin.NShift.argL) << 1)
                       : dregNo(i->ARMin.NShift.argL);
         UInt regN = (hregClass(i->ARMin.NShift.argR) == HRcVec128)
                       ? (qregNo(i->ARMin.NShift.argR) << 1)
                       : dregNo(i->ARMin.NShift.argR);
         UInt sz1 = i->ARMin.NShift.size >> 1;  /* size field, bit 1 */
         UInt sz2 = i->ARMin.NShift.size & 1;   /* size field, bit 0 */
         UInt D = regD >> 4;
         UInt N = regN >> 4;
         UInt M = regM >> 4;
         UInt insn;
         regD &= 0xF;
         regM &= 0xF;
         regN &= 0xF;
         switch (i->ARMin.NShift.op) {
            case ARMneon_VSHL: /* VSHL reg, reg, reg (U-form encoding) */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0100, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VSAL: /* VSHL reg, reg, reg (S-form encoding) */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0100, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VQSHL: /* VQSHL reg, reg, reg (U-form encoding) */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0100, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VQSAL: /* VQSHL reg, reg, reg (S-form encoding) */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0100, BITS4(N,Q,M,1), regM);
               break;
            default:
               goto bad;
         }
         *p++ = insn;
         goto done;
      }
   4386       case ARMin_NeonImm: {
   4387          UInt Q = (hregClass(i->ARMin.NeonImm.dst) == HRcVec128) ? 1 : 0;
   4388          UInt regD = Q ? (qregNo(i->ARMin.NeonImm.dst) << 1) :
   4389                           dregNo(i->ARMin.NeonImm.dst);
   4390          UInt D = regD >> 4;
   4391          UInt imm = i->ARMin.NeonImm.imm->imm8;
   4392          UInt tp = i->ARMin.NeonImm.imm->type;
   4393          UInt j = imm >> 7;
   4394          UInt imm3 = (imm >> 4) & 0x7;
   4395          UInt imm4 = imm & 0xF;
   4396          UInt cmode, op;
   4397          UInt insn;
   4398          regD &= 0xF;
   4399          if (tp == 9)
   4400             op = 1;
   4401          else
   4402             op = 0;
   4403          switch (tp) {
   4404             case 0:
   4405             case 1:
   4406             case 2:
   4407             case 3:
   4408             case 4:
   4409             case 5:
   4410                cmode = tp << 1;
   4411                break;
   4412             case 9:
   4413             case 6:
   4414                cmode = 14;
   4415                break;
   4416             case 7:
   4417                cmode = 12;
   4418                break;
   4419             case 8:
   4420                cmode = 13;
   4421                break;
   4422             case 10:
   4423                cmode = 15;
   4424                break;
   4425             default:
   4426                vpanic("ARMin_NeonImm");
   4427 
   4428          }
   4429          insn = XXXXXXXX(0xF, BITS4(0,0,1,j), BITS4(1,D,0,0), imm3, regD,
   4430                          cmode, BITS4(0,Q,op,1), imm4);
   4431          *p++ = insn;
   4432          goto done;
   4433       }
   4434       case ARMin_NCMovQ: {
   4435          UInt cc = (UInt)i->ARMin.NCMovQ.cond;
   4436          UInt qM = qregNo(i->ARMin.NCMovQ.src) << 1;
   4437          UInt qD = qregNo(i->ARMin.NCMovQ.dst) << 1;
   4438          UInt vM = qM & 0xF;
   4439          UInt vD = qD & 0xF;
   4440          UInt M  = (qM >> 4) & 1;
   4441          UInt D  = (qD >> 4) & 1;
   4442          vassert(cc < 16 && cc != ARMcc_AL && cc != ARMcc_NV);
   4443          /* b!cc here+8: !cc A00 0000 */
   4444          UInt insn = XXXXXXXX(cc ^ 1, 0xA, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0);
   4445          *p++ = insn;
   4446          /* vmov qD, qM */
   4447          insn = XXXXXXXX(0xF, 0x2, BITS4(0,D,1,0),
   4448                          vM, vD, BITS4(0,0,0,1), BITS4(M,1,M,1), vM);
   4449          *p++ = insn;
   4450          goto done;
   4451       }
   4452       case ARMin_Add32: {
   4453          UInt regD = iregNo(i->ARMin.Add32.rD);
   4454          UInt regN = iregNo(i->ARMin.Add32.rN);
   4455          UInt imm32 = i->ARMin.Add32.imm32;
   4456          vassert(regD != regN);
   4457          /* MOV regD, imm32 */
   4458          p = imm32_to_iregNo((UInt *)p, regD, imm32);
   4459          /* ADD regD, regN, regD */
   4460          UInt insn = XXXXXXXX(0xE, 0, X1000, regN, regD, 0, 0, regD);
   4461          *p++ = insn;
   4462          goto done;
   4463       }
   4464 
   4465       case ARMin_EvCheck: {
   4466          /* We generate:
   4467                ldr  r12, [r8 + #4]   4 == offsetof(host_EvC_COUNTER)
   4468                subs r12, r12, #1  (A1)
   4469                str  r12, [r8 + #4]   4 == offsetof(host_EvC_COUNTER)
   4470                bpl  nofail
   4471                ldr  r12, [r8 + #0]   0 == offsetof(host_EvC_FAILADDR)
   4472                bx   r12
   4473               nofail:
   4474          */
   4475          UInt* p0 = p;
   4476          p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
   4477                                 i->ARMin.EvCheck.amCounter);
   4478          *p++ = 0xE25CC001; /* subs r12, r12, #1 */
   4479          p = do_load_or_store32(p, False/*!isLoad*/, /*r*/12,
   4480                                 i->ARMin.EvCheck.amCounter);
   4481          *p++ = 0x5A000001; /* bpl nofail */
   4482          p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
   4483                                 i->ARMin.EvCheck.amFailAddr);
   4484          *p++ = 0xE12FFF1C; /* bx r12 */
   4485          /* nofail: */
   4486 
   4487          /* Crosscheck */
   4488          vassert(evCheckSzB_ARM() == (UChar*)p - (UChar*)p0);
   4489          goto done;
   4490       }
   4491 
   4492       case ARMin_ProfInc: {
   4493          /* We generate:
   4494               (ctrP is unknown now, so use 0x65556555 in the
   4495               expectation that a later call to LibVEX_patchProfCtr
   4496               will be used to fill in the immediate fields once the
   4497               right value is known.)
   4498             movw r12, lo16(0x65556555)
   4499             movt r12, lo16(0x65556555)
   4500             ldr  r11, [r12]
   4501             adds r11, r11, #1
   4502             str  r11, [r12]
   4503             ldr  r11, [r12+4]
   4504             adc  r11, r11, #0
   4505             str  r11, [r12+4]
   4506          */
   4507          p = imm32_to_iregNo_EXACTLY2(p, /*r*/12, 0x65556555);
   4508          *p++ = 0xE59CB000;
   4509          *p++ = 0xE29BB001;
   4510          *p++ = 0xE58CB000;
   4511          *p++ = 0xE59CB004;
   4512          *p++ = 0xE2ABB000;
   4513          *p++ = 0xE58CB004;
   4514          /* Tell the caller .. */
   4515          vassert(!(*is_profInc));
   4516          *is_profInc = True;
   4517          goto done;
   4518       }
   4519 
   4520       /* ... */
   4521       default:
   4522          goto bad;
   4523     }
   4524 
   4525   bad:
   4526    ppARMInstr(i);
   4527    vpanic("emit_ARMInstr");
   4528    /*NOTREACHED*/
   4529 
   4530   done:
   4531    vassert(((UChar*)p) - &buf[0] <= 32);
   4532    return ((UChar*)p) - &buf[0];
   4533 }
   4534 
   4535 
   4536 /* How big is an event check?  See case for ARMin_EvCheck in
   4537    emit_ARMInstr just above.  That crosschecks what this returns, so
   4538    we can tell if we're inconsistent. */
   4539 Int evCheckSzB_ARM ( void )
   4540 {
   4541    return 24;
   4542 }
   4543 
   4544 
   4545 /* NB: what goes on here has to be very closely coordinated with the
   4546    emitInstr case for XDirect, above. */
VexInvalRange chainXDirect_ARM ( void* place_to_chain,
                                 void* disp_cp_chain_me_EXPECTED,
                                 void* place_to_jump_to )
{
   /* What we're expecting to see is:
        movw r12, lo16(disp_cp_chain_me_to_EXPECTED)
        movt r12, hi16(disp_cp_chain_me_to_EXPECTED)
        blx  r12
      viz
        <8 bytes generated by imm32_to_iregNo_EXACTLY2>
        E1 2F FF 3C
   */
   UInt* p = (UInt*)place_to_chain;
   /* Patch site must be 4-aligned, since we read/write it as UInts. */
   vassert(0 == (3 & (HWord)p));
   vassert(is_imm32_to_iregNo_EXACTLY2(
              p, /*r*/12, (UInt)Ptr_to_ULong(disp_cp_chain_me_EXPECTED)));
   /* 0xE12FFF3C == "blx r12" (unconditional). */
   vassert(p[2] == 0xE12FFF3C);
   /* And what we want to change it to is either:
        (general case)
          movw r12, lo16(place_to_jump_to)
          movt r12, hi16(place_to_jump_to)
          bx   r12
        viz
          <8 bytes generated by imm32_to_iregNo_EXACTLY2>
          E1 2F FF 1C
      ---OR---
        in the case where the displacement falls within 26 bits
          b disp24; undef; undef
        viz
          EA <3 bytes == disp24>
          FF 00 00 00
          FF 00 00 00

      In both cases the replacement has the same length as the original.
      To remain sane & verifiable,
      (1) limit the displacement for the short form to
          (say) +/- 30 million, so as to avoid wraparound
          off-by-ones
      (2) even if the short form is applicable, once every (say)
          1024 times use the long form anyway, so as to maintain
          verifiability
   */

   /* This is the delta we need to put into a B insn.  It's relative
      to the start of the next-but-one insn, hence the -8.  */
   Long delta   = (Long)((UChar*)place_to_jump_to - (UChar*)p) - (Long)8;
   Bool shortOK = delta >= -30*1000*1000 && delta < 30*1000*1000;
   /* ARM insns are 4 bytes, so a valid delta is always word-aligned. */
   vassert(0 == (delta & (Long)3));

   static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */
   if (shortOK) {
      shortCTR++; // thread safety bleh
      /* Force the long form once every 1024 chains, so both encodings
         keep getting exercised (see point (2) above). */
      if (0 == (shortCTR & 0x3FF)) {
         shortOK = False;
         if (0)
            vex_printf("QQQ chainXDirect_ARM: shortCTR = %u, "
                       "using long form\n", shortCTR);
      }
   }

   /* And make the modifications. */
   if (shortOK) {
      Int simm24 = (Int)(delta >> 2);
      /* Round-trip through an 8-bit shift to check simm24 fits in a
         signed 24-bit field.  NOTE(review): relies on arithmetic >>
         of signed ints (implementation-defined in C) -- true on the
         compilers Valgrind targets, but worth confirming. */
      vassert(simm24 == ((simm24 << 8) >> 8));
      /* 0xEA------ == "b <disp24>" (always-taken branch). */
      p[0] = 0xEA000000 | (simm24 & 0x00FFFFFF);
      /* 0xFF000000 is an undefined instruction; the two filler words
         are never executed, they just pad to the original length. */
      p[1] = 0xFF000000;
      p[2] = 0xFF000000;
   } else {
      (void)imm32_to_iregNo_EXACTLY2(
               p, /*r*/12, (UInt)Ptr_to_ULong(place_to_jump_to));
      /* 0xE12FFF1C == "bx r12" -- jump, not call, this time. */
      p[2] = 0xE12FFF1C;
   }

   /* All three words (12 bytes) may have changed; tell the caller to
      invalidate that range in the icache. */
   VexInvalRange vir = {(HWord)p, 12};
   return vir;
}
   4623 
   4624 
   4625 /* NB: what goes on here has to be very closely coordinated with the
   4626    emitInstr case for XDirect, above. */
VexInvalRange unchainXDirect_ARM ( void* place_to_unchain,
                                   void* place_to_jump_to_EXPECTED,
                                   void* disp_cp_chain_me )
{
   /* What we're expecting to see is:
        (general case)
          movw r12, lo16(place_to_jump_to_EXPECTED)
          movt r12, hi16(place_to_jump_to_EXPECTED)
          bx   r12
        viz
          <8 bytes generated by imm32_to_iregNo_EXACTLY2>
          E1 2F FF 1C
      ---OR---
        in the case where the displacement falls within 26 bits
          b disp24; undef; undef
        viz
          EA <3 bytes == disp24>
          FF 00 00 00
          FF 00 00 00
   */
   UInt* p = (UInt*)place_to_unchain;
   /* Patch site must be 4-aligned, since we read/write it as UInts. */
   vassert(0 == (3 & (HWord)p));

   /* The site must match one of the two patterns chainXDirect_ARM can
      have written; anything else means the codebase is out of sync. */
   Bool valid = False;
   if (is_imm32_to_iregNo_EXACTLY2(
          p, /*r*/12, (UInt)Ptr_to_ULong(place_to_jump_to_EXPECTED))
       && p[2] == 0xE12FFF1C) {
      valid = True; /* it's the long form */
      if (0)
         vex_printf("QQQ unchainXDirect_ARM: found long form\n");
   } else
   if ((p[0] >> 24) == 0xEA && p[1] == 0xFF000000 && p[2] == 0xFF000000) {
      /* It's the short form.  Check the displacement is right. */
      Int simm24 = p[0] & 0x00FFFFFF;
      /* Sign-extend the 24-bit branch offset to 32 bits. */
      simm24 <<= 8; simm24 >>= 8;
      /* +8: an ARM branch is relative to the insn's address plus 8. */
      if ((UChar*)p + (simm24 << 2) + 8 == (UChar*)place_to_jump_to_EXPECTED) {
         valid = True;
         if (0)
            vex_printf("QQQ unchainXDirect_ARM: found short form\n");
      }
   }
   vassert(valid);

   /* And what we want to change it to is:
        movw r12, lo16(disp_cp_chain_me)
        movt r12, hi16(disp_cp_chain_me)
        blx  r12
      viz
        <8 bytes generated by imm32_to_iregNo_EXACTLY2>
        E1 2F FF 3C
   */
   (void)imm32_to_iregNo_EXACTLY2(
            p, /*r*/12, (UInt)Ptr_to_ULong(disp_cp_chain_me));
   /* 0xE12FFF3C == "blx r12" -- call back into the chain-me stub. */
   p[2] = 0xE12FFF3C;
   /* All three words (12 bytes) may have changed; invalidate them. */
   VexInvalRange vir = {(HWord)p, 12};
   return vir;
}
   4684 
   4685 
   4686 /* Patch the counter address into a profile inc point, as previously
   4687    created by the ARMin_ProfInc case for emit_ARMInstr. */
   4688 VexInvalRange patchProfInc_ARM ( void*  place_to_patch,
   4689                                  ULong* location_of_counter )
   4690 {
   4691    vassert(sizeof(ULong*) == 4);
   4692    UInt* p = (UInt*)place_to_patch;
   4693    vassert(0 == (3 & (HWord)p));
   4694    vassert(is_imm32_to_iregNo_EXACTLY2(p, /*r*/12, 0x65556555));
   4695    vassert(p[2] == 0xE59CB000);
   4696    vassert(p[3] == 0xE29BB001);
   4697    vassert(p[4] == 0xE58CB000);
   4698    vassert(p[5] == 0xE59CB004);
   4699    vassert(p[6] == 0xE2ABB000);
   4700    vassert(p[7] == 0xE58CB004);
   4701    imm32_to_iregNo_EXACTLY2(p, /*r*/12,
   4702                             (UInt)Ptr_to_ULong(location_of_counter));
   4703    VexInvalRange vir = {(HWord)p, 8};
   4704    return vir;
   4705 }
   4706 
   4707 
   4708 #undef BITS4
   4709 #undef X0000
   4710 #undef X0001
   4711 #undef X0010
   4712 #undef X0011
   4713 #undef X0100
   4714 #undef X0101
   4715 #undef X0110
   4716 #undef X0111
   4717 #undef X1000
   4718 #undef X1001
   4719 #undef X1010
   4720 #undef X1011
   4721 #undef X1100
   4722 #undef X1101
   4723 #undef X1110
   4724 #undef X1111
   4725 #undef XXXXX___
   4726 #undef XXXXXX__
   4727 #undef XXX___XX
   4728 #undef XXXXX__X
   4729 #undef XXXXXXXX
   4730 #undef XX______
   4731 
   4732 /*---------------------------------------------------------------*/
   4733 /*--- end                                     host_arm_defs.c ---*/
   4734 /*---------------------------------------------------------------*/
   4735