Home | History | Annotate | Download | only in priv
      1 
      2 /*---------------------------------------------------------------*/
      3 /*--- begin                                   host_arm_defs.c ---*/
      4 /*---------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2013 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    NEON support is
     14    Copyright (C) 2010-2013 Samsung Electronics
     15    contributed by Dmitry Zhurikhin <zhur (at) ispras.ru>
     16               and Kirill Batuzov <batuzovk (at) ispras.ru>
     17 
     18    This program is free software; you can redistribute it and/or
     19    modify it under the terms of the GNU General Public License as
     20    published by the Free Software Foundation; either version 2 of the
     21    License, or (at your option) any later version.
     22 
     23    This program is distributed in the hope that it will be useful, but
     24    WITHOUT ANY WARRANTY; without even the implied warranty of
     25    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     26    General Public License for more details.
     27 
     28    You should have received a copy of the GNU General Public License
     29    along with this program; if not, write to the Free Software
     30    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     31    02110-1301, USA.
     32 
     33    The GNU General Public License is contained in the file COPYING.
     34 */
     35 
     36 #include "libvex_basictypes.h"
     37 #include "libvex.h"
     38 #include "libvex_trc_values.h"
     39 
     40 #include "main_util.h"
     41 #include "host_generic_regs.h"
     42 #include "host_arm_defs.h"
     43 
     44 UInt arm_hwcaps = 0;
     45 
     46 
     47 /* --------- Registers. --------- */
     48 
     49 /* The usual HReg abstraction.
     50    There are 16 general purpose regs.
     51 */
     52 
     53 void ppHRegARM ( HReg reg )  {
     54    Int r;
     55    /* Be generic for all virtual regs. */
     56    if (hregIsVirtual(reg)) {
     57       ppHReg(reg);
     58       return;
     59    }
     60    /* But specific for real regs. */
     61    switch (hregClass(reg)) {
     62       case HRcInt32:
     63          r = hregNumber(reg);
     64          vassert(r >= 0 && r < 16);
     65          vex_printf("r%d", r);
     66          return;
     67       case HRcFlt64:
     68          r = hregNumber(reg);
     69          vassert(r >= 0 && r < 32);
     70          vex_printf("d%d", r);
     71          return;
     72       case HRcFlt32:
     73          r = hregNumber(reg);
     74          vassert(r >= 0 && r < 32);
     75          vex_printf("s%d", r);
     76          return;
     77       case HRcVec128:
     78          r = hregNumber(reg);
     79          vassert(r >= 0 && r < 16);
     80          vex_printf("q%d", r);
     81          return;
     82       default:
     83          vpanic("ppHRegARM");
     84    }
     85 }
     86 
/* Constructors for the real (non-virtual) ARM registers this backend
   refers to by name.  The final mkHReg argument is False throughout:
   these are real registers, not virtual ones. */

/* Integer registers r0..r15. */
HReg hregARM_R0  ( void ) { return mkHReg(0,  HRcInt32, False); }
HReg hregARM_R1  ( void ) { return mkHReg(1,  HRcInt32, False); }
HReg hregARM_R2  ( void ) { return mkHReg(2,  HRcInt32, False); }
HReg hregARM_R3  ( void ) { return mkHReg(3,  HRcInt32, False); }
HReg hregARM_R4  ( void ) { return mkHReg(4,  HRcInt32, False); }
HReg hregARM_R5  ( void ) { return mkHReg(5,  HRcInt32, False); }
HReg hregARM_R6  ( void ) { return mkHReg(6,  HRcInt32, False); }
HReg hregARM_R7  ( void ) { return mkHReg(7,  HRcInt32, False); }
HReg hregARM_R8  ( void ) { return mkHReg(8,  HRcInt32, False); }
HReg hregARM_R9  ( void ) { return mkHReg(9,  HRcInt32, False); }
HReg hregARM_R10 ( void ) { return mkHReg(10, HRcInt32, False); }
HReg hregARM_R11 ( void ) { return mkHReg(11, HRcInt32, False); }
HReg hregARM_R12 ( void ) { return mkHReg(12, HRcInt32, False); }
HReg hregARM_R13 ( void ) { return mkHReg(13, HRcInt32, False); }
HReg hregARM_R14 ( void ) { return mkHReg(14, HRcInt32, False); }
HReg hregARM_R15 ( void ) { return mkHReg(15, HRcInt32, False); }
/* VFP double registers d8..d12. */
HReg hregARM_D8  ( void ) { return mkHReg(8,  HRcFlt64, False); }
HReg hregARM_D9  ( void ) { return mkHReg(9,  HRcFlt64, False); }
HReg hregARM_D10 ( void ) { return mkHReg(10, HRcFlt64, False); }
HReg hregARM_D11 ( void ) { return mkHReg(11, HRcFlt64, False); }
HReg hregARM_D12 ( void ) { return mkHReg(12, HRcFlt64, False); }
/* VFP single registers s26..s30. */
HReg hregARM_S26 ( void ) { return mkHReg(26, HRcFlt32, False); }
HReg hregARM_S27 ( void ) { return mkHReg(27, HRcFlt32, False); }
HReg hregARM_S28 ( void ) { return mkHReg(28, HRcFlt32, False); }
HReg hregARM_S29 ( void ) { return mkHReg(29, HRcFlt32, False); }
HReg hregARM_S30 ( void ) { return mkHReg(30, HRcFlt32, False); }
/* Neon quad registers q8..q15. */
HReg hregARM_Q8  ( void ) { return mkHReg(8,  HRcVec128, False); }
HReg hregARM_Q9  ( void ) { return mkHReg(9,  HRcVec128, False); }
HReg hregARM_Q10 ( void ) { return mkHReg(10, HRcVec128, False); }
HReg hregARM_Q11 ( void ) { return mkHReg(11, HRcVec128, False); }
HReg hregARM_Q12 ( void ) { return mkHReg(12, HRcVec128, False); }
HReg hregARM_Q13 ( void ) { return mkHReg(13, HRcVec128, False); }
HReg hregARM_Q14 ( void ) { return mkHReg(14, HRcVec128, False); }
HReg hregARM_Q15 ( void ) { return mkHReg(15, HRcVec128, False); }
    121 
    122 void getAllocableRegs_ARM ( Int* nregs, HReg** arr )
    123 {
    124    Int i = 0;
    125    *nregs = 26;
    126    *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
    127    // callee saves ones are listed first, since we prefer them
    128    // if they're available
    129    (*arr)[i++] = hregARM_R4();
    130    (*arr)[i++] = hregARM_R5();
    131    (*arr)[i++] = hregARM_R6();
    132    (*arr)[i++] = hregARM_R7();
    133    (*arr)[i++] = hregARM_R10();
    134    (*arr)[i++] = hregARM_R11();
    135    // otherwise we'll have to slum it out with caller-saves ones
    136    (*arr)[i++] = hregARM_R0();
    137    (*arr)[i++] = hregARM_R1();
    138    (*arr)[i++] = hregARM_R2();
    139    (*arr)[i++] = hregARM_R3();
    140    (*arr)[i++] = hregARM_R9();
    141    // FP hreegisters.  Note: these are all callee-save.  Yay!
    142    // Hence we don't need to mention them as trashed in
    143    // getHRegUsage for ARMInstr_Call.
    144    (*arr)[i++] = hregARM_D8();
    145    (*arr)[i++] = hregARM_D9();
    146    (*arr)[i++] = hregARM_D10();
    147    (*arr)[i++] = hregARM_D11();
    148    (*arr)[i++] = hregARM_D12();
    149    (*arr)[i++] = hregARM_S26();
    150    (*arr)[i++] = hregARM_S27();
    151    (*arr)[i++] = hregARM_S28();
    152    (*arr)[i++] = hregARM_S29();
    153    (*arr)[i++] = hregARM_S30();
    154 
    155    (*arr)[i++] = hregARM_Q8();
    156    (*arr)[i++] = hregARM_Q9();
    157    (*arr)[i++] = hregARM_Q10();
    158    (*arr)[i++] = hregARM_Q11();
    159    (*arr)[i++] = hregARM_Q12();
    160 
    161    //(*arr)[i++] = hregARM_Q13();
    162    //(*arr)[i++] = hregARM_Q14();
    163    //(*arr)[i++] = hregARM_Q15();
    164 
    165    // unavail: r8 as GSP
    166    // r12 is used as a spill/reload temporary
    167    // r13 as SP
    168    // r14 as LR
    169    // r15 as PC
    170    //
    171    // All in all, we have 11 allocatable integer registers:
    172    // 0 1 2 3 4 5 6 7 9 10 11, with r8 dedicated as GSP
    173    // and r12 dedicated as a spill temporary.
    174    // 13 14 and 15 are not under the allocator's control.
    175    //
    176    // Hence for the allocatable registers we have:
    177    //
    178    // callee-saved: 4 5 6 7 (8) 9 10 11
    179    // caller-saved: 0 1 2 3
    180    // Note 9 is ambiguous: the base EABI does not give an e/r-saved
    181    // designation for it, but the Linux instantiation of the ABI
    182    // specifies it as callee-saved.
    183    //
    184    // If the set of available registers changes or if the e/r status
    185    // changes, be sure to re-check/sync the definition of
    186    // getHRegUsage for ARMInstr_Call too.
    187    vassert(i == *nregs);
    188 }
    189 
    190 
    191 
    192 /* --------- Condition codes, ARM encoding. --------- */
    193 
/* Return the standard ARM two-letter name for condition code |cond|,
   as used in assembly syntax. */
const HChar* showARMCondCode ( ARMCondCode cond ) {
   switch (cond) {
       case ARMcc_EQ:  return "eq";
       case ARMcc_NE:  return "ne";
       case ARMcc_HS:  return "hs";
       case ARMcc_LO:  return "lo";
       case ARMcc_MI:  return "mi";
       case ARMcc_PL:  return "pl";
       case ARMcc_VS:  return "vs";
       case ARMcc_VC:  return "vc";
       case ARMcc_HI:  return "hi";
       case ARMcc_LS:  return "ls";
       case ARMcc_GE:  return "ge";
       case ARMcc_LT:  return "lt";
       case ARMcc_GT:  return "gt";
       case ARMcc_LE:  return "le";
       case ARMcc_AL:  return "al"; // default
       case ARMcc_NV:  return "nv";
       default: vpanic("showARMCondCode");
   }
}
    215 
    216 
    217 /* --------- Mem AModes: Addressing Mode 1 --------- */
    218 
    219 ARMAMode1* ARMAMode1_RI  ( HReg reg, Int simm13 ) {
    220    ARMAMode1* am        = LibVEX_Alloc(sizeof(ARMAMode1));
    221    am->tag              = ARMam1_RI;
    222    am->ARMam1.RI.reg    = reg;
    223    am->ARMam1.RI.simm13 = simm13;
    224    vassert(-4095 <= simm13 && simm13 <= 4095);
    225    return am;
    226 }
    227 ARMAMode1* ARMAMode1_RRS ( HReg base, HReg index, UInt shift ) {
    228    ARMAMode1* am        = LibVEX_Alloc(sizeof(ARMAMode1));
    229    am->tag              = ARMam1_RRS;
    230    am->ARMam1.RRS.base  = base;
    231    am->ARMam1.RRS.index = index;
    232    am->ARMam1.RRS.shift = shift;
    233    vassert(0 <= shift && shift <= 3);
    234    return am;
    235 }
    236 
    237 void ppARMAMode1 ( ARMAMode1* am ) {
    238    switch (am->tag) {
    239       case ARMam1_RI:
    240          vex_printf("%d(", am->ARMam1.RI.simm13);
    241          ppHRegARM(am->ARMam1.RI.reg);
    242          vex_printf(")");
    243          break;
    244       case ARMam1_RRS:
    245          vex_printf("(");
    246          ppHRegARM(am->ARMam1.RRS.base);
    247          vex_printf(",");
    248          ppHRegARM(am->ARMam1.RRS.index);
    249          vex_printf(",%u)", am->ARMam1.RRS.shift);
    250          break;
    251       default:
    252          vassert(0);
    253    }
    254 }
    255 
    256 static void addRegUsage_ARMAMode1 ( HRegUsage* u, ARMAMode1* am ) {
    257    switch (am->tag) {
    258       case ARMam1_RI:
    259          addHRegUse(u, HRmRead, am->ARMam1.RI.reg);
    260          return;
    261       case ARMam1_RRS:
    262          //    addHRegUse(u, HRmRead, am->ARMam1.RRS.base);
    263          //    addHRegUse(u, HRmRead, am->ARMam1.RRS.index);
    264          //   return;
    265       default:
    266          vpanic("addRegUsage_ARMAmode1");
    267    }
    268 }
    269 
    270 static void mapRegs_ARMAMode1 ( HRegRemap* m, ARMAMode1* am ) {
    271    switch (am->tag) {
    272       case ARMam1_RI:
    273          am->ARMam1.RI.reg = lookupHRegRemap(m, am->ARMam1.RI.reg);
    274          return;
    275       case ARMam1_RRS:
    276          //am->ARMam1.RR.base =lookupHRegRemap(m, am->ARMam1.RR.base);
    277          //am->ARMam1.RR.index = lookupHRegRemap(m, am->ARMam1.RR.index);
    278          //return;
    279       default:
    280          vpanic("mapRegs_ARMAmode1");
    281    }
    282 }
    283 
    284 
    285 /* --------- Mem AModes: Addressing Mode 2 --------- */
    286 
    287 ARMAMode2* ARMAMode2_RI ( HReg reg, Int simm9 ) {
    288    ARMAMode2* am       = LibVEX_Alloc(sizeof(ARMAMode2));
    289    am->tag             = ARMam2_RI;
    290    am->ARMam2.RI.reg   = reg;
    291    am->ARMam2.RI.simm9 = simm9;
    292    vassert(-255 <= simm9 && simm9 <= 255);
    293    return am;
    294 }
    295 ARMAMode2* ARMAMode2_RR ( HReg base, HReg index ) {
    296    ARMAMode2* am       = LibVEX_Alloc(sizeof(ARMAMode2));
    297    am->tag             = ARMam2_RR;
    298    am->ARMam2.RR.base  = base;
    299    am->ARMam2.RR.index = index;
    300    return am;
    301 }
    302 
    303 void ppARMAMode2 ( ARMAMode2* am ) {
    304    switch (am->tag) {
    305       case ARMam2_RI:
    306          vex_printf("%d(", am->ARMam2.RI.simm9);
    307          ppHRegARM(am->ARMam2.RI.reg);
    308          vex_printf(")");
    309          break;
    310       case ARMam2_RR:
    311          vex_printf("(");
    312          ppHRegARM(am->ARMam2.RR.base);
    313          vex_printf(",");
    314          ppHRegARM(am->ARMam2.RR.index);
    315          vex_printf(")");
    316          break;
    317       default:
    318          vassert(0);
    319    }
    320 }
    321 
    322 static void addRegUsage_ARMAMode2 ( HRegUsage* u, ARMAMode2* am ) {
    323    switch (am->tag) {
    324       case ARMam2_RI:
    325          addHRegUse(u, HRmRead, am->ARMam2.RI.reg);
    326          return;
    327       case ARMam2_RR:
    328          //    addHRegUse(u, HRmRead, am->ARMam2.RR.base);
    329          //    addHRegUse(u, HRmRead, am->ARMam2.RR.index);
    330          //   return;
    331       default:
    332          vpanic("addRegUsage_ARMAmode2");
    333    }
    334 }
    335 
    336 static void mapRegs_ARMAMode2 ( HRegRemap* m, ARMAMode2* am ) {
    337    switch (am->tag) {
    338       case ARMam2_RI:
    339          am->ARMam2.RI.reg = lookupHRegRemap(m, am->ARMam2.RI.reg);
    340          return;
    341       case ARMam2_RR:
    342          //am->ARMam2.RR.base =lookupHRegRemap(m, am->ARMam2.RR.base);
    343          //am->ARMam2.RR.index = lookupHRegRemap(m, am->ARMam2.RR.index);
    344          //return;
    345       default:
    346          vpanic("mapRegs_ARMAmode2");
    347    }
    348 }
    349 
    350 
    351 /* --------- Mem AModes: Addressing Mode VFP --------- */
    352 
    353 ARMAModeV* mkARMAModeV ( HReg reg, Int simm11 ) {
    354    ARMAModeV* am = LibVEX_Alloc(sizeof(ARMAModeV));
    355    vassert(simm11 >= -1020 && simm11 <= 1020);
    356    vassert(0 == (simm11 & 3));
    357    am->reg    = reg;
    358    am->simm11 = simm11;
    359    return am;
    360 }
    361 
/* Print a VFP amode as "imm(reg)". */
void ppARMAModeV ( ARMAModeV* am ) {
   vex_printf("%d(", am->simm11);
   ppHRegARM(am->reg);
   vex_printf(")");
}

/* Record in |u| the (single) register read by a VFP amode. */
static void addRegUsage_ARMAModeV ( HRegUsage* u, ARMAModeV* am ) {
   addHRegUse(u, HRmRead, am->reg);
}

/* Apply register remapping |m| to a VFP amode. */
static void mapRegs_ARMAModeV ( HRegRemap* m, ARMAModeV* am ) {
   am->reg = lookupHRegRemap(m, am->reg);
}
    375 
    376 
    377 /* --------- Mem AModes: Addressing Mode Neon ------- */
    378 
    379 ARMAModeN *mkARMAModeN_RR ( HReg rN, HReg rM ) {
    380    ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN));
    381    am->tag = ARMamN_RR;
    382    am->ARMamN.RR.rN = rN;
    383    am->ARMamN.RR.rM = rM;
    384    return am;
    385 }
    386 
    387 ARMAModeN *mkARMAModeN_R ( HReg rN ) {
    388    ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN));
    389    am->tag = ARMamN_R;
    390    am->ARMamN.R.rN = rN;
    391    return am;
    392 }
    393 
    394 static void addRegUsage_ARMAModeN ( HRegUsage* u, ARMAModeN* am ) {
    395    if (am->tag == ARMamN_R) {
    396       addHRegUse(u, HRmRead, am->ARMamN.R.rN);
    397    } else {
    398       addHRegUse(u, HRmRead, am->ARMamN.RR.rN);
    399       addHRegUse(u, HRmRead, am->ARMamN.RR.rM);
    400    }
    401 }
    402 
    403 static void mapRegs_ARMAModeN ( HRegRemap* m, ARMAModeN* am ) {
    404    if (am->tag == ARMamN_R) {
    405       am->ARMamN.R.rN = lookupHRegRemap(m, am->ARMamN.R.rN);
    406    } else {
    407       am->ARMamN.RR.rN = lookupHRegRemap(m, am->ARMamN.RR.rN);
    408       am->ARMamN.RR.rM = lookupHRegRemap(m, am->ARMamN.RR.rM);
    409    }
    410 }
    411 
    412 void ppARMAModeN ( ARMAModeN* am ) {
    413    vex_printf("[");
    414    if (am->tag == ARMamN_R) {
    415       ppHRegARM(am->ARMamN.R.rN);
    416    } else {
    417       ppHRegARM(am->ARMamN.RR.rN);
    418    }
    419    vex_printf("]");
    420    if (am->tag == ARMamN_RR) {
    421       vex_printf(", ");
    422       ppHRegARM(am->ARMamN.RR.rM);
    423    }
    424 }
    425 
    426 
    427 /* --------- Reg or imm-8x4 operands --------- */
    428 
    429 static UInt ROR32 ( UInt x, UInt sh ) {
    430    vassert(sh >= 0 && sh < 32);
    431    if (sh == 0)
    432       return x;
    433    else
    434       return (x << (32-sh)) | (x >> sh);
    435 }
    436 
    437 ARMRI84* ARMRI84_I84 ( UShort imm8, UShort imm4 ) {
    438    ARMRI84* ri84          = LibVEX_Alloc(sizeof(ARMRI84));
    439    ri84->tag              = ARMri84_I84;
    440    ri84->ARMri84.I84.imm8 = imm8;
    441    ri84->ARMri84.I84.imm4 = imm4;
    442    vassert(imm8 >= 0 && imm8 <= 255);
    443    vassert(imm4 >= 0 && imm4 <= 15);
    444    return ri84;
    445 }
    446 ARMRI84* ARMRI84_R ( HReg reg ) {
    447    ARMRI84* ri84       = LibVEX_Alloc(sizeof(ARMRI84));
    448    ri84->tag           = ARMri84_R;
    449    ri84->ARMri84.R.reg = reg;
    450    return ri84;
    451 }
    452 
/* Print a reg-or-imm8x4 operand.  For the immediate form, print the
   decoded value (imm8 rotated right by 2*imm4) rather than the raw
   fields. */
void ppARMRI84 ( ARMRI84* ri84 ) {
   switch (ri84->tag) {
      case ARMri84_I84:
         vex_printf("0x%x", ROR32(ri84->ARMri84.I84.imm8,
                                  2 * ri84->ARMri84.I84.imm4));
         break;
      case ARMri84_R:
         ppHRegARM(ri84->ARMri84.R.reg);
         break;
      default:
         vassert(0);
   }
}
    466 
    467 static void addRegUsage_ARMRI84 ( HRegUsage* u, ARMRI84* ri84 ) {
    468    switch (ri84->tag) {
    469       case ARMri84_I84:
    470          return;
    471       case ARMri84_R:
    472          addHRegUse(u, HRmRead, ri84->ARMri84.R.reg);
    473          return;
    474       default:
    475          vpanic("addRegUsage_ARMRI84");
    476    }
    477 }
    478 
    479 static void mapRegs_ARMRI84 ( HRegRemap* m, ARMRI84* ri84 ) {
    480    switch (ri84->tag) {
    481       case ARMri84_I84:
    482          return;
    483       case ARMri84_R:
    484          ri84->ARMri84.R.reg = lookupHRegRemap(m, ri84->ARMri84.R.reg);
    485          return;
    486       default:
    487          vpanic("mapRegs_ARMRI84");
    488    }
    489 }
    490 
    491 
    492 /* --------- Reg or imm5 operands --------- */
    493 
    494 ARMRI5* ARMRI5_I5 ( UInt imm5 ) {
    495    ARMRI5* ri5         = LibVEX_Alloc(sizeof(ARMRI5));
    496    ri5->tag            = ARMri5_I5;
    497    ri5->ARMri5.I5.imm5 = imm5;
    498    vassert(imm5 > 0 && imm5 <= 31); // zero is not allowed
    499    return ri5;
    500 }
    501 ARMRI5* ARMRI5_R ( HReg reg ) {
    502    ARMRI5* ri5       = LibVEX_Alloc(sizeof(ARMRI5));
    503    ri5->tag          = ARMri5_R;
    504    ri5->ARMri5.R.reg = reg;
    505    return ri5;
    506 }
    507 
    508 void ppARMRI5 ( ARMRI5* ri5 ) {
    509    switch (ri5->tag) {
    510       case ARMri5_I5:
    511          vex_printf("%u", ri5->ARMri5.I5.imm5);
    512          break;
    513       case ARMri5_R:
    514          ppHRegARM(ri5->ARMri5.R.reg);
    515          break;
    516       default:
    517          vassert(0);
    518    }
    519 }
    520 
    521 static void addRegUsage_ARMRI5 ( HRegUsage* u, ARMRI5* ri5 ) {
    522    switch (ri5->tag) {
    523       case ARMri5_I5:
    524          return;
    525       case ARMri5_R:
    526          addHRegUse(u, HRmRead, ri5->ARMri5.R.reg);
    527          return;
    528       default:
    529          vpanic("addRegUsage_ARMRI5");
    530    }
    531 }
    532 
    533 static void mapRegs_ARMRI5 ( HRegRemap* m, ARMRI5* ri5 ) {
    534    switch (ri5->tag) {
    535       case ARMri5_I5:
    536          return;
    537       case ARMri5_R:
    538          ri5->ARMri5.R.reg = lookupHRegRemap(m, ri5->ARMri5.R.reg);
    539          return;
    540       default:
    541          vpanic("mapRegs_ARMRI5");
    542    }
    543 }
    544 
/* -------- Neon Immediate operand --------- */
    546 
    547 ARMNImm* ARMNImm_TI ( UInt type, UInt imm8 ) {
    548    ARMNImm* i = LibVEX_Alloc(sizeof(ARMNImm));
    549    i->type = type;
    550    i->imm8 = imm8;
    551    return i;
    552 }
    553 
/* Expand a Neon modified-immediate (type, imm8) pair into the 64-bit
   value it denotes.  The deliberate case fallthroughs below implement
   the shared parts of the expansions -- do not "fix" them. */
ULong ARMNImm_to_Imm64 ( ARMNImm* imm ) {
   int i, j;
   ULong y, x = imm->imm8;
   switch (imm->type) {
      /* Types 0..3: imm8 placed in byte |type| of each 32-bit half.
         Each extra fallthrough level shifts left by one more byte. */
      case 3:
         x = x << 8; /* fallthrough */
      case 2:
         x = x << 8; /* fallthrough */
      case 1:
         x = x << 8; /* fallthrough */
      case 0:
         return (x << 32) | x;
      /* Types 4..6: a 16-bit pattern replicated into all four
         halfwords: imm8 in the low byte (4), the high byte (5), or
         both bytes (6). */
      case 5:
      case 6:
         if (imm->type == 5)
            x = x << 8;
         else
            x = (x << 8) | x;
         /* fallthrough */
      case 4:
         x = (x << 16) | x;
         return (x << 32) | x;
      /* Types 7/8: imm8 followed by one (7) or two (8) bytes of 0xFF
         in each 32-bit half. */
      case 8:
         x = (x << 8) | 0xFF;
         /* fallthrough */
      case 7:
         x = (x << 8) | 0xFF;
         return (x << 32) | x;
      /* Type 9: each of the 8 bits of imm8 is replicated into a full
         byte of the result (bit 7 -> most significant byte). */
      case 9:
         x = 0;
         for (i = 7; i >= 0; i--) {
            y = ((ULong)imm->imm8 >> i) & 1;
            for (j = 0; j < 8; j++) {
               x = (x << 1) | y;
            }
         }
         return x;
      /* Type 10: appears to be the VFP-style float immediate
         expansion (sign, replicated exponent bit, 4-bit exponent,
         4-bit mantissa) placed in each 32-bit half -- confirm against
         the ARM ARM "modified immediate" description. */
      case 10:
         x |= (x & 0x80) << 5;
         x |= (~x & 0x40) << 5;
         x &= 0x187F; /* 0001 1000 0111 1111 */
         x |= (x & 0x40) << 4;
         x |= (x & 0x40) << 3;
         x |= (x & 0x40) << 2;
         x |= (x & 0x40) << 1;
         x = x << 19;
         x = (x << 32) | x;
         return x;
      default:
         vpanic("ARMNImm_to_Imm64");
   }
}
    606 
/* Try to express the 64-bit constant |x| as a Neon modified-immediate
   (type, imm8) pair; return NULL if no representation exists.  This
   is the inverse of ARMNImm_to_Imm64, and the trickier candidates
   (types 9 and 10) are verified by round-tripping through it. */
ARMNImm* Imm64_to_ARMNImm ( ULong x ) {
   ARMNImm tmp;
   /* All types except 9 produce the same value in both 32-bit
      halves, so those are only possible if |x| has that property. */
   if ((x & 0xFFFFFFFF) == (x >> 32)) {
      /* Types 0..3: a single nonzero byte per 32-bit half. */
      if ((x & 0xFFFFFF00) == 0)
         return ARMNImm_TI(0, x & 0xFF);
      if ((x & 0xFFFF00FF) == 0)
         return ARMNImm_TI(1, (x >> 8) & 0xFF);
      if ((x & 0xFF00FFFF) == 0)
         return ARMNImm_TI(2, (x >> 16) & 0xFF);
      if ((x & 0x00FFFFFF) == 0)
         return ARMNImm_TI(3, (x >> 24) & 0xFF);
      /* Types 7/8: imm8 followed by trailing 0xFF byte(s). */
      if ((x & 0xFFFF00FF) == 0xFF)
         return ARMNImm_TI(7, (x >> 8) & 0xFF);
      if ((x & 0xFF00FFFF) == 0xFFFF)
         return ARMNImm_TI(8, (x >> 16) & 0xFF);
      /* Types 4..6: a 16-bit pattern replicated in all halfwords. */
      if ((x & 0xFFFF) == ((x >> 16) & 0xFFFF)) {
         if ((x & 0xFF00) == 0)
            return ARMNImm_TI(4, x & 0xFF);
         if ((x & 0x00FF) == 0)
            return ARMNImm_TI(5, (x >> 8) & 0xFF);
         if ((x & 0xFF) == ((x >> 8) & 0xFF))
            return ARMNImm_TI(6, x & 0xFF);
      }
      /* Type 10 (float-style immediate): candidate only if the low
         19 bits of each half are zero; confirm by round-trip. */
      if ((x & 0x7FFFF) == 0) {
         tmp.type = 10;
         tmp.imm8 = ((x >> 19) & 0x7F) | ((x >> 24) & 0x80);
         if (ARMNImm_to_Imm64(&tmp) == x)
            return ARMNImm_TI(tmp.type, tmp.imm8);
      }
   } else {
      /* This can only be type 9: gather one bit per byte, then check
         by round-trip that every byte really was 0x00 or 0xFF. */
      tmp.imm8 = (((x >> 56) & 1) << 7)
               | (((x >> 48) & 1) << 6)
               | (((x >> 40) & 1) << 5)
               | (((x >> 32) & 1) << 4)
               | (((x >> 24) & 1) << 3)
               | (((x >> 16) & 1) << 2)
               | (((x >>  8) & 1) << 1)
               | (((x >>  0) & 1) << 0);
      tmp.type = 9;
      if (ARMNImm_to_Imm64 (&tmp) == x)
         return ARMNImm_TI(tmp.type, tmp.imm8);
   }
   return NULL;
}
    652 
/* Print a Neon immediate.  The 64-bit expansion is printed twice,
   giving the full 128-bit value as it appears in a Q register. */
void ppARMNImm (ARMNImm* i) {
   ULong x = ARMNImm_to_Imm64(i);
   vex_printf("0x%llX%llX", x, x);
}
    657 
    658 /* -- Register or scalar operand --- */
    659 
    660 ARMNRS* mkARMNRS(ARMNRS_tag tag, HReg reg, UInt index)
    661 {
    662    ARMNRS *p = LibVEX_Alloc(sizeof(ARMNRS));
    663    p->tag = tag;
    664    p->reg = reg;
    665    p->index = index;
    666    return p;
    667 }
    668 
    669 void ppARMNRS(ARMNRS *p)
    670 {
    671    ppHRegARM(p->reg);
    672    if (p->tag == ARMNRS_Scalar) {
    673       vex_printf("[%d]", p->index);
    674    }
    675 }
    676 
    677 /* --------- Instructions. --------- */
    678 
/* Name for an ALU op as used in printed (debug) assembly.  NOTE:
   these are mostly real ARM mnemonics, but not all -- "xor" is
   printed where the ARM mnemonic is "eor"; presumably deliberate
   internal naming, confirm before changing. */
const HChar* showARMAluOp ( ARMAluOp op ) {
   switch (op) {
      case ARMalu_ADD:  return "add";
      case ARMalu_ADDS: return "adds";
      case ARMalu_ADC:  return "adc";
      case ARMalu_SUB:  return "sub";
      case ARMalu_SUBS: return "subs";
      case ARMalu_SBC:  return "sbc";
      case ARMalu_AND:  return "and";
      case ARMalu_BIC:  return "bic";
      case ARMalu_OR:   return "orr";
      case ARMalu_XOR:  return "xor";
      default: vpanic("showARMAluOp");
   }
}
    694 
/* Name for a shift op in printed (debug) assembly.  NOTE: these are
   x86-style names (shl/shr/sar), not the ARM mnemonics
   (lsl/lsr/asr); presumably deliberate internal naming. */
const HChar* showARMShiftOp ( ARMShiftOp op ) {
   switch (op) {
      case ARMsh_SHL: return "shl";
      case ARMsh_SHR: return "shr";
      case ARMsh_SAR: return "sar";
      default: vpanic("showARMShiftOp");
   }
}
    703 
/* Name for a unary integer op in printed (debug) assembly. */
const HChar* showARMUnaryOp ( ARMUnaryOp op ) {
   switch (op) {
      case ARMun_NEG: return "neg";
      case ARMun_NOT: return "not";
      case ARMun_CLZ: return "clz";
      default: vpanic("showARMUnaryOp");
   }
}
    712 
/* Name for a multiply op in printed (debug) assembly. */
const HChar* showARMMulOp ( ARMMulOp op ) {
   switch (op) {
      case ARMmul_PLAIN: return "mul";
      case ARMmul_ZX:    return "umull";
      case ARMmul_SX:    return "smull";
      default: vpanic("showARMMulOp");
   }
}
    721 
/* Name for a VFP binary op in printed (debug) assembly. */
const HChar* showARMVfpOp ( ARMVfpOp op ) {
   switch (op) {
      case ARMvfp_ADD: return "add";
      case ARMvfp_SUB: return "sub";
      case ARMvfp_MUL: return "mul";
      case ARMvfp_DIV: return "div";
      default: vpanic("showARMVfpOp");
   }
}
    731 
/* Name for a VFP unary op in printed (debug) assembly. */
const HChar* showARMVfpUnaryOp ( ARMVfpUnaryOp op ) {
   switch (op) {
      case ARMvfpu_COPY: return "cpy";
      case ARMvfpu_NEG:  return "neg";
      case ARMvfpu_ABS:  return "abs";
      case ARMvfpu_SQRT: return "sqrt";
      default: vpanic("showARMVfpUnaryOp");
   }
}
    741 
    742 const HChar* showARMNeonBinOp ( ARMNeonBinOp op ) {
    743    switch (op) {
    744       case ARMneon_VAND: return "vand";
    745       case ARMneon_VORR: return "vorr";
    746       case ARMneon_VXOR: return "veor";
    747       case ARMneon_VADD: return "vadd";
    748       case ARMneon_VRHADDS: return "vrhadd";
    749       case ARMneon_VRHADDU: return "vrhadd";
    750       case ARMneon_VADDFP: return "vadd";
    751       case ARMneon_VPADDFP: return "vpadd";
    752       case ARMneon_VABDFP: return "vabd";
    753       case ARMneon_VSUB: return "vsub";
    754       case ARMneon_VSUBFP: return "vsub";
    755       case ARMneon_VMINU: return "vmin";
    756       case ARMneon_VMINS: return "vmin";
    757       case ARMneon_VMINF: return "vmin";
    758       case ARMneon_VMAXU: return "vmax";
    759       case ARMneon_VMAXS: return "vmax";
    760       case ARMneon_VMAXF: return "vmax";
    761       case ARMneon_VQADDU: return "vqadd";
    762       case ARMneon_VQADDS: return "vqadd";
    763       case ARMneon_VQSUBU: return "vqsub";
    764       case ARMneon_VQSUBS: return "vqsub";
    765       case ARMneon_VCGTU:  return "vcgt";
    766       case ARMneon_VCGTS:  return "vcgt";
    767       case ARMneon_VCGTF:  return "vcgt";
    768       case ARMneon_VCGEF:  return "vcgt";
    769       case ARMneon_VCGEU:  return "vcge";
    770       case ARMneon_VCGES:  return "vcge";
    771       case ARMneon_VCEQ:  return "vceq";
    772       case ARMneon_VCEQF:  return "vceq";
    773       case ARMneon_VPADD:   return "vpadd";
    774       case ARMneon_VPMINU:   return "vpmin";
    775       case ARMneon_VPMINS:   return "vpmin";
    776       case ARMneon_VPMINF:   return "vpmin";
    777       case ARMneon_VPMAXU:   return "vpmax";
    778       case ARMneon_VPMAXS:   return "vpmax";
    779       case ARMneon_VPMAXF:   return "vpmax";
    780       case ARMneon_VEXT:   return "vext";
    781       case ARMneon_VMUL:   return "vmuli";
    782       case ARMneon_VMULLU:   return "vmull";
    783       case ARMneon_VMULLS:   return "vmull";
    784       case ARMneon_VMULP:  return "vmul";
    785       case ARMneon_VMULFP:  return "vmul";
    786       case ARMneon_VMULLP:  return "vmul";
    787       case ARMneon_VQDMULH: return "vqdmulh";
    788       case ARMneon_VQRDMULH: return "vqrdmulh";
    789       case ARMneon_VQDMULL: return "vqdmull";
    790       case ARMneon_VTBL: return "vtbl";
    791       case ARMneon_VRECPS: return "vrecps";
    792       case ARMneon_VRSQRTS: return "vrecps";
    793       case ARMneon_INVALID: return "??invalid??";
    794       /* ... */
    795       default: vpanic("showARMNeonBinOp");
    796    }
    797 }
    798 
/* Data-type suffix for a Neon binary op: "" for bitwise ops, ".i"
   integer, ".u" unsigned, ".s" signed, ".p" polynomial, ".f" float.
   Pairs with the mnemonic from showARMNeonBinOp. */
const HChar* showARMNeonBinOpDataType ( ARMNeonBinOp op ) {
   switch (op) {
      case ARMneon_VAND:
      case ARMneon_VORR:
      case ARMneon_VXOR:
         return "";
      case ARMneon_VADD:
      case ARMneon_VSUB:
      case ARMneon_VEXT:
      case ARMneon_VMUL:
      case ARMneon_VPADD:
      case ARMneon_VTBL:
      case ARMneon_VCEQ:
         return ".i";
      case ARMneon_VRHADDU:
      case ARMneon_VMINU:
      case ARMneon_VMAXU:
      case ARMneon_VQADDU:
      case ARMneon_VQSUBU:
      case ARMneon_VCGTU:
      case ARMneon_VCGEU:
      case ARMneon_VMULLU:
      case ARMneon_VPMINU:
      case ARMneon_VPMAXU:
         return ".u";
      case ARMneon_VRHADDS:
      case ARMneon_VMINS:
      case ARMneon_VMAXS:
      case ARMneon_VQADDS:
      case ARMneon_VQSUBS:
      case ARMneon_VCGTS:
      case ARMneon_VCGES:
      case ARMneon_VQDMULL:
      case ARMneon_VMULLS:
      case ARMneon_VPMINS:
      case ARMneon_VPMAXS:
      case ARMneon_VQDMULH:
      case ARMneon_VQRDMULH:
         return ".s";
      case ARMneon_VMULP:
      case ARMneon_VMULLP:
         return ".p";
      case ARMneon_VADDFP:
      case ARMneon_VABDFP:
      case ARMneon_VPADDFP:
      case ARMneon_VSUBFP:
      case ARMneon_VMULFP:
      case ARMneon_VMINF:
      case ARMneon_VMAXF:
      case ARMneon_VPMINF:
      case ARMneon_VPMAXF:
      case ARMneon_VCGTF:
      case ARMneon_VCGEF:
      case ARMneon_VCEQF:
      case ARMneon_VRECPS:
      case ARMneon_VRSQRTS:
         return ".f";
      /* ... */
      default: vpanic("showARMNeonBinOpDataType");
   }
}
    860 
    861 const HChar* showARMNeonUnOp ( ARMNeonUnOp op ) {
    862    switch (op) {
    863       case ARMneon_COPY: return "vmov";
    864       case ARMneon_COPYLS: return "vmov";
    865       case ARMneon_COPYLU: return "vmov";
    866       case ARMneon_COPYN: return "vmov";
    867       case ARMneon_COPYQNSS: return "vqmovn";
    868       case ARMneon_COPYQNUS: return "vqmovun";
    869       case ARMneon_COPYQNUU: return "vqmovn";
    870       case ARMneon_NOT: return "vmvn";
    871       case ARMneon_EQZ: return "vceq";
    872       case ARMneon_CNT: return "vcnt";
    873       case ARMneon_CLS: return "vcls";
    874       case ARMneon_CLZ: return "vclz";
    875       case ARMneon_DUP: return "vdup";
    876       case ARMneon_PADDLS: return "vpaddl";
    877       case ARMneon_PADDLU: return "vpaddl";
    878       case ARMneon_VQSHLNSS: return "vqshl";
    879       case ARMneon_VQSHLNUU: return "vqshl";
    880       case ARMneon_VQSHLNUS: return "vqshlu";
    881       case ARMneon_REV16: return "vrev16";
    882       case ARMneon_REV32: return "vrev32";
    883       case ARMneon_REV64: return "vrev64";
    884       case ARMneon_VCVTFtoU: return "vcvt";
    885       case ARMneon_VCVTFtoS: return "vcvt";
    886       case ARMneon_VCVTUtoF: return "vcvt";
    887       case ARMneon_VCVTStoF: return "vcvt";
    888       case ARMneon_VCVTFtoFixedU: return "vcvt";
    889       case ARMneon_VCVTFtoFixedS: return "vcvt";
    890       case ARMneon_VCVTFixedUtoF: return "vcvt";
    891       case ARMneon_VCVTFixedStoF: return "vcvt";
    892       case ARMneon_VCVTF32toF16: return "vcvt";
    893       case ARMneon_VCVTF16toF32: return "vcvt";
    894       case ARMneon_VRECIP: return "vrecip";
    895       case ARMneon_VRECIPF: return "vrecipf";
    896       case ARMneon_VNEGF: return "vneg";
    897       case ARMneon_ABS: return "vabs";
    898       case ARMneon_VABSFP: return "vabsfp";
    899       case ARMneon_VRSQRTEFP: return "vrsqrtefp";
    900       case ARMneon_VRSQRTE: return "vrsqrte";
    901       /* ... */
    902       default: vpanic("showARMNeonUnOp");
    903    }
    904 }
    905 
    906 const HChar* showARMNeonUnOpDataType ( ARMNeonUnOp op ) {
    907    switch (op) {
    908       case ARMneon_COPY:
    909       case ARMneon_NOT:
    910          return "";
    911       case ARMneon_COPYN:
    912       case ARMneon_EQZ:
    913       case ARMneon_CNT:
    914       case ARMneon_DUP:
    915       case ARMneon_REV16:
    916       case ARMneon_REV32:
    917       case ARMneon_REV64:
    918          return ".i";
    919       case ARMneon_COPYLU:
    920       case ARMneon_PADDLU:
    921       case ARMneon_COPYQNUU:
    922       case ARMneon_VQSHLNUU:
    923       case ARMneon_VRECIP:
    924       case ARMneon_VRSQRTE:
    925          return ".u";
    926       case ARMneon_CLS:
    927       case ARMneon_CLZ:
    928       case ARMneon_COPYLS:
    929       case ARMneon_PADDLS:
    930       case ARMneon_COPYQNSS:
    931       case ARMneon_COPYQNUS:
    932       case ARMneon_VQSHLNSS:
    933       case ARMneon_VQSHLNUS:
    934       case ARMneon_ABS:
    935          return ".s";
    936       case ARMneon_VRECIPF:
    937       case ARMneon_VNEGF:
    938       case ARMneon_VABSFP:
    939       case ARMneon_VRSQRTEFP:
    940          return ".f";
    941       case ARMneon_VCVTFtoU: return ".u32.f32";
    942       case ARMneon_VCVTFtoS: return ".s32.f32";
    943       case ARMneon_VCVTUtoF: return ".f32.u32";
    944       case ARMneon_VCVTStoF: return ".f32.s32";
    945       case ARMneon_VCVTF16toF32: return ".f32.f16";
    946       case ARMneon_VCVTF32toF16: return ".f16.f32";
    947       case ARMneon_VCVTFtoFixedU: return ".u32.f32";
    948       case ARMneon_VCVTFtoFixedS: return ".s32.f32";
    949       case ARMneon_VCVTFixedUtoF: return ".f32.u32";
    950       case ARMneon_VCVTFixedStoF: return ".f32.s32";
    951       /* ... */
    952       default: vpanic("showARMNeonUnOpDataType");
    953    }
    954 }
    955 
    956 const HChar* showARMNeonUnOpS ( ARMNeonUnOpS op ) {
    957    switch (op) {
    958       case ARMneon_SETELEM: return "vmov";
    959       case ARMneon_GETELEMU: return "vmov";
    960       case ARMneon_GETELEMS: return "vmov";
    961       case ARMneon_VDUP: return "vdup";
    962       /* ... */
    963       default: vpanic("showARMNeonUnarySOp");
    964    }
    965 }
    966 
    967 const HChar* showARMNeonUnOpSDataType ( ARMNeonUnOpS op ) {
    968    switch (op) {
    969       case ARMneon_SETELEM:
    970       case ARMneon_VDUP:
    971          return ".i";
    972       case ARMneon_GETELEMS:
    973          return ".s";
    974       case ARMneon_GETELEMU:
    975          return ".u";
    976       /* ... */
    977       default: vpanic("showARMNeonUnarySOp");
    978    }
    979 }
    980 
    981 const HChar* showARMNeonShiftOp ( ARMNeonShiftOp op ) {
    982    switch (op) {
    983       case ARMneon_VSHL: return "vshl";
    984       case ARMneon_VSAL: return "vshl";
    985       case ARMneon_VQSHL: return "vqshl";
    986       case ARMneon_VQSAL: return "vqshl";
    987       /* ... */
    988       default: vpanic("showARMNeonShiftOp");
    989    }
    990 }
    991 
    992 const HChar* showARMNeonShiftOpDataType ( ARMNeonShiftOp op ) {
    993    switch (op) {
    994       case ARMneon_VSHL:
    995       case ARMneon_VQSHL:
    996          return ".u";
    997       case ARMneon_VSAL:
    998       case ARMneon_VQSAL:
    999          return ".s";
   1000       /* ... */
   1001       default: vpanic("showARMNeonShiftOpDataType");
   1002    }
   1003 }
   1004 
   1005 const HChar* showARMNeonDualOp ( ARMNeonDualOp op ) {
   1006    switch (op) {
   1007       case ARMneon_TRN: return "vtrn";
   1008       case ARMneon_ZIP: return "vzip";
   1009       case ARMneon_UZP: return "vuzp";
   1010       /* ... */
   1011       default: vpanic("showARMNeonDualOp");
   1012    }
   1013 }
   1014 
   1015 const HChar* showARMNeonDualOpDataType ( ARMNeonDualOp op ) {
   1016    switch (op) {
   1017       case ARMneon_TRN:
   1018       case ARMneon_ZIP:
   1019       case ARMneon_UZP:
   1020          return "i";
   1021       /* ... */
   1022       default: vpanic("showARMNeonDualOp");
   1023    }
   1024 }
   1025 
   1026 static const HChar* showARMNeonDataSize_wrk ( UInt size )
   1027 {
   1028    switch (size) {
   1029       case 0: return "8";
   1030       case 1: return "16";
   1031       case 2: return "32";
   1032       case 3: return "64";
   1033       default: vpanic("showARMNeonDataSize");
   1034    }
   1035 }
   1036 
/* Produce the textual lane-size ("8"/"16"/"32"/"64", or "") for a NEON
   instruction's size field, handling per-opcode encoding quirks. */
static const HChar* showARMNeonDataSize ( ARMInstr* i )
{
   switch (i->tag) {
      case ARMin_NBinary:
         /* VEXT is always shown byte-sized. */
         if (i->ARMin.NBinary.op == ARMneon_VEXT)
            return "8";
         /* Bitwise ops carry no size suffix at all. */
         if (i->ARMin.NBinary.op == ARMneon_VAND ||
             i->ARMin.NBinary.op == ARMneon_VORR ||
             i->ARMin.NBinary.op == ARMneon_VXOR)
            return "";
         return showARMNeonDataSize_wrk(i->ARMin.NBinary.size);
      case ARMin_NUnary:
         /* Moves, bitwise-not, and all conversions show no size here;
            for conversions the mnemonic's type suffix carries the
            size info instead (see showARMNeonUnOpDataType). */
         if (i->ARMin.NUnary.op == ARMneon_COPY ||
             i->ARMin.NUnary.op == ARMneon_NOT ||
             i->ARMin.NUnary.op == ARMneon_VCVTF32toF16||
             i->ARMin.NUnary.op == ARMneon_VCVTF16toF32||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
             i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
             i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF ||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoS ||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoU ||
             i->ARMin.NUnary.op == ARMneon_VCVTStoF ||
             i->ARMin.NUnary.op == ARMneon_VCVTUtoF)
            return "";
         if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
             i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
             i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
            /* For saturating shift-by-immediate, size is an immediate
               encoding: the highest set bit among 0x40/0x20/0x10/0x08
               selects the lane width. */
            UInt size;
            size = i->ARMin.NUnary.size;
            if (size & 0x40)
               return "64";
            if (size & 0x20)
               return "32";
            if (size & 0x10)
               return "16";
            if (size & 0x08)
               return "8";
            vpanic("showARMNeonDataSize");
         }
         return showARMNeonDataSize_wrk(i->ARMin.NUnary.size);
      case ARMin_NUnaryS:
         if (i->ARMin.NUnaryS.op == ARMneon_VDUP) {
            /* VDUP encodes lane width in the low bits of size:
               xxx1 -> 8, xx10 -> 16, x100 -> 32. */
            int size;
            size = i->ARMin.NUnaryS.size;
            if ((size & 1) == 1)
               return "8";
            if ((size & 3) == 2)
               return "16";
            if ((size & 7) == 4)
               return "32";
            vpanic("showARMNeonDataSize");
         }
         return showARMNeonDataSize_wrk(i->ARMin.NUnaryS.size);
      case ARMin_NShift:
         return showARMNeonDataSize_wrk(i->ARMin.NShift.size);
      case ARMin_NDual:
         return showARMNeonDataSize_wrk(i->ARMin.NDual.size);
      default:
         vpanic("showARMNeonDataSize");
   }
}
   1099 
   1100 ARMInstr* ARMInstr_Alu ( ARMAluOp op,
   1101                          HReg dst, HReg argL, ARMRI84* argR ) {
   1102    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1103    i->tag            = ARMin_Alu;
   1104    i->ARMin.Alu.op   = op;
   1105    i->ARMin.Alu.dst  = dst;
   1106    i->ARMin.Alu.argL = argL;
   1107    i->ARMin.Alu.argR = argR;
   1108    return i;
   1109 }
   1110 ARMInstr* ARMInstr_Shift  ( ARMShiftOp op,
   1111                             HReg dst, HReg argL, ARMRI5* argR ) {
   1112    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1113    i->tag              = ARMin_Shift;
   1114    i->ARMin.Shift.op   = op;
   1115    i->ARMin.Shift.dst  = dst;
   1116    i->ARMin.Shift.argL = argL;
   1117    i->ARMin.Shift.argR = argR;
   1118    return i;
   1119 }
   1120 ARMInstr* ARMInstr_Unary ( ARMUnaryOp op, HReg dst, HReg src ) {
   1121    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1122    i->tag             = ARMin_Unary;
   1123    i->ARMin.Unary.op  = op;
   1124    i->ARMin.Unary.dst = dst;
   1125    i->ARMin.Unary.src = src;
   1126    return i;
   1127 }
   1128 ARMInstr* ARMInstr_CmpOrTst ( Bool isCmp, HReg argL, ARMRI84* argR ) {
   1129    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1130    i->tag                  = ARMin_CmpOrTst;
   1131    i->ARMin.CmpOrTst.isCmp = isCmp;
   1132    i->ARMin.CmpOrTst.argL  = argL;
   1133    i->ARMin.CmpOrTst.argR  = argR;
   1134    return i;
   1135 }
   1136 ARMInstr* ARMInstr_Mov ( HReg dst, ARMRI84* src ) {
   1137    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1138    i->tag           = ARMin_Mov;
   1139    i->ARMin.Mov.dst = dst;
   1140    i->ARMin.Mov.src = src;
   1141    return i;
   1142 }
   1143 ARMInstr* ARMInstr_Imm32  ( HReg dst, UInt imm32 ) {
   1144    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1145    i->tag               = ARMin_Imm32;
   1146    i->ARMin.Imm32.dst   = dst;
   1147    i->ARMin.Imm32.imm32 = imm32;
   1148    return i;
   1149 }
   1150 ARMInstr* ARMInstr_LdSt32 ( ARMCondCode cc,
   1151                             Bool isLoad, HReg rD, ARMAMode1* amode ) {
   1152    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1153    i->tag                 = ARMin_LdSt32;
   1154    i->ARMin.LdSt32.cc     = cc;
   1155    i->ARMin.LdSt32.isLoad = isLoad;
   1156    i->ARMin.LdSt32.rD     = rD;
   1157    i->ARMin.LdSt32.amode  = amode;
   1158    vassert(cc != ARMcc_NV);
   1159    return i;
   1160 }
   1161 ARMInstr* ARMInstr_LdSt16 ( ARMCondCode cc,
   1162                             Bool isLoad, Bool signedLoad,
   1163                             HReg rD, ARMAMode2* amode ) {
   1164    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1165    i->tag                     = ARMin_LdSt16;
   1166    i->ARMin.LdSt16.cc         = cc;
   1167    i->ARMin.LdSt16.isLoad     = isLoad;
   1168    i->ARMin.LdSt16.signedLoad = signedLoad;
   1169    i->ARMin.LdSt16.rD         = rD;
   1170    i->ARMin.LdSt16.amode      = amode;
   1171    vassert(cc != ARMcc_NV);
   1172    return i;
   1173 }
   1174 ARMInstr* ARMInstr_LdSt8U ( ARMCondCode cc,
   1175                             Bool isLoad, HReg rD, ARMAMode1* amode ) {
   1176    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1177    i->tag                 = ARMin_LdSt8U;
   1178    i->ARMin.LdSt8U.cc     = cc;
   1179    i->ARMin.LdSt8U.isLoad = isLoad;
   1180    i->ARMin.LdSt8U.rD     = rD;
   1181    i->ARMin.LdSt8U.amode  = amode;
   1182    vassert(cc != ARMcc_NV);
   1183    return i;
   1184 }
   1185 ARMInstr* ARMInstr_Ld8S ( ARMCondCode cc, HReg rD, ARMAMode2* amode ) {
   1186    ARMInstr* i         = LibVEX_Alloc(sizeof(ARMInstr));
   1187    i->tag              = ARMin_Ld8S;
   1188    i->ARMin.Ld8S.cc    = cc;
   1189    i->ARMin.Ld8S.rD    = rD;
   1190    i->ARMin.Ld8S.amode = amode;
   1191    vassert(cc != ARMcc_NV);
   1192    return i;
   1193 }
   1194 ARMInstr* ARMInstr_XDirect ( Addr32 dstGA, ARMAMode1* amR15T,
   1195                              ARMCondCode cond, Bool toFastEP ) {
   1196    ARMInstr* i               = LibVEX_Alloc(sizeof(ARMInstr));
   1197    i->tag                    = ARMin_XDirect;
   1198    i->ARMin.XDirect.dstGA    = dstGA;
   1199    i->ARMin.XDirect.amR15T   = amR15T;
   1200    i->ARMin.XDirect.cond     = cond;
   1201    i->ARMin.XDirect.toFastEP = toFastEP;
   1202    return i;
   1203 }
   1204 ARMInstr* ARMInstr_XIndir ( HReg dstGA, ARMAMode1* amR15T,
   1205                             ARMCondCode cond ) {
   1206    ARMInstr* i            = LibVEX_Alloc(sizeof(ARMInstr));
   1207    i->tag                 = ARMin_XIndir;
   1208    i->ARMin.XIndir.dstGA  = dstGA;
   1209    i->ARMin.XIndir.amR15T = amR15T;
   1210    i->ARMin.XIndir.cond   = cond;
   1211    return i;
   1212 }
   1213 ARMInstr* ARMInstr_XAssisted ( HReg dstGA, ARMAMode1* amR15T,
   1214                                ARMCondCode cond, IRJumpKind jk ) {
   1215    ARMInstr* i               = LibVEX_Alloc(sizeof(ARMInstr));
   1216    i->tag                    = ARMin_XAssisted;
   1217    i->ARMin.XAssisted.dstGA  = dstGA;
   1218    i->ARMin.XAssisted.amR15T = amR15T;
   1219    i->ARMin.XAssisted.cond   = cond;
   1220    i->ARMin.XAssisted.jk     = jk;
   1221    return i;
   1222 }
   1223 ARMInstr* ARMInstr_CMov ( ARMCondCode cond, HReg dst, ARMRI84* src ) {
   1224    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1225    i->tag             = ARMin_CMov;
   1226    i->ARMin.CMov.cond = cond;
   1227    i->ARMin.CMov.dst  = dst;
   1228    i->ARMin.CMov.src  = src;
   1229    vassert(cond != ARMcc_AL);
   1230    return i;
   1231 }
   1232 ARMInstr* ARMInstr_Call ( ARMCondCode cond, HWord target, Int nArgRegs,
   1233                           RetLoc rloc ) {
   1234    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1235    i->tag                 = ARMin_Call;
   1236    i->ARMin.Call.cond     = cond;
   1237    i->ARMin.Call.target   = target;
   1238    i->ARMin.Call.nArgRegs = nArgRegs;
   1239    i->ARMin.Call.rloc     = rloc;
   1240    vassert(is_sane_RetLoc(rloc));
   1241    return i;
   1242 }
   1243 ARMInstr* ARMInstr_Mul ( ARMMulOp op ) {
   1244    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1245    i->tag          = ARMin_Mul;
   1246    i->ARMin.Mul.op = op;
   1247    return i;
   1248 }
   1249 ARMInstr* ARMInstr_LdrEX ( Int szB ) {
   1250    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1251    i->tag             = ARMin_LdrEX;
   1252    i->ARMin.LdrEX.szB = szB;
   1253    vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
   1254    return i;
   1255 }
   1256 ARMInstr* ARMInstr_StrEX ( Int szB ) {
   1257    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1258    i->tag             = ARMin_StrEX;
   1259    i->ARMin.StrEX.szB = szB;
   1260    vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
   1261    return i;
   1262 }
   1263 ARMInstr* ARMInstr_VLdStD ( Bool isLoad, HReg dD, ARMAModeV* am ) {
   1264    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1265    i->tag                 = ARMin_VLdStD;
   1266    i->ARMin.VLdStD.isLoad = isLoad;
   1267    i->ARMin.VLdStD.dD     = dD;
   1268    i->ARMin.VLdStD.amode  = am;
   1269    return i;
   1270 }
   1271 ARMInstr* ARMInstr_VLdStS ( Bool isLoad, HReg fD, ARMAModeV* am ) {
   1272    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1273    i->tag                 = ARMin_VLdStS;
   1274    i->ARMin.VLdStS.isLoad = isLoad;
   1275    i->ARMin.VLdStS.fD     = fD;
   1276    i->ARMin.VLdStS.amode  = am;
   1277    return i;
   1278 }
   1279 ARMInstr* ARMInstr_VAluD ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
   1280    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1281    i->tag              = ARMin_VAluD;
   1282    i->ARMin.VAluD.op   = op;
   1283    i->ARMin.VAluD.dst  = dst;
   1284    i->ARMin.VAluD.argL = argL;
   1285    i->ARMin.VAluD.argR = argR;
   1286    return i;
   1287 }
   1288 ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
   1289    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1290    i->tag              = ARMin_VAluS;
   1291    i->ARMin.VAluS.op   = op;
   1292    i->ARMin.VAluS.dst  = dst;
   1293    i->ARMin.VAluS.argL = argL;
   1294    i->ARMin.VAluS.argR = argR;
   1295    return i;
   1296 }
   1297 ARMInstr* ARMInstr_VUnaryD ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
   1298    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1299    i->tag               = ARMin_VUnaryD;
   1300    i->ARMin.VUnaryD.op  = op;
   1301    i->ARMin.VUnaryD.dst = dst;
   1302    i->ARMin.VUnaryD.src = src;
   1303    return i;
   1304 }
   1305 ARMInstr* ARMInstr_VUnaryS ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
   1306    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1307    i->tag               = ARMin_VUnaryS;
   1308    i->ARMin.VUnaryS.op  = op;
   1309    i->ARMin.VUnaryS.dst = dst;
   1310    i->ARMin.VUnaryS.src = src;
   1311    return i;
   1312 }
   1313 ARMInstr* ARMInstr_VCmpD ( HReg argL, HReg argR ) {
   1314    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1315    i->tag              = ARMin_VCmpD;
   1316    i->ARMin.VCmpD.argL = argL;
   1317    i->ARMin.VCmpD.argR = argR;
   1318    return i;
   1319 }
   1320 ARMInstr* ARMInstr_VCMovD ( ARMCondCode cond, HReg dst, HReg src ) {
   1321    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1322    i->tag               = ARMin_VCMovD;
   1323    i->ARMin.VCMovD.cond = cond;
   1324    i->ARMin.VCMovD.dst  = dst;
   1325    i->ARMin.VCMovD.src  = src;
   1326    vassert(cond != ARMcc_AL);
   1327    return i;
   1328 }
   1329 ARMInstr* ARMInstr_VCMovS ( ARMCondCode cond, HReg dst, HReg src ) {
   1330    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1331    i->tag               = ARMin_VCMovS;
   1332    i->ARMin.VCMovS.cond = cond;
   1333    i->ARMin.VCMovS.dst  = dst;
   1334    i->ARMin.VCMovS.src  = src;
   1335    vassert(cond != ARMcc_AL);
   1336    return i;
   1337 }
   1338 ARMInstr* ARMInstr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
   1339    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1340    i->tag               = ARMin_VCvtSD;
   1341    i->ARMin.VCvtSD.sToD = sToD;
   1342    i->ARMin.VCvtSD.dst  = dst;
   1343    i->ARMin.VCvtSD.src  = src;
   1344    return i;
   1345 }
   1346 ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo ) {
   1347    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1348    i->tag              = ARMin_VXferD;
   1349    i->ARMin.VXferD.toD = toD;
   1350    i->ARMin.VXferD.dD  = dD;
   1351    i->ARMin.VXferD.rHi = rHi;
   1352    i->ARMin.VXferD.rLo = rLo;
   1353    return i;
   1354 }
   1355 ARMInstr* ARMInstr_VXferS ( Bool toS, HReg fD, HReg rLo ) {
   1356    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1357    i->tag              = ARMin_VXferS;
   1358    i->ARMin.VXferS.toS = toS;
   1359    i->ARMin.VXferS.fD  = fD;
   1360    i->ARMin.VXferS.rLo = rLo;
   1361    return i;
   1362 }
   1363 ARMInstr* ARMInstr_VCvtID ( Bool iToD, Bool syned,
   1364                             HReg dst, HReg src ) {
   1365    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1366    i->tag                = ARMin_VCvtID;
   1367    i->ARMin.VCvtID.iToD  = iToD;
   1368    i->ARMin.VCvtID.syned = syned;
   1369    i->ARMin.VCvtID.dst   = dst;
   1370    i->ARMin.VCvtID.src   = src;
   1371    return i;
   1372 }
   1373 ARMInstr* ARMInstr_FPSCR ( Bool toFPSCR, HReg iReg ) {
   1374    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1375    i->tag                 = ARMin_FPSCR;
   1376    i->ARMin.FPSCR.toFPSCR = toFPSCR;
   1377    i->ARMin.FPSCR.iReg    = iReg;
   1378    return i;
   1379 }
   1380 ARMInstr* ARMInstr_MFence ( void ) {
   1381    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1382    i->tag      = ARMin_MFence;
   1383    return i;
   1384 }
   1385 ARMInstr* ARMInstr_CLREX( void ) {
   1386    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1387    i->tag      = ARMin_CLREX;
   1388    return i;
   1389 }
   1390 
   1391 ARMInstr* ARMInstr_NLdStQ ( Bool isLoad, HReg dQ, ARMAModeN *amode ) {
   1392    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1393    i->tag                  = ARMin_NLdStQ;
   1394    i->ARMin.NLdStQ.isLoad  = isLoad;
   1395    i->ARMin.NLdStQ.dQ      = dQ;
   1396    i->ARMin.NLdStQ.amode   = amode;
   1397    return i;
   1398 }
   1399 
   1400 ARMInstr* ARMInstr_NLdStD ( Bool isLoad, HReg dD, ARMAModeN *amode ) {
   1401    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1402    i->tag                  = ARMin_NLdStD;
   1403    i->ARMin.NLdStD.isLoad  = isLoad;
   1404    i->ARMin.NLdStD.dD      = dD;
   1405    i->ARMin.NLdStD.amode   = amode;
   1406    return i;
   1407 }
   1408 
   1409 ARMInstr* ARMInstr_NUnary ( ARMNeonUnOp op, HReg dQ, HReg nQ,
   1410                             UInt size, Bool Q ) {
   1411    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1412    i->tag                = ARMin_NUnary;
   1413    i->ARMin.NUnary.op   = op;
   1414    i->ARMin.NUnary.src  = nQ;
   1415    i->ARMin.NUnary.dst  = dQ;
   1416    i->ARMin.NUnary.size = size;
   1417    i->ARMin.NUnary.Q    = Q;
   1418    return i;
   1419 }
   1420 
   1421 ARMInstr* ARMInstr_NUnaryS ( ARMNeonUnOpS op, ARMNRS* dst, ARMNRS* src,
   1422                              UInt size, Bool Q ) {
   1423    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1424    i->tag                = ARMin_NUnaryS;
   1425    i->ARMin.NUnaryS.op   = op;
   1426    i->ARMin.NUnaryS.src  = src;
   1427    i->ARMin.NUnaryS.dst  = dst;
   1428    i->ARMin.NUnaryS.size = size;
   1429    i->ARMin.NUnaryS.Q    = Q;
   1430    return i;
   1431 }
   1432 
   1433 ARMInstr* ARMInstr_NDual ( ARMNeonDualOp op, HReg nQ, HReg mQ,
   1434                            UInt size, Bool Q ) {
   1435    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1436    i->tag                = ARMin_NDual;
   1437    i->ARMin.NDual.op   = op;
   1438    i->ARMin.NDual.arg1 = nQ;
   1439    i->ARMin.NDual.arg2 = mQ;
   1440    i->ARMin.NDual.size = size;
   1441    i->ARMin.NDual.Q    = Q;
   1442    return i;
   1443 }
   1444 
   1445 ARMInstr* ARMInstr_NBinary ( ARMNeonBinOp op,
   1446                              HReg dst, HReg argL, HReg argR,
   1447                              UInt size, Bool Q ) {
   1448    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1449    i->tag                = ARMin_NBinary;
   1450    i->ARMin.NBinary.op   = op;
   1451    i->ARMin.NBinary.argL = argL;
   1452    i->ARMin.NBinary.argR = argR;
   1453    i->ARMin.NBinary.dst  = dst;
   1454    i->ARMin.NBinary.size = size;
   1455    i->ARMin.NBinary.Q    = Q;
   1456    return i;
   1457 }
   1458 
   1459 ARMInstr* ARMInstr_NeonImm (HReg dst, ARMNImm* imm ) {
   1460    ARMInstr *i = LibVEX_Alloc(sizeof(ARMInstr));
   1461    i->tag         = ARMin_NeonImm;
   1462    i->ARMin.NeonImm.dst = dst;
   1463    i->ARMin.NeonImm.imm = imm;
   1464    return i;
   1465 }
   1466 
   1467 ARMInstr* ARMInstr_NCMovQ ( ARMCondCode cond, HReg dst, HReg src ) {
   1468    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1469    i->tag               = ARMin_NCMovQ;
   1470    i->ARMin.NCMovQ.cond = cond;
   1471    i->ARMin.NCMovQ.dst  = dst;
   1472    i->ARMin.NCMovQ.src  = src;
   1473    vassert(cond != ARMcc_AL);
   1474    return i;
   1475 }
   1476 
   1477 ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp op,
   1478                             HReg dst, HReg argL, HReg argR,
   1479                             UInt size, Bool Q ) {
   1480    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1481    i->tag                = ARMin_NShift;
   1482    i->ARMin.NShift.op   = op;
   1483    i->ARMin.NShift.argL = argL;
   1484    i->ARMin.NShift.argR = argR;
   1485    i->ARMin.NShift.dst  = dst;
   1486    i->ARMin.NShift.size = size;
   1487    i->ARMin.NShift.Q    = Q;
   1488    return i;
   1489 }
   1490 
   1491 ARMInstr* ARMInstr_NShl64 ( HReg dst, HReg src, UInt amt )
   1492 {
   1493    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1494    i->tag              = ARMin_NShl64;
   1495    i->ARMin.NShl64.dst = dst;
   1496    i->ARMin.NShl64.src = src;
   1497    i->ARMin.NShl64.amt = amt;
   1498    vassert(amt >= 1 && amt <= 63);
   1499    return i;
   1500 }
   1501 
   1502 /* Helper copy-pasted from isel.c */
   1503 static Bool fitsIn8x4 ( UInt* u8, UInt* u4, UInt u )
   1504 {
   1505    UInt i;
   1506    for (i = 0; i < 16; i++) {
   1507       if (0 == (u & 0xFFFFFF00)) {
   1508          *u8 = u;
   1509          *u4 = i;
   1510          return True;
   1511       }
   1512       u = ROR32(u, 30);
   1513    }
   1514    vassert(i == 16);
   1515    return False;
   1516 }
   1517 
   1518 ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 ) {
   1519    UInt u8, u4;
   1520    ARMInstr *i = LibVEX_Alloc(sizeof(ARMInstr));
   1521    /* Try to generate single ADD if possible */
   1522    if (fitsIn8x4(&u8, &u4, imm32)) {
   1523       i->tag            = ARMin_Alu;
   1524       i->ARMin.Alu.op   = ARMalu_ADD;
   1525       i->ARMin.Alu.dst  = rD;
   1526       i->ARMin.Alu.argL = rN;
   1527       i->ARMin.Alu.argR = ARMRI84_I84(u8, u4);
   1528    } else {
   1529       i->tag               = ARMin_Add32;
   1530       i->ARMin.Add32.rD    = rD;
   1531       i->ARMin.Add32.rN    = rN;
   1532       i->ARMin.Add32.imm32 = imm32;
   1533    }
   1534    return i;
   1535 }
   1536 
   1537 ARMInstr* ARMInstr_EvCheck ( ARMAMode1* amCounter,
   1538                              ARMAMode1* amFailAddr ) {
   1539    ARMInstr* i                 = LibVEX_Alloc(sizeof(ARMInstr));
   1540    i->tag                      = ARMin_EvCheck;
   1541    i->ARMin.EvCheck.amCounter  = amCounter;
   1542    i->ARMin.EvCheck.amFailAddr = amFailAddr;
   1543    return i;
   1544 }
   1545 
   1546 ARMInstr* ARMInstr_ProfInc ( void ) {
   1547    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   1548    i->tag      = ARMin_ProfInc;
   1549    return i;
   1550 }
   1551 
   1552 /* ... */
   1553 
   1554 void ppARMInstr ( ARMInstr* i ) {
   1555    switch (i->tag) {
   1556       case ARMin_Alu:
   1557          vex_printf("%-4s  ", showARMAluOp(i->ARMin.Alu.op));
   1558          ppHRegARM(i->ARMin.Alu.dst);
   1559          vex_printf(", ");
   1560          ppHRegARM(i->ARMin.Alu.argL);
   1561          vex_printf(", ");
   1562          ppARMRI84(i->ARMin.Alu.argR);
   1563          return;
   1564       case ARMin_Shift:
   1565          vex_printf("%s   ", showARMShiftOp(i->ARMin.Shift.op));
   1566          ppHRegARM(i->ARMin.Shift.dst);
   1567          vex_printf(", ");
   1568          ppHRegARM(i->ARMin.Shift.argL);
   1569          vex_printf(", ");
   1570          ppARMRI5(i->ARMin.Shift.argR);
   1571          return;
   1572       case ARMin_Unary:
   1573          vex_printf("%s   ", showARMUnaryOp(i->ARMin.Unary.op));
   1574          ppHRegARM(i->ARMin.Unary.dst);
   1575          vex_printf(", ");
   1576          ppHRegARM(i->ARMin.Unary.src);
   1577          return;
   1578       case ARMin_CmpOrTst:
   1579          vex_printf("%s   ", i->ARMin.CmpOrTst.isCmp ? "cmp" : "tst");
   1580          ppHRegARM(i->ARMin.CmpOrTst.argL);
   1581          vex_printf(", ");
   1582          ppARMRI84(i->ARMin.CmpOrTst.argR);
   1583          return;
   1584       case ARMin_Mov:
   1585          vex_printf("mov   ");
   1586          ppHRegARM(i->ARMin.Mov.dst);
   1587          vex_printf(", ");
   1588          ppARMRI84(i->ARMin.Mov.src);
   1589          return;
   1590       case ARMin_Imm32:
   1591          vex_printf("imm   ");
   1592          ppHRegARM(i->ARMin.Imm32.dst);
   1593          vex_printf(", 0x%x", i->ARMin.Imm32.imm32);
   1594          return;
   1595       case ARMin_LdSt32:
   1596          if (i->ARMin.LdSt32.isLoad) {
   1597             vex_printf("ldr%s ", i->ARMin.LdSt32.cc == ARMcc_AL ? "  "
   1598                                     : showARMCondCode(i->ARMin.LdSt32.cc));
   1599             ppHRegARM(i->ARMin.LdSt32.rD);
   1600             vex_printf(", ");
   1601             ppARMAMode1(i->ARMin.LdSt32.amode);
   1602          } else {
   1603             vex_printf("str%s ", i->ARMin.LdSt32.cc == ARMcc_AL ? "  "
   1604                                     : showARMCondCode(i->ARMin.LdSt32.cc));
   1605             ppARMAMode1(i->ARMin.LdSt32.amode);
   1606             vex_printf(", ");
   1607             ppHRegARM(i->ARMin.LdSt32.rD);
   1608          }
   1609          return;
   1610       case ARMin_LdSt16:
   1611          if (i->ARMin.LdSt16.isLoad) {
   1612             vex_printf("%s%s%s",
   1613                        i->ARMin.LdSt16.signedLoad ? "ldrsh" : "ldrh",
   1614                        i->ARMin.LdSt16.cc == ARMcc_AL ? "  "
   1615                           : showARMCondCode(i->ARMin.LdSt16.cc),
   1616                        i->ARMin.LdSt16.signedLoad ? " " : "  ");
   1617             ppHRegARM(i->ARMin.LdSt16.rD);
   1618             vex_printf(", ");
   1619             ppARMAMode2(i->ARMin.LdSt16.amode);
   1620          } else {
   1621             vex_printf("strh%s  ",
   1622                        i->ARMin.LdSt16.cc == ARMcc_AL ? "  "
   1623                           : showARMCondCode(i->ARMin.LdSt16.cc));
   1624             ppARMAMode2(i->ARMin.LdSt16.amode);
   1625             vex_printf(", ");
   1626             ppHRegARM(i->ARMin.LdSt16.rD);
   1627          }
   1628          return;
   1629       case ARMin_LdSt8U:
   1630          if (i->ARMin.LdSt8U.isLoad) {
   1631             vex_printf("ldrb%s  ", i->ARMin.LdSt8U.cc == ARMcc_AL ? "  "
   1632                                       : showARMCondCode(i->ARMin.LdSt8U.cc));
   1633             ppHRegARM(i->ARMin.LdSt8U.rD);
   1634             vex_printf(", ");
   1635             ppARMAMode1(i->ARMin.LdSt8U.amode);
   1636          } else {
   1637             vex_printf("strb%s  ", i->ARMin.LdSt8U.cc == ARMcc_AL ? "  "
   1638                                       : showARMCondCode(i->ARMin.LdSt8U.cc));
   1639             ppARMAMode1(i->ARMin.LdSt8U.amode);
   1640             vex_printf(", ");
   1641             ppHRegARM(i->ARMin.LdSt8U.rD);
   1642          }
   1643          return;
   1644       case ARMin_Ld8S:
   1645          vex_printf("ldrsb%s ", i->ARMin.Ld8S.cc == ARMcc_AL ? "  "
   1646                                    : showARMCondCode(i->ARMin.Ld8S.cc));
   1647          ppARMAMode2(i->ARMin.Ld8S.amode);
   1648          vex_printf(", ");
   1649          ppHRegARM(i->ARMin.Ld8S.rD);
   1650          return;
   1651       case ARMin_XDirect:
   1652          vex_printf("(xDirect) ");
   1653          vex_printf("if (%%cpsr.%s) { ",
   1654                     showARMCondCode(i->ARMin.XDirect.cond));
   1655          vex_printf("movw r12,0x%x; ",
   1656                     (UInt)(i->ARMin.XDirect.dstGA & 0xFFFF));
   1657          vex_printf("movt r12,0x%x; ",
   1658                     (UInt)((i->ARMin.XDirect.dstGA >> 16) & 0xFFFF));
   1659          vex_printf("str r12,");
   1660          ppARMAMode1(i->ARMin.XDirect.amR15T);
   1661          vex_printf("; movw r12,LO16($disp_cp_chain_me_to_%sEP); ",
   1662                     i->ARMin.XDirect.toFastEP ? "fast" : "slow");
   1663          vex_printf("movt r12,HI16($disp_cp_chain_me_to_%sEP); ",
   1664                     i->ARMin.XDirect.toFastEP ? "fast" : "slow");
   1665          vex_printf("blx r12 }");
   1666          return;
   1667       case ARMin_XIndir:
   1668          vex_printf("(xIndir) ");
   1669          vex_printf("if (%%cpsr.%s) { ",
   1670                     showARMCondCode(i->ARMin.XIndir.cond));
   1671          vex_printf("str ");
   1672          ppHRegARM(i->ARMin.XIndir.dstGA);
   1673          vex_printf(",");
   1674          ppARMAMode1(i->ARMin.XIndir.amR15T);
   1675          vex_printf("; movw r12,LO16($disp_cp_xindir); ");
   1676          vex_printf("movt r12,HI16($disp_cp_xindir); ");
   1677          vex_printf("blx r12 }");
   1678          return;
   1679       case ARMin_XAssisted:
   1680          vex_printf("(xAssisted) ");
   1681          vex_printf("if (%%cpsr.%s) { ",
   1682                     showARMCondCode(i->ARMin.XAssisted.cond));
   1683          vex_printf("str ");
   1684          ppHRegARM(i->ARMin.XAssisted.dstGA);
   1685          vex_printf(",");
   1686          ppARMAMode1(i->ARMin.XAssisted.amR15T);
   1687          vex_printf("movw r8,$IRJumpKind_to_TRCVAL(%d); ",
   1688                     (Int)i->ARMin.XAssisted.jk);
   1689          vex_printf("movw r12,LO16($disp_cp_xassisted); ");
   1690          vex_printf("movt r12,HI16($disp_cp_xassisted); ");
   1691          vex_printf("blx r12 }");
   1692          return;
   1693       case ARMin_CMov:
   1694          vex_printf("mov%s ", showARMCondCode(i->ARMin.CMov.cond));
   1695          ppHRegARM(i->ARMin.CMov.dst);
   1696          vex_printf(", ");
   1697          ppARMRI84(i->ARMin.CMov.src);
   1698          return;
   1699       case ARMin_Call:
   1700          vex_printf("call%s  ",
   1701                     i->ARMin.Call.cond==ARMcc_AL
   1702                        ? "" : showARMCondCode(i->ARMin.Call.cond));
   1703          vex_printf("0x%lx [nArgRegs=%d, ",
   1704                     i->ARMin.Call.target, i->ARMin.Call.nArgRegs);
   1705          ppRetLoc(i->ARMin.Call.rloc);
   1706          vex_printf("]");
   1707          return;
   1708       case ARMin_Mul:
   1709          vex_printf("%-5s ", showARMMulOp(i->ARMin.Mul.op));
   1710          if (i->ARMin.Mul.op == ARMmul_PLAIN) {
   1711             vex_printf("r0, r2, r3");
   1712          } else {
   1713             vex_printf("r1:r0, r2, r3");
   1714          }
   1715          return;
   1716       case ARMin_LdrEX: {
   1717          const HChar* sz = "";
   1718          switch (i->ARMin.LdrEX.szB) {
   1719             case 1: sz = "b"; break; case 2: sz = "h"; break;
   1720             case 8: sz = "d"; break; case 4: break;
   1721             default: vassert(0);
   1722          }
   1723          vex_printf("ldrex%s %sr2, [r4]",
   1724                     sz, i->ARMin.LdrEX.szB == 8 ? "r3:" : "");
   1725          return;
   1726       }
   1727       case ARMin_StrEX: {
   1728          const HChar* sz = "";
   1729          switch (i->ARMin.StrEX.szB) {
   1730             case 1: sz = "b"; break; case 2: sz = "h"; break;
   1731             case 8: sz = "d"; break; case 4: break;
   1732             default: vassert(0);
   1733          }
   1734          vex_printf("strex%s r0, %sr2, [r4]",
   1735                     sz, i->ARMin.StrEX.szB == 8 ? "r3:" : "");
   1736          return;
   1737       }
   1738       case ARMin_VLdStD:
   1739          if (i->ARMin.VLdStD.isLoad) {
   1740             vex_printf("fldd  ");
   1741             ppHRegARM(i->ARMin.VLdStD.dD);
   1742             vex_printf(", ");
   1743             ppARMAModeV(i->ARMin.VLdStD.amode);
   1744          } else {
   1745             vex_printf("fstd  ");
   1746             ppARMAModeV(i->ARMin.VLdStD.amode);
   1747             vex_printf(", ");
   1748             ppHRegARM(i->ARMin.VLdStD.dD);
   1749          }
   1750          return;
   1751       case ARMin_VLdStS:
   1752          if (i->ARMin.VLdStS.isLoad) {
   1753             vex_printf("flds  ");
   1754             ppHRegARM(i->ARMin.VLdStS.fD);
   1755             vex_printf(", ");
   1756             ppARMAModeV(i->ARMin.VLdStS.amode);
   1757          } else {
   1758             vex_printf("fsts  ");
   1759             ppARMAModeV(i->ARMin.VLdStS.amode);
   1760             vex_printf(", ");
   1761             ppHRegARM(i->ARMin.VLdStS.fD);
   1762          }
   1763          return;
   1764       case ARMin_VAluD:
   1765          vex_printf("f%-3sd ", showARMVfpOp(i->ARMin.VAluD.op));
   1766          ppHRegARM(i->ARMin.VAluD.dst);
   1767          vex_printf(", ");
   1768          ppHRegARM(i->ARMin.VAluD.argL);
   1769          vex_printf(", ");
   1770          ppHRegARM(i->ARMin.VAluD.argR);
   1771          return;
   1772       case ARMin_VAluS:
   1773          vex_printf("f%-3ss ", showARMVfpOp(i->ARMin.VAluS.op));
   1774          ppHRegARM(i->ARMin.VAluS.dst);
   1775          vex_printf(", ");
   1776          ppHRegARM(i->ARMin.VAluS.argL);
   1777          vex_printf(", ");
   1778          ppHRegARM(i->ARMin.VAluS.argR);
   1779          return;
   1780       case ARMin_VUnaryD:
   1781          vex_printf("f%-3sd ", showARMVfpUnaryOp(i->ARMin.VUnaryD.op));
   1782          ppHRegARM(i->ARMin.VUnaryD.dst);
   1783          vex_printf(", ");
   1784          ppHRegARM(i->ARMin.VUnaryD.src);
   1785          return;
   1786       case ARMin_VUnaryS:
   1787          vex_printf("f%-3ss ", showARMVfpUnaryOp(i->ARMin.VUnaryS.op));
   1788          ppHRegARM(i->ARMin.VUnaryS.dst);
   1789          vex_printf(", ");
   1790          ppHRegARM(i->ARMin.VUnaryS.src);
   1791          return;
   1792       case ARMin_VCmpD:
   1793          vex_printf("fcmpd ");
   1794          ppHRegARM(i->ARMin.VCmpD.argL);
   1795          vex_printf(", ");
   1796          ppHRegARM(i->ARMin.VCmpD.argR);
   1797          vex_printf(" ; fmstat");
   1798          return;
   1799       case ARMin_VCMovD:
   1800          vex_printf("fcpyd%s ", showARMCondCode(i->ARMin.VCMovD.cond));
   1801          ppHRegARM(i->ARMin.VCMovD.dst);
   1802          vex_printf(", ");
   1803          ppHRegARM(i->ARMin.VCMovD.src);
   1804          return;
   1805       case ARMin_VCMovS:
   1806          vex_printf("fcpys%s ", showARMCondCode(i->ARMin.VCMovS.cond));
   1807          ppHRegARM(i->ARMin.VCMovS.dst);
   1808          vex_printf(", ");
   1809          ppHRegARM(i->ARMin.VCMovS.src);
   1810          return;
   1811       case ARMin_VCvtSD:
   1812          vex_printf("fcvt%s ", i->ARMin.VCvtSD.sToD ? "ds" : "sd");
   1813          ppHRegARM(i->ARMin.VCvtSD.dst);
   1814          vex_printf(", ");
   1815          ppHRegARM(i->ARMin.VCvtSD.src);
   1816          return;
   1817       case ARMin_VXferD:
   1818          vex_printf("vmov  ");
   1819          if (i->ARMin.VXferD.toD) {
   1820             ppHRegARM(i->ARMin.VXferD.dD);
   1821             vex_printf(", ");
   1822             ppHRegARM(i->ARMin.VXferD.rLo);
   1823             vex_printf(", ");
   1824             ppHRegARM(i->ARMin.VXferD.rHi);
   1825          } else {
   1826             ppHRegARM(i->ARMin.VXferD.rLo);
   1827             vex_printf(", ");
   1828             ppHRegARM(i->ARMin.VXferD.rHi);
   1829             vex_printf(", ");
   1830             ppHRegARM(i->ARMin.VXferD.dD);
   1831          }
   1832          return;
   1833       case ARMin_VXferS:
   1834          vex_printf("vmov  ");
   1835          if (i->ARMin.VXferS.toS) {
   1836             ppHRegARM(i->ARMin.VXferS.fD);
   1837             vex_printf(", ");
   1838             ppHRegARM(i->ARMin.VXferS.rLo);
   1839          } else {
   1840             ppHRegARM(i->ARMin.VXferS.rLo);
   1841             vex_printf(", ");
   1842             ppHRegARM(i->ARMin.VXferS.fD);
   1843          }
   1844          return;
   1845       case ARMin_VCvtID: {
   1846          const HChar* nm = "?";
   1847          if (i->ARMin.VCvtID.iToD) {
   1848             nm = i->ARMin.VCvtID.syned ? "fsitod" : "fuitod";
   1849          } else {
   1850             nm = i->ARMin.VCvtID.syned ? "ftosid" : "ftouid";
   1851          }
   1852          vex_printf("%s ", nm);
   1853          ppHRegARM(i->ARMin.VCvtID.dst);
   1854          vex_printf(", ");
   1855          ppHRegARM(i->ARMin.VCvtID.src);
   1856          return;
   1857       }
   1858       case ARMin_FPSCR:
   1859          if (i->ARMin.FPSCR.toFPSCR) {
   1860             vex_printf("fmxr  fpscr, ");
   1861             ppHRegARM(i->ARMin.FPSCR.iReg);
   1862          } else {
   1863             vex_printf("fmrx  ");
   1864             ppHRegARM(i->ARMin.FPSCR.iReg);
   1865             vex_printf(", fpscr");
   1866          }
   1867          return;
   1868       case ARMin_MFence:
   1869          vex_printf("(mfence) dsb sy; dmb sy; isb");
   1870          return;
   1871       case ARMin_CLREX:
   1872          vex_printf("clrex");
   1873          return;
   1874       case ARMin_NLdStQ:
   1875          if (i->ARMin.NLdStQ.isLoad)
   1876             vex_printf("vld1.32 {");
   1877          else
   1878             vex_printf("vst1.32 {");
   1879          ppHRegARM(i->ARMin.NLdStQ.dQ);
   1880          vex_printf("} ");
   1881          ppARMAModeN(i->ARMin.NLdStQ.amode);
   1882          return;
   1883       case ARMin_NLdStD:
   1884          if (i->ARMin.NLdStD.isLoad)
   1885             vex_printf("vld1.32 {");
   1886          else
   1887             vex_printf("vst1.32 {");
   1888          ppHRegARM(i->ARMin.NLdStD.dD);
   1889          vex_printf("} ");
   1890          ppARMAModeN(i->ARMin.NLdStD.amode);
   1891          return;
   1892       case ARMin_NUnary:
   1893          vex_printf("%s%s%s  ",
   1894                     showARMNeonUnOp(i->ARMin.NUnary.op),
   1895                     showARMNeonUnOpDataType(i->ARMin.NUnary.op),
   1896                     showARMNeonDataSize(i));
   1897          ppHRegARM(i->ARMin.NUnary.dst);
   1898          vex_printf(", ");
   1899          ppHRegARM(i->ARMin.NUnary.src);
   1900          if (i->ARMin.NUnary.op == ARMneon_EQZ)
   1901             vex_printf(", #0");
   1902          if (i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
   1903              i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
   1904              i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
   1905              i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF) {
   1906             vex_printf(", #%d", i->ARMin.NUnary.size);
   1907          }
   1908          if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
   1909              i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
   1910              i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
   1911             UInt size;
   1912             size = i->ARMin.NUnary.size;
   1913             if (size & 0x40) {
   1914                vex_printf(", #%d", size - 64);
   1915             } else if (size & 0x20) {
   1916                vex_printf(", #%d", size - 32);
   1917             } else if (size & 0x10) {
   1918                vex_printf(", #%d", size - 16);
   1919             } else if (size & 0x08) {
   1920                vex_printf(", #%d", size - 8);
   1921             }
   1922          }
   1923          return;
   1924       case ARMin_NUnaryS:
   1925          vex_printf("%s%s%s  ",
   1926                     showARMNeonUnOpS(i->ARMin.NUnaryS.op),
   1927                     showARMNeonUnOpSDataType(i->ARMin.NUnaryS.op),
   1928                     showARMNeonDataSize(i));
   1929          ppARMNRS(i->ARMin.NUnaryS.dst);
   1930          vex_printf(", ");
   1931          ppARMNRS(i->ARMin.NUnaryS.src);
   1932          return;
   1933       case ARMin_NShift:
   1934          vex_printf("%s%s%s  ",
   1935                     showARMNeonShiftOp(i->ARMin.NShift.op),
   1936                     showARMNeonShiftOpDataType(i->ARMin.NShift.op),
   1937                     showARMNeonDataSize(i));
   1938          ppHRegARM(i->ARMin.NShift.dst);
   1939          vex_printf(", ");
   1940          ppHRegARM(i->ARMin.NShift.argL);
   1941          vex_printf(", ");
   1942          ppHRegARM(i->ARMin.NShift.argR);
   1943          return;
   1944       case ARMin_NShl64:
   1945          vex_printf("vshl.i64 ");
   1946          ppHRegARM(i->ARMin.NShl64.dst);
   1947          vex_printf(", ");
   1948          ppHRegARM(i->ARMin.NShl64.src);
   1949          vex_printf(", #%u", i->ARMin.NShl64.amt);
   1950          return;
   1951       case ARMin_NDual:
   1952          vex_printf("%s%s%s  ",
   1953                     showARMNeonDualOp(i->ARMin.NDual.op),
   1954                     showARMNeonDualOpDataType(i->ARMin.NDual.op),
   1955                     showARMNeonDataSize(i));
   1956          ppHRegARM(i->ARMin.NDual.arg1);
   1957          vex_printf(", ");
   1958          ppHRegARM(i->ARMin.NDual.arg2);
   1959          return;
   1960       case ARMin_NBinary:
   1961          vex_printf("%s%s%s",
   1962                     showARMNeonBinOp(i->ARMin.NBinary.op),
   1963                     showARMNeonBinOpDataType(i->ARMin.NBinary.op),
   1964                     showARMNeonDataSize(i));
   1965          vex_printf("  ");
   1966          ppHRegARM(i->ARMin.NBinary.dst);
   1967          vex_printf(", ");
   1968          ppHRegARM(i->ARMin.NBinary.argL);
   1969          vex_printf(", ");
   1970          ppHRegARM(i->ARMin.NBinary.argR);
   1971          return;
   1972       case ARMin_NeonImm:
   1973          vex_printf("vmov  ");
   1974          ppHRegARM(i->ARMin.NeonImm.dst);
   1975          vex_printf(", ");
   1976          ppARMNImm(i->ARMin.NeonImm.imm);
   1977          return;
   1978       case ARMin_NCMovQ:
   1979          vex_printf("vmov%s ", showARMCondCode(i->ARMin.NCMovQ.cond));
   1980          ppHRegARM(i->ARMin.NCMovQ.dst);
   1981          vex_printf(", ");
   1982          ppHRegARM(i->ARMin.NCMovQ.src);
   1983          return;
   1984       case ARMin_Add32:
   1985          vex_printf("add32 ");
   1986          ppHRegARM(i->ARMin.Add32.rD);
   1987          vex_printf(", ");
   1988          ppHRegARM(i->ARMin.Add32.rN);
   1989          vex_printf(", ");
   1990          vex_printf("%d", i->ARMin.Add32.imm32);
   1991          return;
   1992       case ARMin_EvCheck:
   1993          vex_printf("(evCheck) ldr r12,");
   1994          ppARMAMode1(i->ARMin.EvCheck.amCounter);
   1995          vex_printf("; subs r12,r12,$1; str r12,");
   1996          ppARMAMode1(i->ARMin.EvCheck.amCounter);
   1997          vex_printf("; bpl nofail; ldr r12,");
   1998          ppARMAMode1(i->ARMin.EvCheck.amFailAddr);
   1999          vex_printf("; bx r12; nofail:");
   2000          return;
   2001       case ARMin_ProfInc:
   2002          vex_printf("(profInc) movw r12,LO16($NotKnownYet); "
   2003                     "movw r12,HI16($NotKnownYet); "
   2004                     "ldr r11,[r12]; "
   2005                     "adds r11,r11,$1; "
   2006                     "str r11,[r12]; "
   2007                     "ldr r11,[r12+4]; "
   2008                     "adc r11,r11,$0; "
   2009                     "str r11,[r12+4]");
   2010          return;
   2011       default:
   2012          vex_printf("ppARMInstr: unhandled case (tag %d)", (Int)i->tag);
   2013          vpanic("ppARMInstr(1)");
   2014          return;
   2015    }
   2016 }
   2017 
   2018 
   2019 /* --------- Helpers for register allocation. --------- */
   2020 
/* Populate *u with the register-usage information for instruction *i:
   every register the instruction reads is added with HRmRead, every
   register it (possibly) writes with HRmWrite.  The register
   allocator depends on this being exact -- omitting a read or a
   write here leads to miscompilation, not just inefficiency. */
void getRegUsage_ARMInstr ( HRegUsage* u, ARMInstr* i, Bool mode64 )
{
   vassert(mode64 == False);   /* this backend is 32-bit ARM only */
   initHRegUsage(u);
   switch (i->tag) {
      case ARMin_Alu:
         addHRegUse(u, HRmWrite, i->ARMin.Alu.dst);
         addHRegUse(u, HRmRead, i->ARMin.Alu.argL);
         addRegUsage_ARMRI84(u, i->ARMin.Alu.argR);
         return;
      case ARMin_Shift:
         addHRegUse(u, HRmWrite, i->ARMin.Shift.dst);
         addHRegUse(u, HRmRead, i->ARMin.Shift.argL);
         addRegUsage_ARMRI5(u, i->ARMin.Shift.argR);
         return;
      case ARMin_Unary:
         addHRegUse(u, HRmWrite, i->ARMin.Unary.dst);
         addHRegUse(u, HRmRead, i->ARMin.Unary.src);
         return;
      case ARMin_CmpOrTst:
         /* Only reads; the result goes to the condition flags. */
         addHRegUse(u, HRmRead, i->ARMin.CmpOrTst.argL);
         addRegUsage_ARMRI84(u, i->ARMin.CmpOrTst.argR);
         return;
      case ARMin_Mov:
         addHRegUse(u, HRmWrite, i->ARMin.Mov.dst);
         addRegUsage_ARMRI84(u, i->ARMin.Mov.src);
         return;
      case ARMin_Imm32:
         addHRegUse(u, HRmWrite, i->ARMin.Imm32.dst);
         return;
      case ARMin_LdSt32:
         addRegUsage_ARMAMode1(u, i->ARMin.LdSt32.amode);
         if (i->ARMin.LdSt32.isLoad) {
            addHRegUse(u, HRmWrite, i->ARMin.LdSt32.rD);
            /* A conditional load may not execute, in which case the
               old value of rD survives -- so rD is also a read. */
            if (i->ARMin.LdSt32.cc != ARMcc_AL)
               addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
         } else {
            addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
         }
         return;
      case ARMin_LdSt16:
         addRegUsage_ARMAMode2(u, i->ARMin.LdSt16.amode);
         if (i->ARMin.LdSt16.isLoad) {
            addHRegUse(u, HRmWrite, i->ARMin.LdSt16.rD);
            /* Conditional load: see ARMin_LdSt32 above. */
            if (i->ARMin.LdSt16.cc != ARMcc_AL)
               addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
         } else {
            addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
         }
         return;
      case ARMin_LdSt8U:
         addRegUsage_ARMAMode1(u, i->ARMin.LdSt8U.amode);
         if (i->ARMin.LdSt8U.isLoad) {
            addHRegUse(u, HRmWrite, i->ARMin.LdSt8U.rD);
            /* Conditional load: see ARMin_LdSt32 above. */
            if (i->ARMin.LdSt8U.cc != ARMcc_AL)
               addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
         } else {
            addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
         }
         return;
      case ARMin_Ld8S:
         /* Always a load; conditional case reads rD as well, as
            for ARMin_LdSt32 above. */
         addRegUsage_ARMAMode2(u, i->ARMin.Ld8S.amode);
         addHRegUse(u, HRmWrite, i->ARMin.Ld8S.rD);
         if (i->ARMin.Ld8S.cc != ARMcc_AL)
            addHRegUse(u, HRmRead, i->ARMin.Ld8S.rD);
         return;
      /* XDirect/XIndir/XAssisted are also a bit subtle.  They
         conditionally exit the block.  Hence we only need to list (1)
         the registers that they read, and (2) the registers that they
         write in the case where the block is not exited.  (2) is
         empty, hence only (1) is relevant here. */
      case ARMin_XDirect:
         addRegUsage_ARMAMode1(u, i->ARMin.XDirect.amR15T);
         return;
      case ARMin_XIndir:
         addHRegUse(u, HRmRead, i->ARMin.XIndir.dstGA);
         addRegUsage_ARMAMode1(u, i->ARMin.XIndir.amR15T);
         return;
      case ARMin_XAssisted:
         addHRegUse(u, HRmRead, i->ARMin.XAssisted.dstGA);
         addRegUsage_ARMAMode1(u, i->ARMin.XAssisted.amR15T);
         return;
      case ARMin_CMov:
         /* Conditional move: if the condition fails, dst keeps its
            old value, so dst is read as well as written. */
         addHRegUse(u, HRmWrite, i->ARMin.CMov.dst);
         addHRegUse(u, HRmRead,  i->ARMin.CMov.dst);
         addRegUsage_ARMRI84(u, i->ARMin.CMov.src);
         return;
      case ARMin_Call:
         /* logic and comments copied/modified from x86 back end */
         /* This is a bit subtle. */
         /* First off, claim it trashes all the caller-saved regs
            which fall within the register allocator's jurisdiction.
            These I believe to be r0,1,2,3.  If it turns out that r9
            is also caller-saved, then we'll have to add that here
            too. */
         addHRegUse(u, HRmWrite, hregARM_R0());
         addHRegUse(u, HRmWrite, hregARM_R1());
         addHRegUse(u, HRmWrite, hregARM_R2());
         addHRegUse(u, HRmWrite, hregARM_R3());
         /* Now we have to state any parameter-carrying registers
            which might be read.  This depends on nArgRegs. */
         switch (i->ARMin.Call.nArgRegs) {
            case 4: addHRegUse(u, HRmRead, hregARM_R3()); /*fallthru*/
            case 3: addHRegUse(u, HRmRead, hregARM_R2()); /*fallthru*/
            case 2: addHRegUse(u, HRmRead, hregARM_R1()); /*fallthru*/
            case 1: addHRegUse(u, HRmRead, hregARM_R0()); break;
            case 0: break;
            default: vpanic("getRegUsage_ARM:Call:regparms");
         }
         /* Finally, there is the issue that the insn trashes a
            register because the literal target address has to be
            loaded into a register.  Fortunately, for the nArgRegs=
            0/1/2/3 case, we can use r0, r1, r2 or r3 respectively, so
            this does not cause any further damage.  For the
            nArgRegs=4 case, we'll have to choose another register
            arbitrarily since all the caller saved regs are used for
            parameters, and so we might as well choose r11.
            */
         if (i->ARMin.Call.nArgRegs == 4)
            addHRegUse(u, HRmWrite, hregARM_R11());
         /* Upshot of this is that the assembler really must observe
            the here-stated convention of which register to use as an
            address temporary, depending on nArgRegs: 0==r0,
            1==r1, 2==r2, 3==r3, 4==r11 */
         return;
      case ARMin_Mul:
         /* Mul uses fixed registers: operands in r2/r3, result in
            r0 (and r1 for the widening variants). */
         addHRegUse(u, HRmRead, hregARM_R2());
         addHRegUse(u, HRmRead, hregARM_R3());
         addHRegUse(u, HRmWrite, hregARM_R0());
         if (i->ARMin.Mul.op != ARMmul_PLAIN)
            addHRegUse(u, HRmWrite, hregARM_R1());
         return;
      case ARMin_LdrEX:
         /* Fixed registers: address in r4, result in r2 (r3:r2 for
            the 8-byte case). */
         addHRegUse(u, HRmRead, hregARM_R4());
         addHRegUse(u, HRmWrite, hregARM_R2());
         if (i->ARMin.LdrEX.szB == 8)
            addHRegUse(u, HRmWrite, hregARM_R3());
         return;
      case ARMin_StrEX:
         /* Fixed registers: address in r4, data in r2 (r3:r2 for the
            8-byte case), success flag written to r0. */
         addHRegUse(u, HRmRead, hregARM_R4());
         addHRegUse(u, HRmWrite, hregARM_R0());
         addHRegUse(u, HRmRead, hregARM_R2());
         if (i->ARMin.StrEX.szB == 8)
            addHRegUse(u, HRmRead, hregARM_R3());
         return;
      case ARMin_VLdStD:
         addRegUsage_ARMAModeV(u, i->ARMin.VLdStD.amode);
         if (i->ARMin.VLdStD.isLoad) {
            addHRegUse(u, HRmWrite, i->ARMin.VLdStD.dD);
         } else {
            addHRegUse(u, HRmRead, i->ARMin.VLdStD.dD);
         }
         return;
      case ARMin_VLdStS:
         addRegUsage_ARMAModeV(u, i->ARMin.VLdStS.amode);
         if (i->ARMin.VLdStS.isLoad) {
            addHRegUse(u, HRmWrite, i->ARMin.VLdStS.fD);
         } else {
            addHRegUse(u, HRmRead, i->ARMin.VLdStS.fD);
         }
         return;
      case ARMin_VAluD:
         addHRegUse(u, HRmWrite, i->ARMin.VAluD.dst);
         addHRegUse(u, HRmRead, i->ARMin.VAluD.argL);
         addHRegUse(u, HRmRead, i->ARMin.VAluD.argR);
         return;
      case ARMin_VAluS:
         addHRegUse(u, HRmWrite, i->ARMin.VAluS.dst);
         addHRegUse(u, HRmRead, i->ARMin.VAluS.argL);
         addHRegUse(u, HRmRead, i->ARMin.VAluS.argR);
         return;
      case ARMin_VUnaryD:
         addHRegUse(u, HRmWrite, i->ARMin.VUnaryD.dst);
         addHRegUse(u, HRmRead, i->ARMin.VUnaryD.src);
         return;
      case ARMin_VUnaryS:
         addHRegUse(u, HRmWrite, i->ARMin.VUnaryS.dst);
         addHRegUse(u, HRmRead, i->ARMin.VUnaryS.src);
         return;
      case ARMin_VCmpD:
         /* Result goes to FPSCR flags only; no GPR/FPR written. */
         addHRegUse(u, HRmRead, i->ARMin.VCmpD.argL);
         addHRegUse(u, HRmRead, i->ARMin.VCmpD.argR);
         return;
      case ARMin_VCMovD:
         /* Conditional: dst is read-modify-write, as ARMin_CMov. */
         addHRegUse(u, HRmWrite, i->ARMin.VCMovD.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCMovD.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCMovD.src);
         return;
      case ARMin_VCMovS:
         /* Conditional: dst is read-modify-write, as ARMin_CMov. */
         addHRegUse(u, HRmWrite, i->ARMin.VCMovS.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCMovS.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCMovS.src);
         return;
      case ARMin_VCvtSD:
         addHRegUse(u, HRmWrite, i->ARMin.VCvtSD.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCvtSD.src);
         return;
      case ARMin_VXferD:
         if (i->ARMin.VXferD.toD) {
            addHRegUse(u, HRmWrite, i->ARMin.VXferD.dD);
            addHRegUse(u, HRmRead,  i->ARMin.VXferD.rHi);
            addHRegUse(u, HRmRead,  i->ARMin.VXferD.rLo);
         } else {
            addHRegUse(u, HRmRead,  i->ARMin.VXferD.dD);
            addHRegUse(u, HRmWrite, i->ARMin.VXferD.rHi);
            addHRegUse(u, HRmWrite, i->ARMin.VXferD.rLo);
         }
         return;
      case ARMin_VXferS:
         if (i->ARMin.VXferS.toS) {
            addHRegUse(u, HRmWrite, i->ARMin.VXferS.fD);
            addHRegUse(u, HRmRead,  i->ARMin.VXferS.rLo);
         } else {
            addHRegUse(u, HRmRead,  i->ARMin.VXferS.fD);
            addHRegUse(u, HRmWrite, i->ARMin.VXferS.rLo);
         }
         return;
      case ARMin_VCvtID:
         addHRegUse(u, HRmWrite, i->ARMin.VCvtID.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCvtID.src);
         return;
      case ARMin_FPSCR:
         if (i->ARMin.FPSCR.toFPSCR)
            addHRegUse(u, HRmRead, i->ARMin.FPSCR.iReg);
         else
            addHRegUse(u, HRmWrite, i->ARMin.FPSCR.iReg);
         return;
      case ARMin_MFence:
         /* No register operands. */
         return;
      case ARMin_CLREX:
         /* No register operands. */
         return;
      case ARMin_NLdStQ:
         if (i->ARMin.NLdStQ.isLoad)
            addHRegUse(u, HRmWrite, i->ARMin.NLdStQ.dQ);
         else
            addHRegUse(u, HRmRead, i->ARMin.NLdStQ.dQ);
         addRegUsage_ARMAModeN(u, i->ARMin.NLdStQ.amode);
         return;
      case ARMin_NLdStD:
         if (i->ARMin.NLdStD.isLoad)
            addHRegUse(u, HRmWrite, i->ARMin.NLdStD.dD);
         else
            addHRegUse(u, HRmRead, i->ARMin.NLdStD.dD);
         addRegUsage_ARMAModeN(u, i->ARMin.NLdStD.amode);
         return;
      case ARMin_NUnary:
         addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst);
         addHRegUse(u, HRmRead, i->ARMin.NUnary.src);
         return;
      case ARMin_NUnaryS:
         addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg);
         addHRegUse(u, HRmRead, i->ARMin.NUnaryS.src->reg);
         return;
      case ARMin_NShift:
         addHRegUse(u, HRmWrite, i->ARMin.NShift.dst);
         addHRegUse(u, HRmRead, i->ARMin.NShift.argL);
         addHRegUse(u, HRmRead, i->ARMin.NShift.argR);
         return;
      case ARMin_NShl64:
         addHRegUse(u, HRmWrite, i->ARMin.NShl64.dst);
         addHRegUse(u, HRmRead, i->ARMin.NShl64.src);
         return;
      case ARMin_NDual:
         /* Both operands are updated in place, so each is read and
            written. */
         addHRegUse(u, HRmWrite, i->ARMin.NDual.arg1);
         addHRegUse(u, HRmWrite, i->ARMin.NDual.arg2);
         addHRegUse(u, HRmRead, i->ARMin.NDual.arg1);
         addHRegUse(u, HRmRead, i->ARMin.NDual.arg2);
         return;
      case ARMin_NBinary:
         addHRegUse(u, HRmWrite, i->ARMin.NBinary.dst);
         /* TODO: sometimes dst is also being read! */
         // XXX fix this
         addHRegUse(u, HRmRead, i->ARMin.NBinary.argL);
         addHRegUse(u, HRmRead, i->ARMin.NBinary.argR);
         return;
      case ARMin_NeonImm:
         addHRegUse(u, HRmWrite, i->ARMin.NeonImm.dst);
         return;
      case ARMin_NCMovQ:
         /* Conditional: dst is read-modify-write, as ARMin_CMov. */
         addHRegUse(u, HRmWrite, i->ARMin.NCMovQ.dst);
         addHRegUse(u, HRmRead,  i->ARMin.NCMovQ.dst);
         addHRegUse(u, HRmRead,  i->ARMin.NCMovQ.src);
         return;
      case ARMin_Add32:
         addHRegUse(u, HRmWrite, i->ARMin.Add32.rD);
         addHRegUse(u, HRmRead, i->ARMin.Add32.rN);
         return;
      case ARMin_EvCheck:
         /* We expect both amodes only to mention r8, so this is in
            fact pointless, since r8 isn't allocatable, but
            anyway.. */
         addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amCounter);
         addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amFailAddr);
         addHRegUse(u, HRmWrite, hregARM_R12()); /* also unavail to RA */
         return;
      case ARMin_ProfInc:
         /* Uses r12 as address temporary and r11 as scratch for the
            64-bit counter increment. */
         addHRegUse(u, HRmWrite, hregARM_R12());
         addHRegUse(u, HRmWrite, hregARM_R11());
         return;
      default:
         ppARMInstr(i);
         vpanic("getRegUsage_ARMInstr");
   }
}
   2325 
   2326 
/* Apply the register allocator's virtual-to-real mapping |m| to every
   HReg field of |i|, in place.  This is the companion of
   getRegUsage_ARMInstr: for each instruction tag, the set of fields
   rewritten here must match the set of registers reported there. */
void mapRegs_ARMInstr ( HRegRemap* m, ARMInstr* i, Bool mode64 )
{
   /* The ARM backend is 32-bit only. */
   vassert(mode64 == False);
   switch (i->tag) {
      /* ---- Integer ALU / data movement ---- */
      case ARMin_Alu:
         i->ARMin.Alu.dst = lookupHRegRemap(m, i->ARMin.Alu.dst);
         i->ARMin.Alu.argL = lookupHRegRemap(m, i->ARMin.Alu.argL);
         mapRegs_ARMRI84(m, i->ARMin.Alu.argR);
         return;
      case ARMin_Shift:
         i->ARMin.Shift.dst = lookupHRegRemap(m, i->ARMin.Shift.dst);
         i->ARMin.Shift.argL = lookupHRegRemap(m, i->ARMin.Shift.argL);
         mapRegs_ARMRI5(m, i->ARMin.Shift.argR);
         return;
      case ARMin_Unary:
         i->ARMin.Unary.dst = lookupHRegRemap(m, i->ARMin.Unary.dst);
         i->ARMin.Unary.src = lookupHRegRemap(m, i->ARMin.Unary.src);
         return;
      case ARMin_CmpOrTst:
         i->ARMin.CmpOrTst.argL = lookupHRegRemap(m, i->ARMin.CmpOrTst.argL);
         mapRegs_ARMRI84(m, i->ARMin.CmpOrTst.argR);
         return;
      case ARMin_Mov:
         i->ARMin.Mov.dst = lookupHRegRemap(m, i->ARMin.Mov.dst);
         mapRegs_ARMRI84(m, i->ARMin.Mov.src);
         return;
      case ARMin_Imm32:
         i->ARMin.Imm32.dst = lookupHRegRemap(m, i->ARMin.Imm32.dst);
         return;
      /* ---- Integer loads/stores: remap the data reg and the amode ---- */
      case ARMin_LdSt32:
         i->ARMin.LdSt32.rD = lookupHRegRemap(m, i->ARMin.LdSt32.rD);
         mapRegs_ARMAMode1(m, i->ARMin.LdSt32.amode);
         return;
      case ARMin_LdSt16:
         i->ARMin.LdSt16.rD = lookupHRegRemap(m, i->ARMin.LdSt16.rD);
         mapRegs_ARMAMode2(m, i->ARMin.LdSt16.amode);
         return;
      case ARMin_LdSt8U:
         i->ARMin.LdSt8U.rD = lookupHRegRemap(m, i->ARMin.LdSt8U.rD);
         mapRegs_ARMAMode1(m, i->ARMin.LdSt8U.amode);
         return;
      case ARMin_Ld8S:
         i->ARMin.Ld8S.rD = lookupHRegRemap(m, i->ARMin.Ld8S.rD);
         mapRegs_ARMAMode2(m, i->ARMin.Ld8S.amode);
         return;
      /* ---- Control-flow exits ---- */
      case ARMin_XDirect:
         mapRegs_ARMAMode1(m, i->ARMin.XDirect.amR15T);
         return;
      case ARMin_XIndir:
         i->ARMin.XIndir.dstGA
            = lookupHRegRemap(m, i->ARMin.XIndir.dstGA);
         mapRegs_ARMAMode1(m, i->ARMin.XIndir.amR15T);
         return;
      case ARMin_XAssisted:
         i->ARMin.XAssisted.dstGA
            = lookupHRegRemap(m, i->ARMin.XAssisted.dstGA);
         mapRegs_ARMAMode1(m, i->ARMin.XAssisted.amR15T);
         return;
      case ARMin_CMov:
         i->ARMin.CMov.dst = lookupHRegRemap(m, i->ARMin.CMov.dst);
         mapRegs_ARMRI84(m, i->ARMin.CMov.src);
         return;
      /* These four carry no allocator-visible HReg fields, so there is
         nothing to remap (their operands are presumably fixed registers
         -- cf. getRegUsage_ARMInstr). */
      case ARMin_Call:
         return;
      case ARMin_Mul:
         return;
      case ARMin_LdrEX:
         return;
      case ARMin_StrEX:
         return;
      /* ---- VFP (scalar float/double) ---- */
      case ARMin_VLdStD:
         i->ARMin.VLdStD.dD = lookupHRegRemap(m, i->ARMin.VLdStD.dD);
         mapRegs_ARMAModeV(m, i->ARMin.VLdStD.amode);
         return;
      case ARMin_VLdStS:
         i->ARMin.VLdStS.fD = lookupHRegRemap(m, i->ARMin.VLdStS.fD);
         mapRegs_ARMAModeV(m, i->ARMin.VLdStS.amode);
         return;
      case ARMin_VAluD:
         i->ARMin.VAluD.dst  = lookupHRegRemap(m, i->ARMin.VAluD.dst);
         i->ARMin.VAluD.argL = lookupHRegRemap(m, i->ARMin.VAluD.argL);
         i->ARMin.VAluD.argR = lookupHRegRemap(m, i->ARMin.VAluD.argR);
         return;
      case ARMin_VAluS:
         i->ARMin.VAluS.dst  = lookupHRegRemap(m, i->ARMin.VAluS.dst);
         i->ARMin.VAluS.argL = lookupHRegRemap(m, i->ARMin.VAluS.argL);
         i->ARMin.VAluS.argR = lookupHRegRemap(m, i->ARMin.VAluS.argR);
         return;
      case ARMin_VUnaryD:
         i->ARMin.VUnaryD.dst = lookupHRegRemap(m, i->ARMin.VUnaryD.dst);
         i->ARMin.VUnaryD.src = lookupHRegRemap(m, i->ARMin.VUnaryD.src);
         return;
      case ARMin_VUnaryS:
         i->ARMin.VUnaryS.dst = lookupHRegRemap(m, i->ARMin.VUnaryS.dst);
         i->ARMin.VUnaryS.src = lookupHRegRemap(m, i->ARMin.VUnaryS.src);
         return;
      case ARMin_VCmpD:
         i->ARMin.VCmpD.argL = lookupHRegRemap(m, i->ARMin.VCmpD.argL);
         i->ARMin.VCmpD.argR = lookupHRegRemap(m, i->ARMin.VCmpD.argR);
         return;
      case ARMin_VCMovD:
         i->ARMin.VCMovD.dst = lookupHRegRemap(m, i->ARMin.VCMovD.dst);
         i->ARMin.VCMovD.src = lookupHRegRemap(m, i->ARMin.VCMovD.src);
         return;
      case ARMin_VCMovS:
         i->ARMin.VCMovS.dst = lookupHRegRemap(m, i->ARMin.VCMovS.dst);
         i->ARMin.VCMovS.src = lookupHRegRemap(m, i->ARMin.VCMovS.src);
         return;
      case ARMin_VCvtSD:
         i->ARMin.VCvtSD.dst = lookupHRegRemap(m, i->ARMin.VCvtSD.dst);
         i->ARMin.VCvtSD.src = lookupHRegRemap(m, i->ARMin.VCvtSD.src);
         return;
      case ARMin_VXferD:
         i->ARMin.VXferD.dD  = lookupHRegRemap(m, i->ARMin.VXferD.dD);
         i->ARMin.VXferD.rHi = lookupHRegRemap(m, i->ARMin.VXferD.rHi);
         i->ARMin.VXferD.rLo = lookupHRegRemap(m, i->ARMin.VXferD.rLo);
         return;
      case ARMin_VXferS:
         i->ARMin.VXferS.fD  = lookupHRegRemap(m, i->ARMin.VXferS.fD);
         i->ARMin.VXferS.rLo = lookupHRegRemap(m, i->ARMin.VXferS.rLo);
         return;
      case ARMin_VCvtID:
         i->ARMin.VCvtID.dst = lookupHRegRemap(m, i->ARMin.VCvtID.dst);
         i->ARMin.VCvtID.src = lookupHRegRemap(m, i->ARMin.VCvtID.src);
         return;
      case ARMin_FPSCR:
         i->ARMin.FPSCR.iReg = lookupHRegRemap(m, i->ARMin.FPSCR.iReg);
         return;
      case ARMin_MFence:
         return;
      case ARMin_CLREX:
         return;
      /* ---- Neon ---- */
      case ARMin_NLdStQ:
         i->ARMin.NLdStQ.dQ = lookupHRegRemap(m, i->ARMin.NLdStQ.dQ);
         mapRegs_ARMAModeN(m, i->ARMin.NLdStQ.amode);
         return;
      case ARMin_NLdStD:
         i->ARMin.NLdStD.dD = lookupHRegRemap(m, i->ARMin.NLdStD.dD);
         mapRegs_ARMAModeN(m, i->ARMin.NLdStD.amode);
         return;
      case ARMin_NUnary:
         i->ARMin.NUnary.src = lookupHRegRemap(m, i->ARMin.NUnary.src);
         i->ARMin.NUnary.dst = lookupHRegRemap(m, i->ARMin.NUnary.dst);
         return;
      case ARMin_NUnaryS:
         /* src/dst are pointed-to operand records; remap the HReg
            stored inside each. */
         i->ARMin.NUnaryS.src->reg
            = lookupHRegRemap(m, i->ARMin.NUnaryS.src->reg);
         i->ARMin.NUnaryS.dst->reg
            = lookupHRegRemap(m, i->ARMin.NUnaryS.dst->reg);
         return;
      case ARMin_NShift:
         i->ARMin.NShift.dst = lookupHRegRemap(m, i->ARMin.NShift.dst);
         i->ARMin.NShift.argL = lookupHRegRemap(m, i->ARMin.NShift.argL);
         i->ARMin.NShift.argR = lookupHRegRemap(m, i->ARMin.NShift.argR);
         return;
      case ARMin_NShl64:
         i->ARMin.NShl64.dst = lookupHRegRemap(m, i->ARMin.NShl64.dst);
         i->ARMin.NShl64.src = lookupHRegRemap(m, i->ARMin.NShl64.src);
         return;
      case ARMin_NDual:
         i->ARMin.NDual.arg1 = lookupHRegRemap(m, i->ARMin.NDual.arg1);
         i->ARMin.NDual.arg2 = lookupHRegRemap(m, i->ARMin.NDual.arg2);
         return;
      case ARMin_NBinary:
         i->ARMin.NBinary.argL = lookupHRegRemap(m, i->ARMin.NBinary.argL);
         i->ARMin.NBinary.argR = lookupHRegRemap(m, i->ARMin.NBinary.argR);
         i->ARMin.NBinary.dst  = lookupHRegRemap(m, i->ARMin.NBinary.dst);
         return;
      case ARMin_NeonImm:
         i->ARMin.NeonImm.dst = lookupHRegRemap(m, i->ARMin.NeonImm.dst);
         return;
      case ARMin_NCMovQ:
         i->ARMin.NCMovQ.dst = lookupHRegRemap(m, i->ARMin.NCMovQ.dst);
         i->ARMin.NCMovQ.src = lookupHRegRemap(m, i->ARMin.NCMovQ.src);
         return;
      case ARMin_Add32:
         i->ARMin.Add32.rD = lookupHRegRemap(m, i->ARMin.Add32.rD);
         i->ARMin.Add32.rN = lookupHRegRemap(m, i->ARMin.Add32.rN);
         return;
      case ARMin_EvCheck:
         /* We expect both amodes only to mention r8, so this is in
            fact pointless, since r8 isn't allocatable, but
            anyway.. */
         mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amCounter);
         mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amFailAddr);
         return;
      case ARMin_ProfInc:
         /* hardwires r11 and r12 -- nothing to modify. */
         return;
      default:
         ppARMInstr(i);
         vpanic("mapRegs_ARMInstr");
   }
}
   2521 
   2522 /* Figure out if i represents a reg-reg move, and if so assign the
   2523    source and destination to *src and *dst.  If in doubt say No.  Used
   2524    by the register allocator to do move coalescing.
   2525 */
   2526 Bool isMove_ARMInstr ( ARMInstr* i, HReg* src, HReg* dst )
   2527 {
   2528    /* Moves between integer regs */
   2529    switch (i->tag) {
   2530       case ARMin_Mov:
   2531          if (i->ARMin.Mov.src->tag == ARMri84_R) {
   2532             *src = i->ARMin.Mov.src->ARMri84.R.reg;
   2533             *dst = i->ARMin.Mov.dst;
   2534             return True;
   2535          }
   2536          break;
   2537       case ARMin_VUnaryD:
   2538          if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) {
   2539             *src = i->ARMin.VUnaryD.src;
   2540             *dst = i->ARMin.VUnaryD.dst;
   2541             return True;
   2542          }
   2543          break;
   2544       case ARMin_VUnaryS:
   2545          if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) {
   2546             *src = i->ARMin.VUnaryS.src;
   2547             *dst = i->ARMin.VUnaryS.dst;
   2548             return True;
   2549          }
   2550          break;
   2551       case ARMin_NUnary:
   2552          if (i->ARMin.NUnary.op == ARMneon_COPY) {
   2553             *src = i->ARMin.NUnary.src;
   2554             *dst = i->ARMin.NUnary.dst;
   2555             return True;
   2556          }
   2557          break;
   2558       default:
   2559          break;
   2560    }
   2561 
   2562    return False;
   2563 }
   2564 
   2565 
   2566 /* Generate arm spill/reload instructions under the direction of the
   2567    register allocator.  Note it's critical these don't write the
   2568    condition codes. */
   2569 
   2570 void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
   2571                     HReg rreg, Int offsetB, Bool mode64 )
   2572 {
   2573    HRegClass rclass;
   2574    vassert(offsetB >= 0);
   2575    vassert(!hregIsVirtual(rreg));
   2576    vassert(mode64 == False);
   2577    *i1 = *i2 = NULL;
   2578    rclass = hregClass(rreg);
   2579    switch (rclass) {
   2580       case HRcInt32:
   2581          vassert(offsetB <= 4095);
   2582          *i1 = ARMInstr_LdSt32( ARMcc_AL, False/*!isLoad*/,
   2583                                 rreg,
   2584                                 ARMAMode1_RI(hregARM_R8(), offsetB) );
   2585          return;
   2586       case HRcFlt32:
   2587       case HRcFlt64: {
   2588          HReg r8   = hregARM_R8();  /* baseblock */
   2589          HReg r12  = hregARM_R12(); /* spill temp */
   2590          HReg base = r8;
   2591          vassert(0 == (offsetB & 3));
   2592          if (offsetB >= 1024) {
   2593             Int offsetKB = offsetB / 1024;
   2594             /* r12 = r8 + (1024 * offsetKB) */
   2595             *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
   2596                                ARMRI84_I84(offsetKB, 11));
   2597             offsetB -= (1024 * offsetKB);
   2598             base = r12;
   2599          }
   2600          vassert(offsetB <= 1020);
   2601          if (rclass == HRcFlt32) {
   2602             *i2 = ARMInstr_VLdStS( False/*!isLoad*/,
   2603                                    rreg,
   2604                                    mkARMAModeV(base, offsetB) );
   2605          } else {
   2606             *i2 = ARMInstr_VLdStD( False/*!isLoad*/,
   2607                                    rreg,
   2608                                    mkARMAModeV(base, offsetB) );
   2609          }
   2610          return;
   2611       }
   2612       case HRcVec128: {
   2613          HReg r8  = hregARM_R8();
   2614          HReg r12 = hregARM_R12();
   2615          *i1 = ARMInstr_Add32(r12, r8, offsetB);
   2616          *i2 = ARMInstr_NLdStQ(False, rreg, mkARMAModeN_R(r12));
   2617          return;
   2618       }
   2619       default:
   2620          ppHRegClass(rclass);
   2621          vpanic("genSpill_ARM: unimplemented regclass");
   2622    }
   2623 }
   2624 
   2625 void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
   2626                      HReg rreg, Int offsetB, Bool mode64 )
   2627 {
   2628    HRegClass rclass;
   2629    vassert(offsetB >= 0);
   2630    vassert(!hregIsVirtual(rreg));
   2631    vassert(mode64 == False);
   2632    *i1 = *i2 = NULL;
   2633    rclass = hregClass(rreg);
   2634    switch (rclass) {
   2635       case HRcInt32:
   2636          vassert(offsetB <= 4095);
   2637          *i1 = ARMInstr_LdSt32( ARMcc_AL, True/*isLoad*/,
   2638                                 rreg,
   2639                                 ARMAMode1_RI(hregARM_R8(), offsetB) );
   2640          return;
   2641       case HRcFlt32:
   2642       case HRcFlt64: {
   2643          HReg r8   = hregARM_R8();  /* baseblock */
   2644          HReg r12  = hregARM_R12(); /* spill temp */
   2645          HReg base = r8;
   2646          vassert(0 == (offsetB & 3));
   2647          if (offsetB >= 1024) {
   2648             Int offsetKB = offsetB / 1024;
   2649             /* r12 = r8 + (1024 * offsetKB) */
   2650             *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
   2651                                ARMRI84_I84(offsetKB, 11));
   2652             offsetB -= (1024 * offsetKB);
   2653             base = r12;
   2654          }
   2655          vassert(offsetB <= 1020);
   2656          if (rclass == HRcFlt32) {
   2657             *i2 = ARMInstr_VLdStS( True/*isLoad*/,
   2658                                    rreg,
   2659                                    mkARMAModeV(base, offsetB) );
   2660          } else {
   2661             *i2 = ARMInstr_VLdStD( True/*isLoad*/,
   2662                                    rreg,
   2663                                    mkARMAModeV(base, offsetB) );
   2664          }
   2665          return;
   2666       }
   2667       case HRcVec128: {
   2668          HReg r8  = hregARM_R8();
   2669          HReg r12 = hregARM_R12();
   2670          *i1 = ARMInstr_Add32(r12, r8, offsetB);
   2671          *i2 = ARMInstr_NLdStQ(True, rreg, mkARMAModeN_R(r12));
   2672          return;
   2673       }
   2674       default:
   2675          ppHRegClass(rclass);
   2676          vpanic("genReload_ARM: unimplemented regclass");
   2677    }
   2678 }
   2679 
   2680 
   2681 /* Emit an instruction into buf and return the number of bytes used.
   2682    Note that buf is not the insn's final place, and therefore it is
   2683    imperative to emit position-independent code. */
   2684 
   2685 static inline UChar iregNo ( HReg r )
   2686 {
   2687    UInt n;
   2688    vassert(hregClass(r) == HRcInt32);
   2689    vassert(!hregIsVirtual(r));
   2690    n = hregNumber(r);
   2691    vassert(n <= 15);
   2692    return toUChar(n);
   2693 }
   2694 
   2695 static inline UChar dregNo ( HReg r )
   2696 {
   2697    UInt n;
   2698    if (hregClass(r) != HRcFlt64)
   2699       ppHRegClass(hregClass(r));
   2700    vassert(hregClass(r) == HRcFlt64);
   2701    vassert(!hregIsVirtual(r));
   2702    n = hregNumber(r);
   2703    vassert(n <= 31);
   2704    return toUChar(n);
   2705 }
   2706 
   2707 static inline UChar fregNo ( HReg r )
   2708 {
   2709    UInt n;
   2710    vassert(hregClass(r) == HRcFlt32);
   2711    vassert(!hregIsVirtual(r));
   2712    n = hregNumber(r);
   2713    vassert(n <= 31);
   2714    return toUChar(n);
   2715 }
   2716 
   2717 static inline UChar qregNo ( HReg r )
   2718 {
   2719    UInt n;
   2720    vassert(hregClass(r) == HRcVec128);
   2721    vassert(!hregIsVirtual(r));
   2722    n = hregNumber(r);
   2723    vassert(n <= 15);
   2724    return toUChar(n);
   2725 }
   2726 
/* Pack four single bits into one 4-bit value (b3 is the MSB). */
#define BITS4(zzb3,zzb2,zzb1,zzb0) \
   (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
/* All sixteen nibble constants, named by their binary digits. */
#define X0000  BITS4(0,0,0,0)
#define X0001  BITS4(0,0,0,1)
#define X0010  BITS4(0,0,1,0)
#define X0011  BITS4(0,0,1,1)
#define X0100  BITS4(0,1,0,0)
#define X0101  BITS4(0,1,0,1)
#define X0110  BITS4(0,1,1,0)
#define X0111  BITS4(0,1,1,1)
#define X1000  BITS4(1,0,0,0)
#define X1001  BITS4(1,0,0,1)
#define X1010  BITS4(1,0,1,0)
#define X1011  BITS4(1,0,1,1)
#define X1100  BITS4(1,1,0,0)
#define X1101  BITS4(1,1,0,1)
#define X1110  BITS4(1,1,1,0)
#define X1111  BITS4(1,1,1,1)

/* The following assemble a 32-bit instruction word from nibble-sized
   fields.  Each 'X' in the macro name marks a nibble position (bit 31
   down to bit 0, left to right) that is filled from an argument; each
   '_' marks a nibble left as zero. */

/* Fill nibbles at bits 31..28, 27..24, 23..20, 19..16, 15..12. */
#define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
    (((zzx3) & 0xF) << 12))

/* As above, plus the nibble at bits 11..8. */
#define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2)        \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
    (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8))

/* Top five nibbles plus the lowest nibble (bits 3..0). */
#define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0)        \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
    (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) <<  0))

/* Top three nibbles plus the two lowest (bits 7..4 and 3..0). */
#define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
  ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
   (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
   (((zzx0) & 0xF) << 0))

/* All eight nibbles specified. */
#define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0)  \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
    (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8) |  \
    (((zzx1) & 0xF) <<  4) | (((zzx0) & 0xF) <<  0))

/* Only the top two nibbles (bits 31..24). */
#define XX______(zzx7,zzx6) \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24))
   2774 
   2775 /* Generate a skeletal insn that involves an a RI84 shifter operand.
   2776    Returns a word which is all zeroes apart from bits 25 and 11..0,
   2777    since it is those that encode the shifter operand (at least to the
   2778    extent that we care about it.) */
   2779 static UInt skeletal_RI84 ( ARMRI84* ri )
   2780 {
   2781    UInt instr;
   2782    if (ri->tag == ARMri84_I84) {
   2783       vassert(0 == (ri->ARMri84.I84.imm4 & ~0x0F));
   2784       vassert(0 == (ri->ARMri84.I84.imm8 & ~0xFF));
   2785       instr = 1 << 25;
   2786       instr |= (ri->ARMri84.I84.imm4 << 8);
   2787       instr |= ri->ARMri84.I84.imm8;
   2788    } else {
   2789       instr = 0 << 25;
   2790       instr |= iregNo(ri->ARMri84.R.reg);
   2791    }
   2792    return instr;
   2793 }
   2794 
   2795 /* Ditto for RI5.  Resulting word is zeroes apart from bit 4 and bits
   2796    11..7. */
   2797 static UInt skeletal_RI5 ( ARMRI5* ri )
   2798 {
   2799    UInt instr;
   2800    if (ri->tag == ARMri5_I5) {
   2801       UInt imm5 = ri->ARMri5.I5.imm5;
   2802       vassert(imm5 >= 1 && imm5 <= 31);
   2803       instr = 0 << 4;
   2804       instr |= imm5 << 7;
   2805    } else {
   2806       instr = 1 << 4;
   2807       instr |= iregNo(ri->ARMri5.R.reg) << 8;
   2808    }
   2809    return instr;
   2810 }
   2811 
   2812 
   2813 /* Get an immediate into a register, using only that
   2814    register.  (very lame..) */
/* Emit code at p to load imm32 into register rD, using only rD as a
   scratch.  On ARMv7+ this is movw (+ movt if the high half is
   nonzero); on older architectures it is a MOV of one byte followed
   by up to three ORRs.  Returns the advanced output pointer; the
   number of instructions emitted varies with imm32. */
static UInt* imm32_to_iregNo ( UInt* p, Int rD, UInt imm32 )
{
   UInt instr;
   vassert(rD >= 0 && rD <= 14); // r15 not good to mess with!
#if 0
   if (0 == (imm32 & ~0xFF)) {
      /* mov with a immediate shifter operand of (0, imm32) (??) */
      instr = XXXXXX__(X1110,X0011,X1010,X0000,rD,X0000);
      instr |= imm32;
      *p++ = instr;
   } else {
      // this is very bad; causes Dcache pollution
      // ldr  rD, [pc]
      instr = XXXXX___(X1110,X0101,X1001,X1111,rD);
      *p++ = instr;
      // b .+8
      instr = 0xEA000000;
      *p++ = instr;
      // .word imm32
      *p++ = imm32;
   }
#else
   if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
      /* Generate movw rD, #low16.  Then, if the high 16 are
         nonzero, generate movt rD, #high16. */
      UInt lo16 = imm32 & 0xFFFF;
      UInt hi16 = (imm32 >> 16) & 0xFFFF;
      instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
                       (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
                       lo16 & 0xF);
      *p++ = instr;
      if (hi16 != 0) {
         instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
                          (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
                          hi16 & 0xF);
         *p++ = instr;
      }
   } else {
      /* Pre-v7 path: build the value one byte at a time.  The first
         emitted insn is MOV rD, #byte (op = X1010); every subsequent
         byte is merged with ORR rD, rD, #byte,ror (op switches to
         X1000 and rN to rD after the first insn).  The rot field is
         the data-processing immediate rotate: imm8 rotated right by
         2*rot places the byte in the desired position. */
      UInt imm, rot;
      UInt op = X1010;
      UInt rN = 0;
      /* The (imm32 == 0) test guarantees at least one instruction is
         emitted even for a zero immediate. */
      if ((imm32 & 0xFF) || (imm32 == 0)) {
         imm = imm32 & 0xFF;
         rot = 0;
         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
         *p++ = instr;
         op = X1000;
         rN = rD;
      }
      if (imm32 & 0xFF000000) {
         /* rot = 4: byte lands in bits 31..24 */
         imm = (imm32 >> 24) & 0xFF;
         rot = 4;
         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
         *p++ = instr;
         op = X1000;
         rN = rD;
      }
      if (imm32 & 0xFF0000) {
         /* rot = 8: byte lands in bits 23..16 */
         imm = (imm32 >> 16) & 0xFF;
         rot = 8;
         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
         *p++ = instr;
         op = X1000;
         rN = rD;
      }
      if (imm32 & 0xFF00) {
         /* rot = 12: byte lands in bits 15..8 */
         imm = (imm32 >> 8) & 0xFF;
         rot = 12;
         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
         *p++ = instr;
         op = X1000;
         rN = rD;
      }
   }
#endif
   return p;
}
   2892 
   2893 /* Get an immediate into a register, using only that register, and
   2894    generating exactly 2 instructions, regardless of the value of the
   2895    immediate. This is used when generating sections of code that need
   2896    to be patched later, so as to guarantee a specific size. */
   2897 static UInt* imm32_to_iregNo_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
   2898 {
   2899    if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
   2900       /* Generate movw rD, #low16 ;  movt rD, #high16. */
   2901       UInt lo16 = imm32 & 0xFFFF;
   2902       UInt hi16 = (imm32 >> 16) & 0xFFFF;
   2903       UInt instr;
   2904       instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
   2905                        (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
   2906                        lo16 & 0xF);
   2907       *p++ = instr;
   2908       instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
   2909                        (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
   2910                        hi16 & 0xF);
   2911       *p++ = instr;
   2912    } else {
   2913       vassert(0); /* lose */
   2914    }
   2915    return p;
   2916 }
   2917 
   2918 /* Check whether p points at a 2-insn sequence cooked up by
   2919    imm32_to_iregNo_EXACTLY2(). */
   2920 static Bool is_imm32_to_iregNo_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
   2921 {
   2922    if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
   2923       /* Generate movw rD, #low16 ;  movt rD, #high16. */
   2924       UInt lo16 = imm32 & 0xFFFF;
   2925       UInt hi16 = (imm32 >> 16) & 0xFFFF;
   2926       UInt i0, i1;
   2927       i0 = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
   2928                     (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
   2929                     lo16 & 0xF);
   2930       i1 = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
   2931                     (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
   2932                     hi16 & 0xF);
   2933       return p[0] == i0 && p[1] == i1;
   2934    } else {
   2935       vassert(0); /* lose */
   2936    }
   2937 }
   2938 
   2939 
   2940 static UInt* do_load_or_store32 ( UInt* p,
   2941                                   Bool isLoad, UInt rD, ARMAMode1* am )
   2942 {
   2943    vassert(rD <= 12);
   2944    vassert(am->tag == ARMam1_RI); // RR case is not handled
   2945    UInt bB = 0;
   2946    UInt bL = isLoad ? 1 : 0;
   2947    Int  simm12;
   2948    UInt instr, bP;
   2949    if (am->ARMam1.RI.simm13 < 0) {
   2950       bP = 0;
   2951       simm12 = -am->ARMam1.RI.simm13;
   2952    } else {
   2953       bP = 1;
   2954       simm12 = am->ARMam1.RI.simm13;
   2955    }
   2956    vassert(simm12 >= 0 && simm12 <= 4095);
   2957    instr = XXXXX___(X1110,X0101,BITS4(bP,bB,0,bL),
   2958                     iregNo(am->ARMam1.RI.reg),
   2959                     rD);
   2960    instr |= simm12;
   2961    *p++ = instr;
   2962    return p;
   2963 }
   2964 
   2965 
   2966 /* Emit an instruction into buf and return the number of bytes used.
   2967    Note that buf is not the insn's final place, and therefore it is
   2968    imperative to emit position-independent code.  If the emitted
   2969    instruction was a profiler inc, set *is_profInc to True, else
   2970    leave it unchanged. */
   2971 
   2972 Int emit_ARMInstr ( /*MB_MOD*/Bool* is_profInc,
   2973                     UChar* buf, Int nbuf, ARMInstr* i,
   2974                     Bool mode64,
   2975                     void* disp_cp_chain_me_to_slowEP,
   2976                     void* disp_cp_chain_me_to_fastEP,
   2977                     void* disp_cp_xindir,
   2978                     void* disp_cp_xassisted )
   2979 {
   2980    UInt* p = (UInt*)buf;
   2981    vassert(nbuf >= 32);
   2982    vassert(mode64 == False);
   2983    vassert(0 == (((HWord)buf) & 3));
   2984 
   2985    switch (i->tag) {
   2986       case ARMin_Alu: {
   2987          UInt     instr, subopc;
   2988          UInt     rD   = iregNo(i->ARMin.Alu.dst);
   2989          UInt     rN   = iregNo(i->ARMin.Alu.argL);
   2990          ARMRI84* argR = i->ARMin.Alu.argR;
   2991          switch (i->ARMin.Alu.op) {
   2992             case ARMalu_ADDS: /* fallthru */
   2993             case ARMalu_ADD:  subopc = X0100; break;
   2994             case ARMalu_ADC:  subopc = X0101; break;
   2995             case ARMalu_SUBS: /* fallthru */
   2996             case ARMalu_SUB:  subopc = X0010; break;
   2997             case ARMalu_SBC:  subopc = X0110; break;
   2998             case ARMalu_AND:  subopc = X0000; break;
   2999             case ARMalu_BIC:  subopc = X1110; break;
   3000             case ARMalu_OR:   subopc = X1100; break;
   3001             case ARMalu_XOR:  subopc = X0001; break;
   3002             default: goto bad;
   3003          }
   3004          instr = skeletal_RI84(argR);
   3005          instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
   3006                            (subopc << 1) & 0xF, rN, rD);
   3007          if (i->ARMin.Alu.op == ARMalu_ADDS
   3008              || i->ARMin.Alu.op == ARMalu_SUBS) {
   3009             instr |= 1<<20;  /* set the S bit */
   3010          }
   3011          *p++ = instr;
   3012          goto done;
   3013       }
   3014       case ARMin_Shift: {
   3015          UInt    instr, subopc;
   3016          UInt    rD   = iregNo(i->ARMin.Shift.dst);
   3017          UInt    rM   = iregNo(i->ARMin.Shift.argL);
   3018          ARMRI5* argR = i->ARMin.Shift.argR;
   3019          switch (i->ARMin.Shift.op) {
   3020             case ARMsh_SHL: subopc = X0000; break;
   3021             case ARMsh_SHR: subopc = X0001; break;
   3022             case ARMsh_SAR: subopc = X0010; break;
   3023             default: goto bad;
   3024          }
   3025          instr = skeletal_RI5(argR);
   3026          instr |= XXXXX__X(X1110,X0001,X1010,X0000,rD, /* _ _ */ rM);
   3027          instr |= (subopc & 3) << 5;
   3028          *p++ = instr;
   3029          goto done;
   3030       }
   3031       case ARMin_Unary: {
   3032          UInt instr;
   3033          UInt rDst = iregNo(i->ARMin.Unary.dst);
   3034          UInt rSrc = iregNo(i->ARMin.Unary.src);
   3035          switch (i->ARMin.Unary.op) {
   3036             case ARMun_CLZ:
   3037                instr = XXXXXXXX(X1110,X0001,X0110,X1111,
   3038                                 rDst,X1111,X0001,rSrc);
   3039                *p++ = instr;
   3040                goto done;
   3041             case ARMun_NEG: /* RSB rD,rS,#0 */
   3042                instr = XXXXX___(X1110,0x2,0x6,rSrc,rDst);
   3043                *p++ = instr;
   3044                goto done;
   3045             case ARMun_NOT: {
   3046                UInt subopc = X1111; /* MVN */
   3047                instr = rSrc;
   3048                instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
   3049                                  (subopc << 1) & 0xF, 0, rDst);
   3050                *p++ = instr;
   3051                goto done;
   3052             }
   3053             default:
   3054                break;
   3055          }
   3056          goto bad;
   3057       }
   3058       case ARMin_CmpOrTst: {
   3059          UInt instr  = skeletal_RI84(i->ARMin.CmpOrTst.argR);
   3060          UInt subopc = i->ARMin.CmpOrTst.isCmp ? X1010 : X1000;
   3061          UInt SBZ    = 0;
   3062          instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
   3063                            ((subopc << 1) & 0xF) | 1,
   3064                            iregNo(i->ARMin.CmpOrTst.argL), SBZ );
   3065          *p++ = instr;
   3066          goto done;
   3067       }
   3068       case ARMin_Mov: {
   3069          UInt instr  = skeletal_RI84(i->ARMin.Mov.src);
   3070          UInt subopc = X1101; /* MOV */
   3071          UInt SBZ    = 0;
   3072          instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
   3073                            (subopc << 1) & 0xF, SBZ,
   3074                            iregNo(i->ARMin.Mov.dst));
   3075          *p++ = instr;
   3076          goto done;
   3077       }
   3078       case ARMin_Imm32: {
   3079          p = imm32_to_iregNo( (UInt*)p, iregNo(i->ARMin.Imm32.dst),
   3080                                         i->ARMin.Imm32.imm32 );
   3081          goto done;
   3082       }
      case ARMin_LdSt32:
      case ARMin_LdSt8U: {
         /* 32-bit or unsigned-8-bit load/store, shared encoder.
            bB selects byte access, bL selects load vs store. */
         UInt        bL, bB;
         HReg        rD;
         ARMAMode1*  am;
         ARMCondCode cc;
         if (i->tag == ARMin_LdSt32) {
            bB = 0;
            bL = i->ARMin.LdSt32.isLoad ? 1 : 0;
            am = i->ARMin.LdSt32.amode;
            rD = i->ARMin.LdSt32.rD;
            cc = i->ARMin.LdSt32.cc;
         } else {
            bB = 1;
            bL = i->ARMin.LdSt8U.isLoad ? 1 : 0;
            am = i->ARMin.LdSt8U.amode;
            rD = i->ARMin.LdSt8U.rD;
            cc = i->ARMin.LdSt8U.cc;
         }
         vassert(cc != ARMcc_NV);
         if (am->tag == ARMam1_RI) {
            /* reg + signed-imm addressing: split the signed offset
               into a magnitude (simm12) and a direction bit (bP:
               1 = add offset, 0 = subtract). */
            Int  simm12;
            UInt instr, bP;
            if (am->ARMam1.RI.simm13 < 0) {
               bP = 0;
               simm12 = -am->ARMam1.RI.simm13;
            } else {
               bP = 1;
               simm12 = am->ARMam1.RI.simm13;
            }
            vassert(simm12 >= 0 && simm12 <= 4095);
            instr = XXXXX___(cc,X0101,BITS4(bP,bB,0,bL),
                             iregNo(am->ARMam1.RI.reg),
                             iregNo(rD));
            instr |= simm12;
            *p++ = instr;
            goto done;
         } else {
            // RR case: register-offset addressing not handled here
            goto bad;
         }
      }
      case ARMin_LdSt16: {
         /* 16-bit load/store: strh / ldrh / ldrsh, selected by the
            (bL, bS) pair below.  Uses the 8-bit split-immediate
            addressing form. */
         HReg        rD = i->ARMin.LdSt16.rD;
         UInt        bS = i->ARMin.LdSt16.signedLoad ? 1 : 0;
         UInt        bL = i->ARMin.LdSt16.isLoad ? 1 : 0;
         ARMAMode2*  am = i->ARMin.LdSt16.amode;
         ARMCondCode cc = i->ARMin.LdSt16.cc;
         vassert(cc != ARMcc_NV);
         if (am->tag == ARMam2_RI) {
            HReg rN = am->ARMam2.RI.reg;
            Int  simm8;
            UInt bP, imm8hi, imm8lo, instr;
            /* As above: magnitude plus add/subtract bit. */
            if (am->ARMam2.RI.simm9 < 0) {
               bP = 0;
               simm8 = -am->ARMam2.RI.simm9;
            } else {
               bP = 1;
               simm8 = am->ARMam2.RI.simm9;
            }
            vassert(simm8 >= 0 && simm8 <= 255);
            /* The 8-bit offset is split across two 4-bit fields. */
            imm8hi = (simm8 >> 4) & 0xF;
            imm8lo = simm8 & 0xF;
            vassert(!(bL == 0 && bS == 1)); // "! signed store"
            /**/ if (bL == 0 && bS == 0) {
               // strh
               instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,0), iregNo(rN),
                                iregNo(rD), imm8hi, X1011, imm8lo);
               *p++ = instr;
               goto done;
            }
            else if (bL == 1 && bS == 0) {
               // ldrh
               instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregNo(rN),
                                iregNo(rD), imm8hi, X1011, imm8lo);
               *p++ = instr;
               goto done;
            }
            else if (bL == 1 && bS == 1) {
               // ldrsh
               instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregNo(rN),
                                iregNo(rD), imm8hi, X1111, imm8lo);
               *p++ = instr;
               goto done;
            }
            else vassert(0); // ill-constructed insn
         } else {
            // RR case: register-offset addressing not handled here
            goto bad;
         }
      }
      case ARMin_Ld8S: {
         /* Signed 8-bit load (ldrsb); the unsigned variant is handled
            by ARMin_LdSt8U above. */
         HReg        rD = i->ARMin.Ld8S.rD;
         ARMAMode2*  am = i->ARMin.Ld8S.amode;
         ARMCondCode cc = i->ARMin.Ld8S.cc;
         vassert(cc != ARMcc_NV);
         if (am->tag == ARMam2_RI) {
            HReg rN = am->ARMam2.RI.reg;
            Int  simm8;
            UInt bP, imm8hi, imm8lo, instr;
            if (am->ARMam2.RI.simm9 < 0) {
               bP = 0;
               simm8 = -am->ARMam2.RI.simm9;
            } else {
               bP = 1;
               simm8 = am->ARMam2.RI.simm9;
            }
            vassert(simm8 >= 0 && simm8 <= 255);
            imm8hi = (simm8 >> 4) & 0xF;
            imm8lo = simm8 & 0xF;
            // ldrsb
            instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregNo(rN),
                             iregNo(rD), imm8hi, X1101, imm8lo);
            *p++ = instr;
            goto done;
         } else {
            // RR case: register-offset addressing not handled here
            goto bad;
         }
      }
   3203 
      case ARMin_XDirect: {
         /* Direct (known-target) exit from translated code, emitted as
            a patchable "chain-me" call sequence. */
         /* NB: what goes on here has to be very closely coordinated
            with the chainXDirect_ARM and unchainXDirect_ARM below. */
         /* We're generating chain-me requests here, so we need to be
            sure this is actually allowed -- no-redir translations
            can't use chain-me's.  Hence: */
         vassert(disp_cp_chain_me_to_slowEP != NULL);
         vassert(disp_cp_chain_me_to_fastEP != NULL);

         /* Use ptmp for backpatching conditional jumps. */
         UInt* ptmp = NULL;

         /* First off, if this is conditional, create a conditional
            jump over the rest of it.  Or at least, leave a space for
            it that we will shortly fill in. */
         if (i->ARMin.XDirect.cond != ARMcc_AL) {
            vassert(i->ARMin.XDirect.cond != ARMcc_NV);
            ptmp = p;
            *p++ = 0;
         }

         /* Update the guest R15T. */
         /* movw r12, lo16(dstGA) */
         /* movt r12, hi16(dstGA) */
         /* str r12, amR15T */
         p = imm32_to_iregNo(p, /*r*/12, i->ARMin.XDirect.dstGA);
         p = do_load_or_store32(p, False/*!isLoad*/,
                                /*r*/12, i->ARMin.XDirect.amR15T);

         /* --- FIRST PATCHABLE BYTE follows --- */
         /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
            calling to) backs up the return address, so as to find the
            address of the first patchable byte.  So: don't change the
            number of instructions (3) below. */
         /* movw r12, lo16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
         /* movt r12, hi16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
         /* blx  r12  (A1) */
         void* disp_cp_chain_me
                  = i->ARMin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
                                              : disp_cp_chain_me_to_slowEP;
         p = imm32_to_iregNo_EXACTLY2(p, /*r*/12,
                                      (UInt)Ptr_to_ULong(disp_cp_chain_me));
         *p++ = 0xE12FFF3C; /* blx r12 */
         /* --- END of PATCHABLE BYTES --- */

         /* Fix up the conditional jump, if there was one. */
         if (i->ARMin.XDirect.cond != ARMcc_AL) {
            Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
            vassert(delta > 0 && delta < 40);
            vassert((delta & 3) == 0);
            /* Flipping bit 0 of an ARM condition code negates it. */
            UInt notCond = 1 ^ (UInt)i->ARMin.XDirect.cond;
            vassert(notCond <= 13); /* Neither AL nor NV */
            /* Byte delta -> word branch offset, less the implicit
               PC-ahead bias of 2 words. */
            delta = (delta >> 2) - 2;
            *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
         }
         goto done;
      }
   3261 
      case ARMin_XIndir: {
         /* Indirect (register-target) exit: store the guest PC and
            jump to the dispatcher's indirect-exit entry point. */
         /* We're generating transfers that could lead indirectly to a
            chain-me, so we need to be sure this is actually allowed
            -- no-redir translations are not allowed to reach normal
            translations without going through the scheduler.  That
            means no XDirects or XIndirs out from no-redir
            translations.  Hence: */
         vassert(disp_cp_xindir != NULL);

         /* Use ptmp for backpatching conditional jumps. */
         UInt* ptmp = NULL;

         /* First off, if this is conditional, create a conditional
            jump over the rest of it.  Or at least, leave a space for
            it that we will shortly fill in. */
         if (i->ARMin.XIndir.cond != ARMcc_AL) {
            vassert(i->ARMin.XIndir.cond != ARMcc_NV);
            ptmp = p;
            *p++ = 0;
         }

         /* Update the guest R15T. */
         /* str r-dstGA, amR15T */
         p = do_load_or_store32(p, False/*!isLoad*/,
                                iregNo(i->ARMin.XIndir.dstGA),
                                i->ARMin.XIndir.amR15T);

         /* movw r12, lo16(VG_(disp_cp_xindir)) */
         /* movt r12, hi16(VG_(disp_cp_xindir)) */
         /* bx   r12  (A1) */
         p = imm32_to_iregNo(p, /*r*/12,
                             (UInt)Ptr_to_ULong(disp_cp_xindir));
         *p++ = 0xE12FFF1C; /* bx r12 */

         /* Fix up the conditional jump, if there was one. */
         if (i->ARMin.XIndir.cond != ARMcc_AL) {
            Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
            vassert(delta > 0 && delta < 40);
            vassert((delta & 3) == 0);
            /* Flipping bit 0 of an ARM condition code negates it. */
            UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
            vassert(notCond <= 13); /* Neither AL nor NV */
            delta = (delta >> 2) - 2;
            *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
         }
         goto done;
      }
   3308 
      case ARMin_XAssisted: {
         /* Assisted exit: store guest PC, load a trap-code magic
            number into r8 describing why we exited, then jump to the
            dispatcher's assisted-exit entry point. */
         /* Use ptmp for backpatching conditional jumps. */
         UInt* ptmp = NULL;

         /* First off, if this is conditional, create a conditional
            jump over the rest of it.  Or at least, leave a space for
            it that we will shortly fill in. */
         if (i->ARMin.XAssisted.cond != ARMcc_AL) {
            vassert(i->ARMin.XAssisted.cond != ARMcc_NV);
            ptmp = p;
            *p++ = 0;
         }

         /* Update the guest R15T. */
         /* str r-dstGA, amR15T */
         p = do_load_or_store32(p, False/*!isLoad*/,
                                iregNo(i->ARMin.XAssisted.dstGA),
                                i->ARMin.XAssisted.amR15T);

         /* movw r8,  $magic_number */
         UInt trcval = 0;
         switch (i->ARMin.XAssisted.jk) {
            case Ijk_ClientReq:   trcval = VEX_TRC_JMP_CLIENTREQ;   break;
            case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
            //case Ijk_Sys_int128:  trcval = VEX_TRC_JMP_SYS_INT128;  break;
            case Ijk_Yield:       trcval = VEX_TRC_JMP_YIELD;       break;
            //case Ijk_EmWarn:      trcval = VEX_TRC_JMP_EMWARN;      break;
            //case Ijk_MapFail:     trcval = VEX_TRC_JMP_MAPFAIL;     break;
            case Ijk_NoDecode:    trcval = VEX_TRC_JMP_NODECODE;    break;
            case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
            case Ijk_NoRedir:     trcval = VEX_TRC_JMP_NOREDIR;     break;
            //case Ijk_SigTRAP:     trcval = VEX_TRC_JMP_SIGTRAP;     break;
            //case Ijk_SigSEGV:     trcval = VEX_TRC_JMP_SIGSEGV;     break;
            case Ijk_Boring:      trcval = VEX_TRC_JMP_BORING;      break;
            /* We don't expect to see the following being assisted. */
            //case Ijk_Ret:
            //case Ijk_Call:
            /* fallthrough */
            default:
               ppIRJumpKind(i->ARMin.XAssisted.jk);
               vpanic("emit_ARMInstr.ARMin_XAssisted: unexpected jump kind");
         }
         vassert(trcval != 0);
         p = imm32_to_iregNo(p, /*r*/8, trcval);

         /* movw r12, lo16(VG_(disp_cp_xassisted)) */
         /* movt r12, hi16(VG_(disp_cp_xassisted)) */
         /* bx   r12  (A1) */
         p = imm32_to_iregNo(p, /*r*/12,
                             (UInt)Ptr_to_ULong(disp_cp_xassisted));
         *p++ = 0xE12FFF1C; /* bx r12 */

         /* Fix up the conditional jump, if there was one. */
         if (i->ARMin.XAssisted.cond != ARMcc_AL) {
            Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
            vassert(delta > 0 && delta < 40);
            vassert((delta & 3) == 0);
            /* Flipping bit 0 of an ARM condition code negates it. */
            UInt notCond = 1 ^ (UInt)i->ARMin.XAssisted.cond;
            vassert(notCond <= 13); /* Neither AL nor NV */
            delta = (delta >> 2) - 2;
            *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
         }
         goto done;
      }
   3373 
      case ARMin_CMov: {
         /* Conditional MOV: identical encoding to ARMin_Mov above,
            but predicated on i->ARMin.CMov.cond instead of AL. */
         UInt instr  = skeletal_RI84(i->ARMin.CMov.src);
         UInt subopc = X1101; /* MOV */
         UInt SBZ    = 0;
         instr |= XXXXX___(i->ARMin.CMov.cond, (1 & (subopc >> 3)),
                           (subopc << 1) & 0xF, SBZ,
                           iregNo(i->ARMin.CMov.dst));
         *p++ = instr;
         goto done;
      }
   3384 
      case ARMin_Call: {
         /* (Possibly conditional) call to a helper.  If the call is
            conditional AND has a return-value fixup obligation, we
            must emit an if-then-else diamond so the "call skipped"
            path still writes a recognisable junk value into the
            return register(s). */
         UInt instr;
         /* Decide on a scratch reg used to hold to the call address.
            This has to be done as per the comments in getRegUsage. */
         Int scratchNo;
         switch (i->ARMin.Call.nArgRegs) {
            case 0:  scratchNo = 0;  break;
            case 1:  scratchNo = 1;  break;
            case 2:  scratchNo = 2;  break;
            case 3:  scratchNo = 3;  break;
            case 4:  scratchNo = 11; break;
            default: vassert(0);
         }
         /* If we don't need to do any fixup actions in the case that
            the call doesn't happen, just do the simple thing and emit
            straight-line code.  We hope this is the common case. */
         if (i->ARMin.Call.cond == ARMcc_AL/*call always happens*/
             || i->ARMin.Call.rloc.pri == RLPri_None/*no fixup action*/) {
            // r"scratchNo" = &target
            p = imm32_to_iregNo( (UInt*)p,
                                 scratchNo, (UInt)i->ARMin.Call.target );
            // blx{cond} r"scratchNo"
            instr = XXX___XX(i->ARMin.Call.cond, X0001, X0010, /*___*/
                             X0011, scratchNo);
            instr |= 0xFFF << 8; // stick in the SBOnes
            *p++ = instr;
         } else {
            Int delta;
            /* Complex case.  We have to generate an if-then-else
               diamond. */
            // before:
            //   b{!cond} else:
            //   r"scratchNo" = &target
            //   blx{AL} r"scratchNo"
            // preElse:
            //   b after:
            // else:
            //   mov r0, #0x55555555  // possibly
            //   mov r1, r0           // possibly
            // after:

            // before:
            UInt* pBefore = p;

            //   b{!cond} else:  // ptmp1 points here
            *p++ = 0; // filled in later

            //   r"scratchNo" = &target
            p = imm32_to_iregNo( (UInt*)p,
                                 scratchNo, (UInt)i->ARMin.Call.target );

            //   blx{AL} r"scratchNo"
            instr = XXX___XX(ARMcc_AL, X0001, X0010, /*___*/
                             X0011, scratchNo);
            instr |= 0xFFF << 8; // stick in the SBOnes
            *p++ = instr;

            // preElse:
            UInt* pPreElse = p;

            //   b after:
            *p++ = 0; // filled in later

            // else:
            /* Backpatch the inverted-condition branch over the call.
               (1 ^ cond negates an ARM condition code.) */
            delta = (UChar*)p - (UChar*)pBefore;
            delta = (delta >> 2) - 2;
            *pBefore
               = XX______(1 ^ i->ARMin.Call.cond, X1010) | (delta & 0xFFFFFF);

            /* Do the 'else' actions */
            switch (i->ARMin.Call.rloc.pri) {
               case RLPri_Int:
                  p = imm32_to_iregNo_EXACTLY2(p, /*r*/0, 0x55555555);
                  break;
               case RLPri_2Int:
                  vassert(0); //ATC
                  p = imm32_to_iregNo_EXACTLY2(p, /*r*/0, 0x55555555);
                  /* mov r1, r0 */
                  *p++ = 0xE1A01000;
                  break;
               case RLPri_None: case RLPri_INVALID: default:
                  vassert(0);
            }

            // after:
            /* Backpatch the unconditional branch over the 'else' arm. */
            delta = (UChar*)p - (UChar*)pPreElse;
            delta = (delta >> 2) - 2;
            *pPreElse = XX______(ARMcc_AL, X1010) | (delta & 0xFFFFFF);
         }

         goto done;
      }
   3477 
      case ARMin_Mul: {
         /* Multiply; operands are hardwired to r0..r3 -- presumably
            arranged by the register allocator (see getRegUsage) --
            so the whole insn is a fixed 32-bit word.
            E0000392   mul     r0, r2, r3
            E0810392   umull   r0(LO), r1(HI), r2, r3
            E0C10392   smull   r0(LO), r1(HI), r2, r3
         */
         switch (i->ARMin.Mul.op) {
            case ARMmul_PLAIN: *p++ = 0xE0000392; goto done;
            case ARMmul_ZX:    *p++ = 0xE0810392; goto done;
            case ARMmul_SX:    *p++ = 0xE0C10392; goto done;
            default: vassert(0);
         }
         goto bad; /* not reached: all cases above exit via goto */
      }
      case ARMin_LdrEX: {
         /* Load-exclusive with fixed registers (r2/r3 data, r4
            address), sized by szB:
            E1D42F9F   ldrexb r2, [r4]
            E1F42F9F   ldrexh r2, [r4]
            E1942F9F   ldrex  r2, [r4]
            E1B42F9F   ldrexd r2, r3, [r4]
         */
         switch (i->ARMin.LdrEX.szB) {
            case 1: *p++ = 0xE1D42F9F; goto done;
            case 2: *p++ = 0xE1F42F9F; goto done;
            case 4: *p++ = 0xE1942F9F; goto done;
            case 8: *p++ = 0xE1B42F9F; goto done;
            default: break;
         }
         goto bad;
      }
      case ARMin_StrEX: {
         /* Store-exclusive with fixed registers (r0 status, r2/r3
            data, r4 address), sized by szB:
            E1C40F92   strexb r0, r2, [r4]
            E1E40F92   strexh r0, r2, [r4]
            E1840F92   strex  r0, r2, [r4]
            E1A40F92   strexd r0, r2, r3, [r4]
         */
         switch (i->ARMin.StrEX.szB) {
            case 1: *p++ = 0xE1C40F92; goto done;
            case 2: *p++ = 0xE1E40F92; goto done;
            case 4: *p++ = 0xE1840F92; goto done;
            case 8: *p++ = 0xE1A40F92; goto done;
            default: break;
         }
         goto bad;
      }
      case ARMin_VLdStD: {
         /* VFP D-register load/store (vldr/vstr dD, [rN, #+/-off]).
            The offset must be word-aligned and is encoded in words. */
         UInt dD     = dregNo(i->ARMin.VLdStD.dD);
         UInt rN     = iregNo(i->ARMin.VLdStD.amode->reg);
         Int  simm11 = i->ARMin.VLdStD.amode->simm11;
         UInt off8   = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
         UInt bU     = simm11 >= 0 ? 1 : 0; /* 1 = add, 0 = subtract */
         UInt bL     = i->ARMin.VLdStD.isLoad ? 1 : 0;
         UInt insn;
         vassert(0 == (off8 & 3));
         off8 >>= 2; /* bytes -> words */
         vassert(0 == (off8 & 0xFFFFFF00));
         insn = XXXXXX__(0xE,X1101,BITS4(bU,0,0,bL),rN,dD,X1011);
         insn |= off8;
         *p++ = insn;
         goto done;
      }
      case ARMin_VLdStS: {
         /* VFP S-register load/store; the 5-bit S-reg number splits
            into a 4-bit field (fD >> 1) plus the D bit (fD & 1). */
         UInt fD     = fregNo(i->ARMin.VLdStS.fD);
         UInt rN     = iregNo(i->ARMin.VLdStS.amode->reg);
         Int  simm11 = i->ARMin.VLdStS.amode->simm11;
         UInt off8   = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
         UInt bU     = simm11 >= 0 ? 1 : 0; /* 1 = add, 0 = subtract */
         UInt bL     = i->ARMin.VLdStS.isLoad ? 1 : 0;
         UInt bD     = fD & 1;
         UInt insn;
         vassert(0 == (off8 & 3));
         off8 >>= 2; /* bytes -> words */
         vassert(0 == (off8 & 0xFFFFFF00));
         insn = XXXXXX__(0xE,X1101,BITS4(bU,bD,0,bL),rN, (fD >> 1), X1010);
         insn |= off8;
         *p++ = insn;
         goto done;
      }
      case ARMin_VAluD: {
         /* F64 arithmetic: dD = dN op dM.  The op is encoded as the
            4-bit pqrs pattern scattered into the instruction below. */
         UInt dN = dregNo(i->ARMin.VAluD.argL);
         UInt dD = dregNo(i->ARMin.VAluD.dst);
         UInt dM = dregNo(i->ARMin.VAluD.argR);
         UInt pqrs = X1111; /* undefined */
         switch (i->ARMin.VAluD.op) {
            case ARMvfp_ADD: pqrs = X0110; break;
            case ARMvfp_SUB: pqrs = X0111; break;
            case ARMvfp_MUL: pqrs = X0100; break;
            case ARMvfp_DIV: pqrs = X1000; break;
            default: goto bad;
         }
         vassert(pqrs != X1111);
         UInt bP  = (pqrs >> 3) & 1;
         UInt bQ  = (pqrs >> 2) & 1;
         UInt bR  = (pqrs >> 1) & 1;
         UInt bS  = (pqrs >> 0) & 1;
         UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,0,bQ,bR), dN, dD,
                              X1011, BITS4(0,bS,0,0), dM);
         *p++ = insn;
         goto done;
      }
      case ARMin_VAluS: {
         /* F32 arithmetic, same pqrs scheme as VAluD; each 5-bit
            S-reg number splits into a 4-bit field plus its low bit
            (bN/bD/bM). */
         UInt dN = fregNo(i->ARMin.VAluS.argL);
         UInt dD = fregNo(i->ARMin.VAluS.dst);
         UInt dM = fregNo(i->ARMin.VAluS.argR);
         UInt bN = dN & 1;
         UInt bD = dD & 1;
         UInt bM = dM & 1;
         UInt pqrs = X1111; /* undefined */
         switch (i->ARMin.VAluS.op) {
            case ARMvfp_ADD: pqrs = X0110; break;
            case ARMvfp_SUB: pqrs = X0111; break;
            case ARMvfp_MUL: pqrs = X0100; break;
            case ARMvfp_DIV: pqrs = X1000; break;
            default: goto bad;
         }
         vassert(pqrs != X1111);
         UInt bP  = (pqrs >> 3) & 1;
         UInt bQ  = (pqrs >> 2) & 1;
         UInt bR  = (pqrs >> 1) & 1;
         UInt bS  = (pqrs >> 0) & 1;
         UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,bD,bQ,bR),
                              (dN >> 1), (dD >> 1),
                              X1010, BITS4(bN,bS,bM,0), (dM >> 1));
         *p++ = insn;
         goto done;
      }
      case ARMin_VUnaryD: {
         /* F64 unary op: dD = op(dM), for copy/abs/neg/sqrt. */
         UInt dD   = dregNo(i->ARMin.VUnaryD.dst);
         UInt dM   = dregNo(i->ARMin.VUnaryD.src);
         UInt insn = 0;
         switch (i->ARMin.VUnaryD.op) {
            case ARMvfpu_COPY:
               insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X0100,dM);
               break;
            case ARMvfpu_ABS:
               insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X1100,dM);
               break;
            case ARMvfpu_NEG:
               insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X0100,dM);
               break;
            case ARMvfpu_SQRT:
               insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X1100,dM);
               break;
            default:
               goto bad;
         }
         *p++ = insn;
         goto done;
      }
      case ARMin_VUnaryS: {
         /* F32 unary op: fD = op(fM).  The low bit of each 5-bit
            S-reg number is carried in a separate field. */
         UInt fD   = fregNo(i->ARMin.VUnaryS.dst);
         UInt fM   = fregNo(i->ARMin.VUnaryS.src);
         UInt insn = 0;
         switch (i->ARMin.VUnaryS.op) {
            case ARMvfpu_COPY:
               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
                               (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
                               (fM >> 1));
               break;
            case ARMvfpu_ABS:
               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
                               (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
                               (fM >> 1));
               break;
            case ARMvfpu_NEG:
               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
                               (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
                               (fM >> 1));
               break;
            case ARMvfpu_SQRT:
               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
                               (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
                               (fM >> 1));
               break;
            default:
               goto bad;
         }
         *p++ = insn;
         goto done;
      }
      case ARMin_VCmpD: {
         /* F64 compare: FCMPD sets the FPSCR flags, then FMSTAT
            copies them to the APSR so integer conditional code can
            test them. */
         UInt dD   = dregNo(i->ARMin.VCmpD.argL);
         UInt dM   = dregNo(i->ARMin.VCmpD.argR);
         UInt insn = XXXXXXXX(0xE, X1110, X1011, X0100, dD, X1011, X0100, dM);
         *p++ = insn;       /* FCMPD dD, dM */
         *p++ = 0xEEF1FA10; /* FMSTAT */
         goto done;
      }
      case ARMin_VCMovD: {
         /* Conditional F64 register copy (predicated vmov.f64).
            Must be genuinely conditional, hence cc != AL. */
         UInt cc = (UInt)i->ARMin.VCMovD.cond;
         UInt dD = dregNo(i->ARMin.VCMovD.dst);
         UInt dM = dregNo(i->ARMin.VCMovD.src);
         vassert(cc < 16 && cc != ARMcc_AL);
         UInt insn = XXXXXXXX(cc, X1110,X1011,X0000,dD,X1011,X0100,dM);
         *p++ = insn;
         goto done;
      }
      case ARMin_VCMovS: {
         /* Conditional F32 register copy (predicated vmov.f32). */
         UInt cc = (UInt)i->ARMin.VCMovS.cond;
         UInt fD = fregNo(i->ARMin.VCMovS.dst);
         UInt fM = fregNo(i->ARMin.VCMovS.src);
         vassert(cc < 16 && cc != ARMcc_AL);
         UInt insn = XXXXXXXX(cc, X1110, BITS4(1,(fD & 1),1,1),
                              X0000,(fD >> 1),X1010,
                              BITS4(0,1,(fM & 1),0), (fM >> 1));
         *p++ = insn;
         goto done;
      }
      case ARMin_VCvtSD: {
         /* F32 <-> F64 conversion: sToD selects single-to-double
            (S src, D dst) vs double-to-single (D src, S dst). */
         if (i->ARMin.VCvtSD.sToD) {
            UInt dD = dregNo(i->ARMin.VCvtSD.dst);
            UInt fM = fregNo(i->ARMin.VCvtSD.src);
            UInt insn = XXXXXXXX(0xE, X1110, X1011, X0111, dD, X1010,
                                 BITS4(1,1, (fM & 1), 0),
                                 (fM >> 1));
            *p++ = insn;
            goto done;
         } else {
            UInt fD = fregNo(i->ARMin.VCvtSD.dst);
            UInt dM = dregNo(i->ARMin.VCvtSD.src);
            UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1),
                                 X0111, (fD >> 1),
                                 X1011, X1100, dM);
            *p++ = insn;
            goto done;
         }
      }
      case ARMin_VXferD: {
         /* Move 64 bits between a D register and an integer register
            pair (rHi:rLo); direction selected by toD. */
         UInt dD  = dregNo(i->ARMin.VXferD.dD);
         UInt rHi = iregNo(i->ARMin.VXferD.rHi);
         UInt rLo = iregNo(i->ARMin.VXferD.rLo);
         /* vmov dD, rLo, rHi is
            E C 4 rHi rLo B (0,0,dD[4],1) dD[3:0]
            vmov rLo, rHi, dD is
            E C 5 rHi rLo B (0,0,dD[4],1) dD[3:0]
         */
         UInt insn
            = XXXXXXXX(0xE, 0xC, i->ARMin.VXferD.toD ? 4 : 5,
                       rHi, rLo, 0xB,
                       BITS4(0,0, ((dD >> 4) & 1), 1), (dD & 0xF));
         *p++ = insn;
         goto done;
      }
      case ARMin_VXferS: {
         /* Move 32 bits between an S register and an integer
            register; direction selected by toS. */
         UInt fD  = fregNo(i->ARMin.VXferS.fD);
         UInt rLo = iregNo(i->ARMin.VXferS.rLo);
         /* vmov fD, rLo is
            E E 0 fD[4:1] rLo A (fD[0],0,0,1) 0
            vmov rLo, fD is
            E E 1 fD[4:1] rLo A (fD[0],0,0,1) 0
         */
         UInt insn
            = XXXXXXXX(0xE, 0xE, i->ARMin.VXferS.toS ? 0 : 1,
                       (fD >> 1) & 0xF, rLo, 0xA,
                       BITS4((fD & 1),0,0,1), 0);
         *p++ = insn;
         goto done;
      }
      case ARMin_VCvtID: {
         /* Int <-> F64 conversion; four variants selected by the
            (iToD, syned) pair.  The integer value lives in an S
            (F32) register in all cases. */
         Bool iToD = i->ARMin.VCvtID.iToD;
         Bool syned = i->ARMin.VCvtID.syned;
         if (iToD && syned) {
            // FSITOD: I32S-in-freg to F64-in-dreg
            UInt regF = fregNo(i->ARMin.VCvtID.src);
            UInt regD = dregNo(i->ARMin.VCvtID.dst);
            UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
                                 X1011, BITS4(1,1,(regF & 1),0),
                                 (regF >> 1) & 0xF);
            *p++ = insn;
            goto done;
         }
         if (iToD && (!syned)) {
            // FUITOD: I32U-in-freg to F64-in-dreg
            UInt regF = fregNo(i->ARMin.VCvtID.src);
            UInt regD = dregNo(i->ARMin.VCvtID.dst);
            UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
                                 X1011, BITS4(0,1,(regF & 1),0),
                                 (regF >> 1) & 0xF);
            *p++ = insn;
            goto done;
         }
         if ((!iToD) && syned) {
            // FTOSID: F64-in-dreg to I32S-in-freg
            UInt regD = dregNo(i->ARMin.VCvtID.src);
            UInt regF = fregNo(i->ARMin.VCvtID.dst);
            UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
                                 X1101, (regF >> 1) & 0xF,
                                 X1011, X0100, regD);
            *p++ = insn;
            goto done;
         }
         if ((!iToD) && (!syned)) {
            // FTOUID: F64-in-dreg to I32U-in-freg
            UInt regD = dregNo(i->ARMin.VCvtID.src);
            UInt regF = fregNo(i->ARMin.VCvtID.dst);
            UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
                                 X1100, (regF >> 1) & 0xF,
                                 X1011, X0100, regD);
            *p++ = insn;
            goto done;
         }
         /*UNREACHED*/
         vassert(0);
      }
      /* Move a core register to the FPSCR (VFP status/control reg).
         The reverse direction (FPSCR -> core reg) is not implemented
         and falls through to 'bad'. */
      case ARMin_FPSCR: {
         Bool toFPSCR = i->ARMin.FPSCR.toFPSCR;
         UInt iReg    = iregNo(i->ARMin.FPSCR.iReg);
         if (toFPSCR) {
            /* fmxr fpscr, iReg is EEE1 iReg A10 */
            *p++ = 0xEEE10A10 | ((iReg & 0xF) << 12);
            goto done;
         }
         goto bad; // FPSCR -> iReg case currently ATC
      }
      /* Full memory fence: emit DSB, DMB and ISB back to back, using
         the ARMv7 encodings.  The older CP15 forms are kept below,
         commented out, for reference. */
      case ARMin_MFence: {
         // It's not clear (to me) how these relate to the ARMv7
         // versions, so let's just use the v7 versions as they
         // are at least well documented.
         //*p++ = 0xEE070F9A; /* mcr 15,0,r0,c7,c10,4 (DSB) */
         //*p++ = 0xEE070FBA; /* mcr 15,0,r0,c7,c10,5 (DMB) */
         //*p++ = 0xEE070F95; /* mcr 15,0,r0,c7,c5,4  (ISB) */
         *p++ = 0xF57FF04F; /* DSB sy */
         *p++ = 0xF57FF05F; /* DMB sy */
         *p++ = 0xF57FF06F; /* ISB */
         goto done;
      }
      /* Clear the local monitor's exclusive-access state. */
      case ARMin_CLREX: {
         *p++ = 0xF57FF01F; /* clrex */
         goto done;
      }
   3806 
      /* NEON load/store of a 128-bit Q register.  The Q reg is encoded
         as its even-numbered D-reg pair (hence the << 1); bit 4 of the
         D-reg number is split out into the encoding's D bit. */
      case ARMin_NLdStQ: {
         UInt regD = qregNo(i->ARMin.NLdStQ.dQ) << 1;
         UInt regN, regM;
         UInt D = regD >> 4;                         /* encoding's D bit */
         UInt bL = i->ARMin.NLdStQ.isLoad ? 1 : 0;   /* 1 = load, 0 = store */
         UInt insn;
         vassert(hregClass(i->ARMin.NLdStQ.dQ) == HRcVec128);
         regD &= 0xF;
         if (i->ARMin.NLdStQ.amode->tag == ARMamN_RR) {
            /* [rN], rM form: register post-index. */
            regN = iregNo(i->ARMin.NLdStQ.amode->ARMamN.RR.rN);
            regM = iregNo(i->ARMin.NLdStQ.amode->ARMamN.RR.rM);
         } else {
            /* [rN] form: Rm = 15 selects the no-writeback variant. */
            regN = iregNo(i->ARMin.NLdStQ.amode->ARMamN.R.rN);
            regM = 15;
         }
         insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
                              regN, regD, X1010, X1000, regM);
         *p++ = insn;
         goto done;
      }
      /* NEON load/store of a 64-bit D register.  Same shape as the
         NLdStQ case above, but transfers a single D reg (note the
         X0111 vs X1010 field). */
      case ARMin_NLdStD: {
         UInt regD = dregNo(i->ARMin.NLdStD.dD);
         UInt regN, regM;
         UInt D = regD >> 4;                         /* encoding's D bit */
         UInt bL = i->ARMin.NLdStD.isLoad ? 1 : 0;   /* 1 = load, 0 = store */
         UInt insn;
         vassert(hregClass(i->ARMin.NLdStD.dD) == HRcFlt64);
         regD &= 0xF;
         if (i->ARMin.NLdStD.amode->tag == ARMamN_RR) {
            /* [rN], rM form: register post-index. */
            regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rN);
            regM = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rM);
         } else {
            /* [rN] form: Rm = 15 selects the no-writeback variant. */
            regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.R.rN);
            regM = 15;
         }
         insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
                              regN, regD, X0111, X1000, regM);
         *p++ = insn;
         goto done;
      }
      /* NEON unary ops involving a scalar (lane) operand: VDUP from a
         scalar, and moves between a core register and a vector lane.
         Throughout, register numbers have bit 4 split off into the
         encoding's D/M bits, and Q regs are encoded as their even
         D-reg pair (<< 1). */
      case ARMin_NUnaryS: {
         UInt Q = i->ARMin.NUnaryS.Q ? 1 : 0;
         UInt regD, D;
         UInt regM, M;
         UInt size = i->ARMin.NUnaryS.size;
         UInt insn;
         /* opc packs the lane index and element size; it is later split
            into the opc1/opc2 encoding fields. */
         UInt opc, opc1, opc2;
         switch (i->ARMin.NUnaryS.op) {
            case ARMneon_VDUP:
               /* Duplicate scalar src->reg[index] into every lane of dst. */
               if (i->ARMin.NUnaryS.size >= 16)
                  goto bad;
               if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Reg)
                  goto bad;
               if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
                  goto bad;
               regD = (hregClass(i->ARMin.NUnaryS.dst->reg) == HRcVec128)
                        ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1)
                        : dregNo(i->ARMin.NUnaryS.dst->reg);
               regM = (hregClass(i->ARMin.NUnaryS.src->reg) == HRcVec128)
                        ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1)
                        : dregNo(i->ARMin.NUnaryS.src->reg);
               D = regD >> 4;
               M = regM >> 4;
               regD &= 0xf;
               regM &= 0xf;
               /* 'size' here already encodes the imm4 lane-select field. */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1),
                               (i->ARMin.NUnaryS.size & 0xf), regD,
                               X1100, BITS4(0,Q,M,0), regM);
               *p++ = insn;
               goto done;
            case ARMneon_SETELEM:
               /* Move core reg src into lane dst->index of vector dst
                  (presumably VMOV Dd[x], Rt — verify against ARM ARM). */
               regD = Q ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1) :
                                dregNo(i->ARMin.NUnaryS.dst->reg);
               regM = iregNo(i->ARMin.NUnaryS.src->reg);
               M = regM >> 4;
               D = regD >> 4;
               regM &= 0xF;
               regD &= 0xF;
               if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Scalar)
                  goto bad;
               /* Validate the lane index against the element size and
                  build the combined index+size selector. */
               switch (size) {
                  case 0:  /* 8-bit lanes: index 0..7 */
                     if (i->ARMin.NUnaryS.dst->index > 7)
                        goto bad;
                     opc = X1000 | i->ARMin.NUnaryS.dst->index;
                     break;
                  case 1:  /* 16-bit lanes: index 0..3 */
                     if (i->ARMin.NUnaryS.dst->index > 3)
                        goto bad;
                     opc = X0001 | (i->ARMin.NUnaryS.dst->index << 1);
                     break;
                  case 2:  /* 32-bit lanes: index 0..1 */
                     if (i->ARMin.NUnaryS.dst->index > 1)
                        goto bad;
                     opc = X0000 | (i->ARMin.NUnaryS.dst->index << 2);
                     break;
                  default:
                     goto bad;
               }
               opc1 = (opc >> 2) & 3;
               opc2 = opc & 3;
               insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),0),
                               regD, regM, X1011,
                               BITS4(D,(opc2 >> 1),(opc2 & 1),1), X0000);
               *p++ = insn;
               goto done;
            case ARMneon_GETELEMU:
               /* Move lane src->index of vector src, zero-extended, into
                  core reg dst (presumably VMOV.U{8,16} Rt, Dn[x]). */
               regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) :
                                dregNo(i->ARMin.NUnaryS.src->reg);
               regD = iregNo(i->ARMin.NUnaryS.dst->reg);
               M = regM >> 4;
               D = regD >> 4;
               regM &= 0xF;
               regD &= 0xF;
               if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
                  goto bad;
               switch (size) {
                  case 0:
                     /* Q-reg lanes 8..15 live in the odd D reg of the
                        pair: bump regM and rebase the index.
                        NOTE(review): this mutates the instruction's own
                        index field in place — re-emitting the same insn
                        would mis-encode.  Confirm insns are emitted
                        exactly once. */
                     if (Q && i->ARMin.NUnaryS.src->index > 7) {
                        regM++;
                        i->ARMin.NUnaryS.src->index -= 8;
                     }
                     if (i->ARMin.NUnaryS.src->index > 7)
                        goto bad;
                     opc = X1000 | i->ARMin.NUnaryS.src->index;
                     break;
                  case 1:
                     if (Q && i->ARMin.NUnaryS.src->index > 3) {
                        regM++;
                        i->ARMin.NUnaryS.src->index -= 4;
                     }
                     if (i->ARMin.NUnaryS.src->index > 3)
                        goto bad;
                     opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
                     break;
                  case 2:
                     /* No unsigned 32-bit get: a full-lane move covers it. */
                     goto bad;
                  default:
                     goto bad;
               }
               opc1 = (opc >> 2) & 3;
               opc2 = opc & 3;
               insn = XXXXXXXX(0xE, X1110, BITS4(1,(opc1 >> 1),(opc1 & 1),1),
                               regM, regD, X1011,
                               BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
               *p++ = insn;
               goto done;
            case ARMneon_GETELEMS:
               /* As GETELEMU, but sign-extending; 32-bit lanes allowed. */
               regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) :
                                dregNo(i->ARMin.NUnaryS.src->reg);
               regD = iregNo(i->ARMin.NUnaryS.dst->reg);
               M = regM >> 4;
               D = regD >> 4;
               regM &= 0xF;
               regD &= 0xF;
               if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
                  goto bad;
               switch (size) {
                  case 0:
                     /* See NOTE(review) in GETELEMU: index field is
                        mutated in place here too. */
                     if (Q && i->ARMin.NUnaryS.src->index > 7) {
                        regM++;
                        i->ARMin.NUnaryS.src->index -= 8;
                     }
                     if (i->ARMin.NUnaryS.src->index > 7)
                        goto bad;
                     opc = X1000 | i->ARMin.NUnaryS.src->index;
                     break;
                  case 1:
                     if (Q && i->ARMin.NUnaryS.src->index > 3) {
                        regM++;
                        i->ARMin.NUnaryS.src->index -= 4;
                     }
                     if (i->ARMin.NUnaryS.src->index > 3)
                        goto bad;
                     opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
                     break;
                  case 2:
                     if (Q && i->ARMin.NUnaryS.src->index > 1) {
                        regM++;
                        i->ARMin.NUnaryS.src->index -= 2;
                     }
                     if (i->ARMin.NUnaryS.src->index > 1)
                        goto bad;
                     opc = X0000 | (i->ARMin.NUnaryS.src->index << 2);
                     break;
                  default:
                     goto bad;
               }
               opc1 = (opc >> 2) & 3;
               opc2 = opc & 3;
               insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),1),
                               regM, regD, X1011,
                               BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
               *p++ = insn;
               goto done;
            default:
               goto bad;
         }
      }
      /* NEON unary register-to-register ops.  regD/regM are D-reg
         numbers (Q regs encoded as their even D pair, << 1) with bit 4
         split out into the encoding's D/M bits; sz1:sz2 are the two
         bits of the element-size field. */
      case ARMin_NUnary: {
         UInt Q = i->ARMin.NUnary.Q ? 1 : 0;
         UInt regD = (hregClass(i->ARMin.NUnary.dst) == HRcVec128)
                       ? (qregNo(i->ARMin.NUnary.dst) << 1)
                       : dregNo(i->ARMin.NUnary.dst);
         UInt regM, M;
         UInt D = regD >> 4;
         UInt sz1 = i->ARMin.NUnary.size >> 1;
         UInt sz2 = i->ARMin.NUnary.size & 1;
         UInt sz = i->ARMin.NUnary.size;
         UInt insn;
         UInt F = 0; /* TODO: floating point EQZ ??? */
         if (i->ARMin.NUnary.op != ARMneon_DUP) {
            regM = (hregClass(i->ARMin.NUnary.src) == HRcVec128)
                     ? (qregNo(i->ARMin.NUnary.src) << 1)
                     : dregNo(i->ARMin.NUnary.src);
            M = regM >> 4;
         } else {
            /* DUP's source is a core (integer) register, not a vector. */
            regM = iregNo(i->ARMin.NUnary.src);
            M = regM >> 4;
         }
         regD &= 0xF;
         regM &= 0xF;
         switch (i->ARMin.NUnary.op) {
            case ARMneon_COPY: /* VMOV reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regM, regD, X0001,
                               BITS4(M,Q,M,1), regM);
               break;
            case ARMneon_COPYN: /* VMOVN regD, regQ */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0010, BITS4(0,0,M,0), regM);
               break;
            case ARMneon_COPYQNSS: /* VQMOVN regD, regQ */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0010, BITS4(1,0,M,0), regM);
               break;
            case ARMneon_COPYQNUS: /* VQMOVUN regD, regQ */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0010, BITS4(0,1,M,0), regM);
               break;
            case ARMneon_COPYQNUU: /* VQMOVN regD, regQ */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0010, BITS4(1,1,M,0), regM);
               break;
            case ARMneon_COPYLS: /* VMOVL regQ, regD */
               /* Widening move; 'sz' selects the source element size
                  and must be 0, 1 or 2. */
               if (sz >= 3)
                  goto bad;
               insn = XXXXXXXX(0xF, X0010,
                               BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
                               BITS4((sz == 0) ? 1 : 0,0,0,0),
                               regD, X1010, BITS4(0,0,M,1), regM);
               break;
            case ARMneon_COPYLU: /* VMOVL regQ, regD */
               if (sz >= 3)
                  goto bad;
               insn = XXXXXXXX(0xF, X0011,
                               BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
                               BITS4((sz == 0) ? 1 : 0,0,0,0),
                               regD, X1010, BITS4(0,0,M,1), regM);
               break;
            case ARMneon_NOT: /* VMVN reg, reg*/
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
                               BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_EQZ:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
                               regD, BITS4(0,F,0,1), BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_CNT:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_CLZ:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, X0100, BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_CLS:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, X0100, BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_ABS:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
                               regD, X0011, BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_DUP:
               /* Duplicate a core register into all lanes; re-derives
                  sz1/sz2 since this encoding's size bits differ from
                  the usual sz1:sz2 layout. */
               sz1 = i->ARMin.NUnary.size == 0 ? 1 : 0;
               sz2 = i->ARMin.NUnary.size == 1 ? 1 : 0;
               vassert(sz1 + sz2 < 2);
               insn = XXXXXXXX(0xE, X1110, BITS4(1, sz1, Q, 0), regD, regM,
                               X1011, BITS4(D,0,sz2,1), X0000);
               break;
            case ARMneon_REV16:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, BITS4(0,0,0,1), BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_REV32:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, BITS4(0,0,0,0), BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_REV64:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, BITS4(0,0,0,0), BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_PADDLU:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, X0010, BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_PADDLS:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, X0010, BITS4(0,Q,M,0), regM);
               break;
            /* For the VQSHLN* saturating-shift forms, 'sz' packs the
               immediate shift amount together with the size bits; it is
               scattered across three encoding fields below. */
            case ARMneon_VQSHLNUU:
               insn = XXXXXXXX(0xF, X0011,
                               (1 << 3) | (D << 2) | ((sz >> 4) & 3),
                               sz & 0xf, regD, X0111,
                               BITS4(sz >> 6,Q,M,1), regM);
               break;
            case ARMneon_VQSHLNSS:
               insn = XXXXXXXX(0xF, X0010,
                               (1 << 3) | (D << 2) | ((sz >> 4) & 3),
                               sz & 0xf, regD, X0111,
                               BITS4(sz >> 6,Q,M,1), regM);
               break;
            case ARMneon_VQSHLNUS:
               insn = XXXXXXXX(0xF, X0011,
                               (1 << 3) | (D << 2) | ((sz >> 4) & 3),
                               sz & 0xf, regD, X0110,
                               BITS4(sz >> 6,Q,M,1), regM);
               break;
            case ARMneon_VCVTFtoS:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_VCVTFtoU:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
                               BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_VCVTStoF:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_VCVTUtoF:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
                               BITS4(1,Q,M,0), regM);
               break;
            /* Fixed-point <-> FP conversions: bits 5:4 of 'sz' land in
               the size positions, low 4 bits are the fracbits field. */
            case ARMneon_VCVTFtoFixedU:
               sz1 = (sz >> 5) & 1;
               sz2 = (sz >> 4) & 1;
               sz &= 0xf;
               insn = XXXXXXXX(0xF, X0011,
                               BITS4(1,D,sz1,sz2), sz, regD, X1111,
                               BITS4(0,Q,M,1), regM);
               break;
            case ARMneon_VCVTFtoFixedS:
               sz1 = (sz >> 5) & 1;
               sz2 = (sz >> 4) & 1;
               sz &= 0xf;
               insn = XXXXXXXX(0xF, X0010,
                               BITS4(1,D,sz1,sz2), sz, regD, X1111,
                               BITS4(0,Q,M,1), regM);
               break;
            case ARMneon_VCVTFixedUtoF:
               sz1 = (sz >> 5) & 1;
               sz2 = (sz >> 4) & 1;
               sz &= 0xf;
               insn = XXXXXXXX(0xF, X0011,
                               BITS4(1,D,sz1,sz2), sz, regD, X1110,
                               BITS4(0,Q,M,1), regM);
               break;
            case ARMneon_VCVTFixedStoF:
               sz1 = (sz >> 5) & 1;
               sz2 = (sz >> 4) & 1;
               sz &= 0xf;
               insn = XXXXXXXX(0xF, X0010,
                               BITS4(1,D,sz1,sz2), sz, regD, X1110,
                               BITS4(0,Q,M,1), regM);
               break;
            case ARMneon_VCVTF32toF16:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0110,
                               BITS4(0,0,M,0), regM);
               break;
            case ARMneon_VCVTF16toF32:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0111,
                               BITS4(0,0,M,0), regM);
               break;
            case ARMneon_VRECIP:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_VRECIPF:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_VABSFP:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_VRSQRTEFP:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
                               BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_VRSQRTE:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
                               BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_VNEGF:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
                               BITS4(1,Q,M,0), regM);
               break;

            default:
               goto bad;
         }
         *p++ = insn;
         goto done;
      }
      /* NEON two-register "dual" ops (VTRN/VZIP/VUZP) that modify both
         operands in place.  Same register-number encoding conventions
         as the NUnary case above. */
      case ARMin_NDual: {
         UInt Q = i->ARMin.NDual.Q ? 1 : 0;
         UInt regD = (hregClass(i->ARMin.NDual.arg1) == HRcVec128)
                       ? (qregNo(i->ARMin.NDual.arg1) << 1)
                       : dregNo(i->ARMin.NDual.arg1);
         UInt regM = (hregClass(i->ARMin.NDual.arg2) == HRcVec128)
                       ? (qregNo(i->ARMin.NDual.arg2) << 1)
                       : dregNo(i->ARMin.NDual.arg2);
         UInt D = regD >> 4;
         UInt M = regM >> 4;
         UInt sz1 = i->ARMin.NDual.size >> 1;
         UInt sz2 = i->ARMin.NDual.size & 1;
         UInt insn;
         regD &= 0xF;
         regM &= 0xF;
         switch (i->ARMin.NDual.op) {
            case ARMneon_TRN: /* VTRN reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0000, BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_ZIP: /* VZIP reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0001, BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_UZP: /* VUZP reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0001, BITS4(0,Q,M,0), regM);
               break;
            default:
               goto bad;
         }
         *p++ = insn;
         goto done;
      }
      /* NEON three-register binary ops: dst = op(argL, argR).
         Register-number conventions as above (Q regs as even D pairs,
         bit 4 split into D/N/M).  NOTE(review): this case's switch
         continues beyond this excerpt. */
      case ARMin_NBinary: {
         UInt Q = i->ARMin.NBinary.Q ? 1 : 0;
         UInt regD = (hregClass(i->ARMin.NBinary.dst) == HRcVec128)
                       ? (qregNo(i->ARMin.NBinary.dst) << 1)
                       : dregNo(i->ARMin.NBinary.dst);
         UInt regN = (hregClass(i->ARMin.NBinary.argL) == HRcVec128)
                       ? (qregNo(i->ARMin.NBinary.argL) << 1)
                       : dregNo(i->ARMin.NBinary.argL);
         UInt regM = (hregClass(i->ARMin.NBinary.argR) == HRcVec128)
                       ? (qregNo(i->ARMin.NBinary.argR) << 1)
                       : dregNo(i->ARMin.NBinary.argR);
         UInt sz1 = i->ARMin.NBinary.size >> 1;
         UInt sz2 = i->ARMin.NBinary.size & 1;
         UInt D = regD >> 4;
         UInt N = regN >> 4;
         UInt M = regM >> 4;
         UInt insn;
         regD &= 0xF;
         regM &= 0xF;
         regN &= 0xF;
         switch (i->ARMin.NBinary.op) {
            case ARMneon_VAND: /* VAND reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X0001,
                               BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VORR: /* VORR reg, reg, reg*/
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X0001,
                               BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VXOR: /* VEOR reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X0001,
                               BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VADD: /* VADD reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1000, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VSUB: /* VSUB reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1000, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VMINU: /* VMIN.Uxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0110, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VMINS: /* VMIN.Sxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0110, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VMAXU: /* VMAX.Uxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0110, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VMAXS: /* VMAX.Sxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0110, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VRHADDS: /* VRHADD.Sxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0001, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VRHADDU: /* VRHADD.Uxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0001, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VQADDU: /* VQADD unsigned reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0000, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VQADDS: /* VQADD signed reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0000, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VQSUBU: /* VQSUB unsigned reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0010, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VQSUBS: /* VQSUB signed reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0010, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VCGTU: /* VCGT unsigned reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0011, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VCGTS: /* VCGT signed reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0011, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VCGEU: /* VCGE unsigned reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0011, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VCGES: /* VCGE signed reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0011, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VCEQ: /* VCEQ reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1000, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VEXT: /* VEXT.8 reg, reg, #imm4*/
               /* 'size' carries the byte-offset immediate (0..15). */
               if (i->ARMin.NBinary.size >= 16)
                  goto bad;
               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,1,1), regN, regD,
                               i->ARMin.NBinary.size & 0xf, BITS4(N,Q,M,0),
                               regM);
               break;
            case ARMneon_VMUL:
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1001, BITS4(N,Q,M,1), regM);
               break;
            /* The widening VMULL forms always force the Q bit to 0. */
            case ARMneon_VMULLU:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,sz1,sz2), regN, regD,
                               X1100, BITS4(N,0,M,0), regM);
               break;
            case ARMneon_VMULLS:
               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
                               X1100, BITS4(N,0,M,0), regM);
               break;
            case ARMneon_VMULP:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1001, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VMULFP:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
                               X1101, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VMULLP:
               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
                               X1110, BITS4(N,0,M,0), regM);
               break;
            case ARMneon_VQDMULH:
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1011, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VQRDMULH:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1011, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VQDMULL:
               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
                               X1101, BITS4(N,0,M,0), regM);
               break;
            case ARMneon_VTBL:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), regN, regD,
                               X1000, BITS4(N,0,M,0), regM);
               break;
            case ARMneon_VPADD:
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1011, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VPADDFP:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
                               X1101, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VPMINU:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1010, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VPMINS:
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1010, BITS4(N,Q,M,1), regM);
   4419                break;
   4420             case ARMneon_VPMAXU:
   4421                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   4422                                X1010, BITS4(N,Q,M,0), regM);
   4423                break;
   4424             case ARMneon_VPMAXS:
   4425                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   4426                                X1010, BITS4(N,Q,M,0), regM);
   4427                break;
   4428             case ARMneon_VADDFP: /* VADD reg, reg, reg */
   4429                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
   4430                                X1101, BITS4(N,Q,M,0), regM);
   4431                break;
    4432             case ARMneon_VSUBFP: /* VSUB reg, reg, reg */
   4433                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
   4434                                X1101, BITS4(N,Q,M,0), regM);
   4435                break;
   4436             case ARMneon_VABDFP: /* VABD reg, reg, reg */
   4437                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
   4438                                X1101, BITS4(N,Q,M,0), regM);
   4439                break;
   4440             case ARMneon_VMINF:
   4441                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
   4442                                X1111, BITS4(N,Q,M,0), regM);
   4443                break;
   4444             case ARMneon_VMAXF:
   4445                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
   4446                                X1111, BITS4(N,Q,M,0), regM);
   4447                break;
   4448             case ARMneon_VPMINF:
   4449                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
   4450                                X1111, BITS4(N,Q,M,0), regM);
   4451                break;
   4452             case ARMneon_VPMAXF:
   4453                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
   4454                                X1111, BITS4(N,Q,M,0), regM);
   4455                break;
   4456             case ARMneon_VRECPS:
   4457                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1111,
   4458                                BITS4(N,Q,M,1), regM);
   4459                break;
   4460             case ARMneon_VCGTF:
   4461                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD, X1110,
   4462                                BITS4(N,Q,M,0), regM);
   4463                break;
   4464             case ARMneon_VCGEF:
   4465                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X1110,
   4466                                BITS4(N,Q,M,0), regM);
   4467                break;
   4468             case ARMneon_VCEQF:
   4469                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1110,
   4470                                BITS4(N,Q,M,0), regM);
   4471                break;
   4472             case ARMneon_VRSQRTS:
   4473                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X1111,
   4474                                BITS4(N,Q,M,1), regM);
   4475                break;
   4476             default:
   4477                goto bad;
   4478          }
   4479          *p++ = insn;
   4480          goto done;
   4481       }
   4482       case ARMin_NShift: {
   4483          UInt Q = i->ARMin.NShift.Q ? 1 : 0;
   4484          UInt regD = (hregClass(i->ARMin.NShift.dst) == HRcVec128)
   4485                        ? (qregNo(i->ARMin.NShift.dst) << 1)
   4486                        : dregNo(i->ARMin.NShift.dst);
   4487          UInt regM = (hregClass(i->ARMin.NShift.argL) == HRcVec128)
   4488                        ? (qregNo(i->ARMin.NShift.argL) << 1)
   4489                        : dregNo(i->ARMin.NShift.argL);
   4490          UInt regN = (hregClass(i->ARMin.NShift.argR) == HRcVec128)
   4491                        ? (qregNo(i->ARMin.NShift.argR) << 1)
   4492                        : dregNo(i->ARMin.NShift.argR);
   4493          UInt sz1 = i->ARMin.NShift.size >> 1;
   4494          UInt sz2 = i->ARMin.NShift.size & 1;
   4495          UInt D = regD >> 4;
   4496          UInt N = regN >> 4;
   4497          UInt M = regM >> 4;
   4498          UInt insn;
   4499          regD &= 0xF;
   4500          regM &= 0xF;
   4501          regN &= 0xF;
   4502          switch (i->ARMin.NShift.op) {
   4503             case ARMneon_VSHL:
   4504                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   4505                                X0100, BITS4(N,Q,M,0), regM);
   4506                break;
   4507             case ARMneon_VSAL:
   4508                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   4509                                X0100, BITS4(N,Q,M,0), regM);
   4510                break;
   4511             case ARMneon_VQSHL:
   4512                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   4513                                X0100, BITS4(N,Q,M,1), regM);
   4514                break;
   4515             case ARMneon_VQSAL:
   4516                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   4517                                X0100, BITS4(N,Q,M,1), regM);
   4518                break;
   4519             default:
   4520                goto bad;
   4521          }
   4522          *p++ = insn;
   4523          goto done;
   4524       }
   4525       case ARMin_NShl64: {
   4526          HReg regDreg = i->ARMin.NShl64.dst;
   4527          HReg regMreg = i->ARMin.NShl64.src;
   4528          UInt amt     = i->ARMin.NShl64.amt;
   4529          vassert(amt >= 1 && amt <= 63);
   4530          vassert(hregClass(regDreg) == HRcFlt64);
   4531          vassert(hregClass(regMreg) == HRcFlt64);
   4532          UInt regD = dregNo(regDreg);
   4533          UInt regM = dregNo(regMreg);
   4534          UInt D    = (regD >> 4) & 1;
   4535          UInt Vd   = regD & 0xF;
   4536          UInt L    = 1;
   4537          UInt Q    = 0; /* always 64-bit */
   4538          UInt M    = (regM >> 4) & 1;
   4539          UInt Vm   = regM & 0xF;
   4540          UInt insn = XXXXXXXX(X1111,X0010, BITS4(1,D,(amt>>5)&1,(amt>>4)&1),
   4541                               amt & 0xF, Vd, X0101, BITS4(L,Q,M,1), Vm);
   4542          *p++ = insn;
   4543          goto done;
   4544       }
   4545       case ARMin_NeonImm: {
   4546          UInt Q = (hregClass(i->ARMin.NeonImm.dst) == HRcVec128) ? 1 : 0;
   4547          UInt regD = Q ? (qregNo(i->ARMin.NeonImm.dst) << 1) :
   4548                           dregNo(i->ARMin.NeonImm.dst);
   4549          UInt D = regD >> 4;
   4550          UInt imm = i->ARMin.NeonImm.imm->imm8;
   4551          UInt tp = i->ARMin.NeonImm.imm->type;
   4552          UInt j = imm >> 7;
   4553          UInt imm3 = (imm >> 4) & 0x7;
   4554          UInt imm4 = imm & 0xF;
   4555          UInt cmode, op;
   4556          UInt insn;
   4557          regD &= 0xF;
   4558          if (tp == 9)
   4559             op = 1;
   4560          else
   4561             op = 0;
   4562          switch (tp) {
   4563             case 0:
   4564             case 1:
   4565             case 2:
   4566             case 3:
   4567             case 4:
   4568             case 5:
   4569                cmode = tp << 1;
   4570                break;
   4571             case 9:
   4572             case 6:
   4573                cmode = 14;
   4574                break;
   4575             case 7:
   4576                cmode = 12;
   4577                break;
   4578             case 8:
   4579                cmode = 13;
   4580                break;
   4581             case 10:
   4582                cmode = 15;
   4583                break;
   4584             default:
   4585                vpanic("ARMin_NeonImm");
   4586 
   4587          }
   4588          insn = XXXXXXXX(0xF, BITS4(0,0,1,j), BITS4(1,D,0,0), imm3, regD,
   4589                          cmode, BITS4(0,Q,op,1), imm4);
   4590          *p++ = insn;
   4591          goto done;
   4592       }
   4593       case ARMin_NCMovQ: {
   4594          UInt cc = (UInt)i->ARMin.NCMovQ.cond;
   4595          UInt qM = qregNo(i->ARMin.NCMovQ.src) << 1;
   4596          UInt qD = qregNo(i->ARMin.NCMovQ.dst) << 1;
   4597          UInt vM = qM & 0xF;
   4598          UInt vD = qD & 0xF;
   4599          UInt M  = (qM >> 4) & 1;
   4600          UInt D  = (qD >> 4) & 1;
   4601          vassert(cc < 16 && cc != ARMcc_AL && cc != ARMcc_NV);
   4602          /* b!cc here+8: !cc A00 0000 */
   4603          UInt insn = XXXXXXXX(cc ^ 1, 0xA, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0);
   4604          *p++ = insn;
   4605          /* vmov qD, qM */
   4606          insn = XXXXXXXX(0xF, 0x2, BITS4(0,D,1,0),
   4607                          vM, vD, BITS4(0,0,0,1), BITS4(M,1,M,1), vM);
   4608          *p++ = insn;
   4609          goto done;
   4610       }
   4611       case ARMin_Add32: {
   4612          UInt regD = iregNo(i->ARMin.Add32.rD);
   4613          UInt regN = iregNo(i->ARMin.Add32.rN);
   4614          UInt imm32 = i->ARMin.Add32.imm32;
   4615          vassert(regD != regN);
   4616          /* MOV regD, imm32 */
   4617          p = imm32_to_iregNo((UInt *)p, regD, imm32);
   4618          /* ADD regD, regN, regD */
   4619          UInt insn = XXXXXXXX(0xE, 0, X1000, regN, regD, 0, 0, regD);
   4620          *p++ = insn;
   4621          goto done;
   4622       }
   4623 
   4624       case ARMin_EvCheck: {
   4625          /* We generate:
   4626                ldr  r12, [r8 + #4]   4 == offsetof(host_EvC_COUNTER)
   4627                subs r12, r12, #1  (A1)
   4628                str  r12, [r8 + #4]   4 == offsetof(host_EvC_COUNTER)
   4629                bpl  nofail
   4630                ldr  r12, [r8 + #0]   0 == offsetof(host_EvC_FAILADDR)
   4631                bx   r12
   4632               nofail:
   4633          */
   4634          UInt* p0 = p;
   4635          p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
   4636                                 i->ARMin.EvCheck.amCounter);
   4637          *p++ = 0xE25CC001; /* subs r12, r12, #1 */
   4638          p = do_load_or_store32(p, False/*!isLoad*/, /*r*/12,
   4639                                 i->ARMin.EvCheck.amCounter);
   4640          *p++ = 0x5A000001; /* bpl nofail */
   4641          p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
   4642                                 i->ARMin.EvCheck.amFailAddr);
   4643          *p++ = 0xE12FFF1C; /* bx r12 */
   4644          /* nofail: */
   4645 
   4646          /* Crosscheck */
   4647          vassert(evCheckSzB_ARM() == (UChar*)p - (UChar*)p0);
   4648          goto done;
   4649       }
   4650 
   4651       case ARMin_ProfInc: {
   4652          /* We generate:
   4653               (ctrP is unknown now, so use 0x65556555 in the
   4654               expectation that a later call to LibVEX_patchProfCtr
   4655               will be used to fill in the immediate fields once the
   4656               right value is known.)
   4657             movw r12, lo16(0x65556555)
    4658             movt r12, hi16(0x65556555)
   4659             ldr  r11, [r12]
   4660             adds r11, r11, #1
   4661             str  r11, [r12]
   4662             ldr  r11, [r12+4]
   4663             adc  r11, r11, #0
   4664             str  r11, [r12+4]
   4665          */
   4666          p = imm32_to_iregNo_EXACTLY2(p, /*r*/12, 0x65556555);
   4667          *p++ = 0xE59CB000;
   4668          *p++ = 0xE29BB001;
   4669          *p++ = 0xE58CB000;
   4670          *p++ = 0xE59CB004;
   4671          *p++ = 0xE2ABB000;
   4672          *p++ = 0xE58CB004;
   4673          /* Tell the caller .. */
   4674          vassert(!(*is_profInc));
   4675          *is_profInc = True;
   4676          goto done;
   4677       }
   4678 
   4679       /* ... */
   4680       default:
   4681          goto bad;
   4682     }
   4683 
   4684   bad:
   4685    ppARMInstr(i);
   4686    vpanic("emit_ARMInstr");
   4687    /*NOTREACHED*/
   4688 
   4689   done:
   4690    vassert(((UChar*)p) - &buf[0] <= 32);
   4691    return ((UChar*)p) - &buf[0];
   4692 }
   4693 
   4694 
   4695 /* How big is an event check?  See case for ARMin_EvCheck in
   4696    emit_ARMInstr just above.  That crosschecks what this returns, so
   4697    we can tell if we're inconsistent. */
   4698 Int evCheckSzB_ARM ( void )
   4699 {
   4700    return 24;
   4701 }
   4702 
   4703 
   4704 /* NB: what goes on here has to be very closely coordinated with the
   4705    emitInstr case for XDirect, above. */
   4706 VexInvalRange chainXDirect_ARM ( void* place_to_chain,
   4707                                  void* disp_cp_chain_me_EXPECTED,
   4708                                  void* place_to_jump_to )
   4709 {
   4710    /* What we're expecting to see is:
   4711         movw r12, lo16(disp_cp_chain_me_to_EXPECTED)
   4712         movt r12, hi16(disp_cp_chain_me_to_EXPECTED)
   4713         blx  r12
   4714       viz
   4715         <8 bytes generated by imm32_to_iregNo_EXACTLY2>
   4716         E1 2F FF 3C
   4717    */
   4718    UInt* p = (UInt*)place_to_chain;
   4719    vassert(0 == (3 & (HWord)p));
   4720    vassert(is_imm32_to_iregNo_EXACTLY2(
   4721               p, /*r*/12, (UInt)Ptr_to_ULong(disp_cp_chain_me_EXPECTED)));
   4722    vassert(p[2] == 0xE12FFF3C);
   4723    /* And what we want to change it to is either:
   4724         (general case)
   4725           movw r12, lo16(place_to_jump_to)
   4726           movt r12, hi16(place_to_jump_to)
   4727           bx   r12
   4728         viz
   4729           <8 bytes generated by imm32_to_iregNo_EXACTLY2>
   4730           E1 2F FF 1C
   4731       ---OR---
   4732         in the case where the displacement falls within 26 bits
   4733           b disp24; undef; undef
   4734         viz
   4735           EA <3 bytes == disp24>
   4736           FF 00 00 00
   4737           FF 00 00 00
   4738 
   4739       In both cases the replacement has the same length as the original.
   4740       To remain sane & verifiable,
   4741       (1) limit the displacement for the short form to
   4742           (say) +/- 30 million, so as to avoid wraparound
   4743           off-by-ones
   4744       (2) even if the short form is applicable, once every (say)
   4745           1024 times use the long form anyway, so as to maintain
   4746           verifiability
   4747    */
   4748 
   4749    /* This is the delta we need to put into a B insn.  It's relative
   4750       to the start of the next-but-one insn, hence the -8.  */
   4751    Long delta   = (Long)((UChar*)place_to_jump_to - (UChar*)p) - (Long)8;
   4752    Bool shortOK = delta >= -30*1000*1000 && delta < 30*1000*1000;
   4753    vassert(0 == (delta & (Long)3));
   4754 
   4755    static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */
   4756    if (shortOK) {
   4757       shortCTR++; // thread safety bleh
   4758       if (0 == (shortCTR & 0x3FF)) {
   4759          shortOK = False;
   4760          if (0)
   4761             vex_printf("QQQ chainXDirect_ARM: shortCTR = %u, "
   4762                        "using long form\n", shortCTR);
   4763       }
   4764    }
   4765 
   4766    /* And make the modifications. */
   4767    if (shortOK) {
   4768       Int simm24 = (Int)(delta >> 2);
   4769       vassert(simm24 == ((simm24 << 8) >> 8));
   4770       p[0] = 0xEA000000 | (simm24 & 0x00FFFFFF);
   4771       p[1] = 0xFF000000;
   4772       p[2] = 0xFF000000;
   4773    } else {
   4774       (void)imm32_to_iregNo_EXACTLY2(
   4775                p, /*r*/12, (UInt)Ptr_to_ULong(place_to_jump_to));
   4776       p[2] = 0xE12FFF1C;
   4777    }
   4778 
   4779    VexInvalRange vir = {(HWord)p, 12};
   4780    return vir;
   4781 }
   4782 
   4783 
   4784 /* NB: what goes on here has to be very closely coordinated with the
   4785    emitInstr case for XDirect, above. */
   4786 VexInvalRange unchainXDirect_ARM ( void* place_to_unchain,
   4787                                    void* place_to_jump_to_EXPECTED,
   4788                                    void* disp_cp_chain_me )
   4789 {
   4790    /* What we're expecting to see is:
   4791         (general case)
   4792           movw r12, lo16(place_to_jump_to_EXPECTED)
   4793           movt r12, lo16(place_to_jump_to_EXPECTED)
   4794           bx   r12
   4795         viz
   4796           <8 bytes generated by imm32_to_iregNo_EXACTLY2>
   4797           E1 2F FF 1C
   4798       ---OR---
   4799         in the case where the displacement falls within 26 bits
   4800           b disp24; undef; undef
   4801         viz
   4802           EA <3 bytes == disp24>
   4803           FF 00 00 00
   4804           FF 00 00 00
   4805    */
   4806    UInt* p = (UInt*)place_to_unchain;
   4807    vassert(0 == (3 & (HWord)p));
   4808 
   4809    Bool valid = False;
   4810    if (is_imm32_to_iregNo_EXACTLY2(
   4811           p, /*r*/12, (UInt)Ptr_to_ULong(place_to_jump_to_EXPECTED))
   4812        && p[2] == 0xE12FFF1C) {
   4813       valid = True; /* it's the long form */
   4814       if (0)
   4815          vex_printf("QQQ unchainXDirect_ARM: found long form\n");
   4816    } else
   4817    if ((p[0] >> 24) == 0xEA && p[1] == 0xFF000000 && p[2] == 0xFF000000) {
   4818       /* It's the short form.  Check the displacement is right. */
   4819       Int simm24 = p[0] & 0x00FFFFFF;
   4820       simm24 <<= 8; simm24 >>= 8;
   4821       if ((UChar*)p + (simm24 << 2) + 8 == (UChar*)place_to_jump_to_EXPECTED) {
   4822          valid = True;
   4823          if (0)
   4824             vex_printf("QQQ unchainXDirect_ARM: found short form\n");
   4825       }
   4826    }
   4827    vassert(valid);
   4828 
   4829    /* And what we want to change it to is:
   4830         movw r12, lo16(disp_cp_chain_me)
   4831         movt r12, hi16(disp_cp_chain_me)
   4832         blx  r12
   4833       viz
   4834         <8 bytes generated by imm32_to_iregNo_EXACTLY2>
   4835         E1 2F FF 3C
   4836    */
   4837    (void)imm32_to_iregNo_EXACTLY2(
   4838             p, /*r*/12, (UInt)Ptr_to_ULong(disp_cp_chain_me));
   4839    p[2] = 0xE12FFF3C;
   4840    VexInvalRange vir = {(HWord)p, 12};
   4841    return vir;
   4842 }
   4843 
   4844 
   4845 /* Patch the counter address into a profile inc point, as previously
   4846    created by the ARMin_ProfInc case for emit_ARMInstr. */
VexInvalRange patchProfInc_ARM ( void*  place_to_patch,
                                 ULong* location_of_counter )
{
   /* This backend is 32-bit only, so a counter pointer must fit in
      the 32-bit movw/movt pair being patched. */
   vassert(sizeof(ULong*) == 4);
   UInt* p = (UInt*)place_to_patch;
   /* Patch site must be word aligned. */
   vassert(0 == (3 & (HWord)p));
   /* Verify this really is the 8-insn sequence laid down by the
      ARMin_ProfInc case of emit_ARMInstr: movw/movt r12 with the
      0x65556555 placeholder, then a 64-bit load/inc/store of the
      counter through r12. */
   vassert(is_imm32_to_iregNo_EXACTLY2(p, /*r*/12, 0x65556555));
   vassert(p[2] == 0xE59CB000);   /* ldr  r11, [r12]    */
   vassert(p[3] == 0xE29BB001);   /* adds r11, r11, #1  */
   vassert(p[4] == 0xE58CB000);   /* str  r11, [r12]    */
   vassert(p[5] == 0xE59CB004);   /* ldr  r11, [r12+4]  */
   vassert(p[6] == 0xE2ABB000);   /* adc  r11, r11, #0  */
   vassert(p[7] == 0xE58CB004);   /* str  r11, [r12+4]  */
   /* Overwrite the placeholder with the real counter address. */
   imm32_to_iregNo_EXACTLY2(p, /*r*/12,
                            (UInt)Ptr_to_ULong(location_of_counter));
   /* Only the first two insns (8 bytes) were modified, so that is
      all that needs invalidating. */
   VexInvalRange vir = {(HWord)p, 8};
   return vir;
}
   4865 
   4866 
   4867 #undef BITS4
   4868 #undef X0000
   4869 #undef X0001
   4870 #undef X0010
   4871 #undef X0011
   4872 #undef X0100
   4873 #undef X0101
   4874 #undef X0110
   4875 #undef X0111
   4876 #undef X1000
   4877 #undef X1001
   4878 #undef X1010
   4879 #undef X1011
   4880 #undef X1100
   4881 #undef X1101
   4882 #undef X1110
   4883 #undef X1111
   4884 #undef XXXXX___
   4885 #undef XXXXXX__
   4886 #undef XXX___XX
   4887 #undef XXXXX__X
   4888 #undef XXXXXXXX
   4889 #undef XX______
   4890 
   4891 /*---------------------------------------------------------------*/
   4892 /*--- end                                     host_arm_defs.c ---*/
   4893 /*---------------------------------------------------------------*/
   4894