
/*---------------------------------------------------------------*/
/*--- begin                                   host_arm_defs.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP
      info (at) open-works.net

   NEON support is
   Copyright (C) 2010-2017 Samsung Electronics
   contributed by Dmitry Zhurikhin <zhur (at) ispras.ru>
              and Kirill Batuzov <batuzovk (at) ispras.ru>

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "libvex_basictypes.h"
#include "libvex.h"
#include "libvex_trc_values.h"

#include "main_util.h"
#include "host_generic_regs.h"
#include "host_arm_defs.h"

UInt arm_hwcaps = 0;


/* --------- Registers. --------- */

const RRegUniverse* getRRegUniverse_ARM ( void )
{
   /* The real-register universe is a big constant, so we just want to
      initialise it once. */
   static RRegUniverse rRegUniverse_ARM;
   static Bool         rRegUniverse_ARM_initted = False;

   /* Handy shorthand, nothing more */
   RRegUniverse* ru = &rRegUniverse_ARM;

   /* This isn't thread-safe.  Sigh. */
   if (LIKELY(rRegUniverse_ARM_initted))
      return ru;

   RRegUniverse__init(ru);

   /* Add the registers.  The initial segment of this array must be
      those available for allocation by reg-alloc, and those that
      follow are not available for allocation. */

   /* Callee-saved ones are listed first, since we prefer them
      if they're available. */
   ru->regs[ru->size++] = hregARM_R4();
   ru->regs[ru->size++] = hregARM_R5();
   ru->regs[ru->size++] = hregARM_R6();
   ru->regs[ru->size++] = hregARM_R7();
   ru->regs[ru->size++] = hregARM_R10();
   ru->regs[ru->size++] = hregARM_R11();
   /* Otherwise we'll have to slum it out with caller-saved ones. */
   ru->regs[ru->size++] = hregARM_R0();
   ru->regs[ru->size++] = hregARM_R1();
   ru->regs[ru->size++] = hregARM_R2();
   ru->regs[ru->size++] = hregARM_R3();
   ru->regs[ru->size++] = hregARM_R9();
   /* FP registers.  Note: these are all callee-save.  Yay!  Hence we
      don't need to mention them as trashed in getHRegUsage for
      ARMInstr_Call. */
   ru->regs[ru->size++] = hregARM_D8();
   ru->regs[ru->size++] = hregARM_D9();
   ru->regs[ru->size++] = hregARM_D10();
   ru->regs[ru->size++] = hregARM_D11();
   ru->regs[ru->size++] = hregARM_D12();
   ru->regs[ru->size++] = hregARM_S26();
   ru->regs[ru->size++] = hregARM_S27();
   ru->regs[ru->size++] = hregARM_S28();
   ru->regs[ru->size++] = hregARM_S29();
   ru->regs[ru->size++] = hregARM_S30();
   ru->regs[ru->size++] = hregARM_Q8();
   ru->regs[ru->size++] = hregARM_Q9();
   ru->regs[ru->size++] = hregARM_Q10();
   ru->regs[ru->size++] = hregARM_Q11();
   ru->regs[ru->size++] = hregARM_Q12();
   ru->allocable = ru->size;

   /* And other regs, not available to the allocator. */

   // unavail: r8 as GSP
   // r12 is used as a spill/reload temporary
   // r13 as SP
   // r14 as LR
   // r15 as PC
   //
   // All in all, we have 11 allocatable integer registers:
   // 0 1 2 3 4 5 6 7 9 10 11, with r8 dedicated as GSP
   // and r12 dedicated as a spill temporary.
   // 13 14 and 15 are not under the allocator's control.
   //
   // Hence for the allocatable registers we have:
   //
   // callee-saved: 4 5 6 7 (8) 9 10 11
   // caller-saved: 0 1 2 3
   // Note 9 is ambiguous: the base EABI does not give an e/r-saved
   // designation for it, but the Linux instantiation of the ABI
   // specifies it as callee-saved.
   //
   // If the set of available registers changes or if the e/r status
   // changes, be sure to re-check/sync the definition of
   // getHRegUsage for ARMInstr_Call too.
   ru->regs[ru->size++] = hregARM_R8();
   ru->regs[ru->size++] = hregARM_R12();
   ru->regs[ru->size++] = hregARM_R13();
   ru->regs[ru->size++] = hregARM_R14();
   ru->regs[ru->size++] = hregARM_R15();
   ru->regs[ru->size++] = hregARM_Q13();
   ru->regs[ru->size++] = hregARM_Q14();
   ru->regs[ru->size++] = hregARM_Q15();

   rRegUniverse_ARM_initted = True;

   RRegUniverse__check_is_sane(ru);
   return ru;
}
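
/* Illustrative sketch (not part of the original file): the register
   allocator treats regs[0 .. allocable-1] as assignable and the
   remaining entries as registers it must know about but may never
   assign.  A debugging dump of the allocatable set could look like
   this:

      const RRegUniverse* ru = getRRegUniverse_ARM();
      UInt k;
      for (k = 0; k < ru->allocable; k++) {
         ppHRegARM(ru->regs[k]);   // r4 r5 r6 r7 r10 r11 r0 ... q12
         vex_printf(" ");
      }
*/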


void ppHRegARM ( HReg reg )  {
   Int r;
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      ppHReg(reg);
      return;
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt32:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 16);
         vex_printf("r%d", r);
         return;
      case HRcFlt64:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 32);
         vex_printf("d%d", r);
         return;
      case HRcFlt32:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 32);
         vex_printf("s%d", r);
         return;
      case HRcVec128:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 16);
         vex_printf("q%d", r);
         return;
      default:
         vpanic("ppHRegARM");
   }
}


/* --------- Condition codes, ARM encoding. --------- */

const HChar* showARMCondCode ( ARMCondCode cond ) {
   switch (cond) {
       case ARMcc_EQ:  return "eq";
       case ARMcc_NE:  return "ne";
       case ARMcc_HS:  return "hs";
       case ARMcc_LO:  return "lo";
       case ARMcc_MI:  return "mi";
       case ARMcc_PL:  return "pl";
       case ARMcc_VS:  return "vs";
       case ARMcc_VC:  return "vc";
       case ARMcc_HI:  return "hi";
       case ARMcc_LS:  return "ls";
       case ARMcc_GE:  return "ge";
       case ARMcc_LT:  return "lt";
       case ARMcc_GT:  return "gt";
       case ARMcc_LE:  return "le";
       case ARMcc_AL:  return "al"; // default
       case ARMcc_NV:  return "nv";
       default: vpanic("showARMCondCode");
   }
}


/* --------- Mem AModes: Addressing Mode 1 --------- */

ARMAMode1* ARMAMode1_RI  ( HReg reg, Int simm13 ) {
   ARMAMode1* am        = LibVEX_Alloc_inline(sizeof(ARMAMode1));
   am->tag              = ARMam1_RI;
   am->ARMam1.RI.reg    = reg;
   am->ARMam1.RI.simm13 = simm13;
   vassert(-4095 <= simm13 && simm13 <= 4095);
   return am;
}
ARMAMode1* ARMAMode1_RRS ( HReg base, HReg index, UInt shift ) {
   ARMAMode1* am        = LibVEX_Alloc_inline(sizeof(ARMAMode1));
   am->tag              = ARMam1_RRS;
   am->ARMam1.RRS.base  = base;
   am->ARMam1.RRS.index = index;
   am->ARMam1.RRS.shift = shift;
   vassert(0 <= shift && shift <= 3);
   return am;
}

void ppARMAMode1 ( ARMAMode1* am ) {
   switch (am->tag) {
      case ARMam1_RI:
         vex_printf("%d(", am->ARMam1.RI.simm13);
         ppHRegARM(am->ARMam1.RI.reg);
         vex_printf(")");
         break;
      case ARMam1_RRS:
         vex_printf("(");
         ppHRegARM(am->ARMam1.RRS.base);
         vex_printf(",");
         ppHRegARM(am->ARMam1.RRS.index);
         vex_printf(",%u)", am->ARMam1.RRS.shift);
         break;
      default:
         vassert(0);
   }
}

static void addRegUsage_ARMAMode1 ( HRegUsage* u, ARMAMode1* am ) {
   switch (am->tag) {
      case ARMam1_RI:
         addHRegUse(u, HRmRead, am->ARMam1.RI.reg);
         return;
      case ARMam1_RRS:
         /* fall through: RRS amodes are evidently never generated
            (note the commented-out handling), so treat this case as
            unhandled. */
         //    addHRegUse(u, HRmRead, am->ARMam1.RRS.base);
         //    addHRegUse(u, HRmRead, am->ARMam1.RRS.index);
         //   return;
      default:
         vpanic("addRegUsage_ARMAmode1");
   }
}

static void mapRegs_ARMAMode1 ( HRegRemap* m, ARMAMode1* am ) {
   switch (am->tag) {
      case ARMam1_RI:
         am->ARMam1.RI.reg = lookupHRegRemap(m, am->ARMam1.RI.reg);
         return;
      case ARMam1_RRS:
         /* fall through: see addRegUsage_ARMAMode1 above -- the RRS
            case is never generated, hence unhandled. */
         //am->ARMam1.RR.base =lookupHRegRemap(m, am->ARMam1.RR.base);
         //am->ARMam1.RR.index = lookupHRegRemap(m, am->ARMam1.RR.index);
         //return;
      default:
         vpanic("mapRegs_ARMAmode1");
   }
}


/* --------- Mem AModes: Addressing Mode 2 --------- */

ARMAMode2* ARMAMode2_RI ( HReg reg, Int simm9 ) {
   ARMAMode2* am       = LibVEX_Alloc_inline(sizeof(ARMAMode2));
   am->tag             = ARMam2_RI;
   am->ARMam2.RI.reg   = reg;
   am->ARMam2.RI.simm9 = simm9;
   vassert(-255 <= simm9 && simm9 <= 255);
   return am;
}
ARMAMode2* ARMAMode2_RR ( HReg base, HReg index ) {
   ARMAMode2* am       = LibVEX_Alloc_inline(sizeof(ARMAMode2));
   am->tag             = ARMam2_RR;
   am->ARMam2.RR.base  = base;
   am->ARMam2.RR.index = index;
   return am;
}

void ppARMAMode2 ( ARMAMode2* am ) {
   switch (am->tag) {
      case ARMam2_RI:
         vex_printf("%d(", am->ARMam2.RI.simm9);
         ppHRegARM(am->ARMam2.RI.reg);
         vex_printf(")");
         break;
      case ARMam2_RR:
         vex_printf("(");
         ppHRegARM(am->ARMam2.RR.base);
         vex_printf(",");
         ppHRegARM(am->ARMam2.RR.index);
         vex_printf(")");
         break;
      default:
         vassert(0);
   }
}

static void addRegUsage_ARMAMode2 ( HRegUsage* u, ARMAMode2* am ) {
   switch (am->tag) {
      case ARMam2_RI:
         addHRegUse(u, HRmRead, am->ARMam2.RI.reg);
         return;
      case ARMam2_RR:
         /* fall through: RR amodes are never generated here either. */
         //    addHRegUse(u, HRmRead, am->ARMam2.RR.base);
         //    addHRegUse(u, HRmRead, am->ARMam2.RR.index);
         //   return;
      default:
         vpanic("addRegUsage_ARMAmode2");
   }
}

static void mapRegs_ARMAMode2 ( HRegRemap* m, ARMAMode2* am ) {
   switch (am->tag) {
      case ARMam2_RI:
         am->ARMam2.RI.reg = lookupHRegRemap(m, am->ARMam2.RI.reg);
         return;
      case ARMam2_RR:
         /* fall through: see addRegUsage_ARMAMode2 above. */
         //am->ARMam2.RR.base =lookupHRegRemap(m, am->ARMam2.RR.base);
         //am->ARMam2.RR.index = lookupHRegRemap(m, am->ARMam2.RR.index);
         //return;
      default:
         vpanic("mapRegs_ARMAmode2");
   }
}


/* --------- Mem AModes: Addressing Mode VFP --------- */

ARMAModeV* mkARMAModeV ( HReg reg, Int simm11 ) {
   ARMAModeV* am = LibVEX_Alloc_inline(sizeof(ARMAModeV));
   vassert(simm11 >= -1020 && simm11 <= 1020);
   vassert(0 == (simm11 & 3));
   am->reg    = reg;
   am->simm11 = simm11;
   return am;
}
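
/* Added note (explanatory, not in the original file): the +/-1020 and
   4-alignment checks above mirror the VFP VLDR/VSTR encoding, in
   which the offset is an unsigned 8-bit word count plus an add/sub
   bit: |simm11| == imm8 * 4 with imm8 <= 255, and 255 * 4 == 1020.
   For example, simm11 == -8 would be encoded as imm8 == 2 with the
   subtract bit set. */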

void ppARMAModeV ( ARMAModeV* am ) {
   vex_printf("%d(", am->simm11);
   ppHRegARM(am->reg);
   vex_printf(")");
}

static void addRegUsage_ARMAModeV ( HRegUsage* u, ARMAModeV* am ) {
   addHRegUse(u, HRmRead, am->reg);
}

static void mapRegs_ARMAModeV ( HRegRemap* m, ARMAModeV* am ) {
   am->reg = lookupHRegRemap(m, am->reg);
}


/* --------- Mem AModes: Addressing Mode Neon ------- */

ARMAModeN *mkARMAModeN_RR ( HReg rN, HReg rM ) {
   ARMAModeN* am = LibVEX_Alloc_inline(sizeof(ARMAModeN));
   am->tag = ARMamN_RR;
   am->ARMamN.RR.rN = rN;
   am->ARMamN.RR.rM = rM;
   return am;
}

ARMAModeN *mkARMAModeN_R ( HReg rN ) {
   ARMAModeN* am = LibVEX_Alloc_inline(sizeof(ARMAModeN));
   am->tag = ARMamN_R;
   am->ARMamN.R.rN = rN;
   return am;
}

static void addRegUsage_ARMAModeN ( HRegUsage* u, ARMAModeN* am ) {
   if (am->tag == ARMamN_R) {
      addHRegUse(u, HRmRead, am->ARMamN.R.rN);
   } else {
      addHRegUse(u, HRmRead, am->ARMamN.RR.rN);
      addHRegUse(u, HRmRead, am->ARMamN.RR.rM);
   }
}

static void mapRegs_ARMAModeN ( HRegRemap* m, ARMAModeN* am ) {
   if (am->tag == ARMamN_R) {
      am->ARMamN.R.rN = lookupHRegRemap(m, am->ARMamN.R.rN);
   } else {
      am->ARMamN.RR.rN = lookupHRegRemap(m, am->ARMamN.RR.rN);
      am->ARMamN.RR.rM = lookupHRegRemap(m, am->ARMamN.RR.rM);
   }
}

void ppARMAModeN ( ARMAModeN* am ) {
   vex_printf("[");
   if (am->tag == ARMamN_R) {
      ppHRegARM(am->ARMamN.R.rN);
   } else {
      ppHRegARM(am->ARMamN.RR.rN);
   }
   vex_printf("]");
   if (am->tag == ARMamN_RR) {
      vex_printf(", ");
      ppHRegARM(am->ARMamN.RR.rM);
   }
}


/* --------- Reg or imm-8x4 operands --------- */

static UInt ROR32 ( UInt x, UInt sh ) {
   vassert(sh >= 0 && sh < 32);
   if (sh == 0)
      return x;
   else
      return (x << (32-sh)) | (x >> sh);
}
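
/* Worked example (added for illustration): an 8x4 immediate denotes
   ROR32(imm8, 2*imm4), the standard ARM rotated-immediate scheme.
   Thus imm8 == 0xFF with imm4 == 0 denotes 0x000000FF, while
   imm8 == 0xFF with imm4 == 4 denotes ROR32(0xFF, 8) == 0xFF000000. */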

ARMRI84* ARMRI84_I84 ( UShort imm8, UShort imm4 ) {
   ARMRI84* ri84          = LibVEX_Alloc_inline(sizeof(ARMRI84));
   ri84->tag              = ARMri84_I84;
   ri84->ARMri84.I84.imm8 = imm8;
   ri84->ARMri84.I84.imm4 = imm4;
   vassert(imm8 >= 0 && imm8 <= 255);
   vassert(imm4 >= 0 && imm4 <= 15);
   return ri84;
}
ARMRI84* ARMRI84_R ( HReg reg ) {
   ARMRI84* ri84       = LibVEX_Alloc_inline(sizeof(ARMRI84));
   ri84->tag           = ARMri84_R;
   ri84->ARMri84.R.reg = reg;
   return ri84;
}

void ppARMRI84 ( ARMRI84* ri84 ) {
   switch (ri84->tag) {
      case ARMri84_I84:
         vex_printf("0x%x", ROR32(ri84->ARMri84.I84.imm8,
                                  2 * ri84->ARMri84.I84.imm4));
         break;
      case ARMri84_R:
         ppHRegARM(ri84->ARMri84.R.reg);
         break;
      default:
         vassert(0);
   }
}

static void addRegUsage_ARMRI84 ( HRegUsage* u, ARMRI84* ri84 ) {
   switch (ri84->tag) {
      case ARMri84_I84:
         return;
      case ARMri84_R:
         addHRegUse(u, HRmRead, ri84->ARMri84.R.reg);
         return;
      default:
         vpanic("addRegUsage_ARMRI84");
   }
}

static void mapRegs_ARMRI84 ( HRegRemap* m, ARMRI84* ri84 ) {
   switch (ri84->tag) {
      case ARMri84_I84:
         return;
      case ARMri84_R:
         ri84->ARMri84.R.reg = lookupHRegRemap(m, ri84->ARMri84.R.reg);
         return;
      default:
         vpanic("mapRegs_ARMRI84");
   }
}


/* --------- Reg or imm5 operands --------- */

ARMRI5* ARMRI5_I5 ( UInt imm5 ) {
   ARMRI5* ri5         = LibVEX_Alloc_inline(sizeof(ARMRI5));
   ri5->tag            = ARMri5_I5;
   ri5->ARMri5.I5.imm5 = imm5;
   vassert(imm5 > 0 && imm5 <= 31); // zero is not allowed
   return ri5;
}
ARMRI5* ARMRI5_R ( HReg reg ) {
   ARMRI5* ri5       = LibVEX_Alloc_inline(sizeof(ARMRI5));
   ri5->tag          = ARMri5_R;
   ri5->ARMri5.R.reg = reg;
   return ri5;
}

void ppARMRI5 ( ARMRI5* ri5 ) {
   switch (ri5->tag) {
      case ARMri5_I5:
         vex_printf("%u", ri5->ARMri5.I5.imm5);
         break;
      case ARMri5_R:
         ppHRegARM(ri5->ARMri5.R.reg);
         break;
      default:
         vassert(0);
   }
}

static void addRegUsage_ARMRI5 ( HRegUsage* u, ARMRI5* ri5 ) {
   switch (ri5->tag) {
      case ARMri5_I5:
         return;
      case ARMri5_R:
         addHRegUse(u, HRmRead, ri5->ARMri5.R.reg);
         return;
      default:
         vpanic("addRegUsage_ARMRI5");
   }
}

static void mapRegs_ARMRI5 ( HRegRemap* m, ARMRI5* ri5 ) {
   switch (ri5->tag) {
      case ARMri5_I5:
         return;
      case ARMri5_R:
         ri5->ARMri5.R.reg = lookupHRegRemap(m, ri5->ARMri5.R.reg);
         return;
      default:
         vpanic("mapRegs_ARMRI5");
   }
}

/* --------- Neon Immediate operand --------- */

ARMNImm* ARMNImm_TI ( UInt type, UInt imm8 ) {
   ARMNImm* i = LibVEX_Alloc_inline(sizeof(ARMNImm));
   i->type = type;
   i->imm8 = imm8;
   return i;
}

ULong ARMNImm_to_Imm64 ( ARMNImm* imm ) {
   int i, j;
   ULong y, x = imm->imm8;
   switch (imm->type) {
      case 3:
         x = x << 8; /* fallthrough */
      case 2:
         x = x << 8; /* fallthrough */
      case 1:
         x = x << 8; /* fallthrough */
      case 0:
         return (x << 32) | x;
      case 5:
      case 6:
         if (imm->type == 5)
            x = x << 8;
         else
            x = (x << 8) | x;
         /* fallthrough */
      case 4:
         x = (x << 16) | x;
         return (x << 32) | x;
      case 8:
         x = (x << 8) | 0xFF;
         /* fallthrough */
      case 7:
         x = (x << 8) | 0xFF;
         return (x << 32) | x;
      case 9:
         x = 0;
         for (i = 7; i >= 0; i--) {
            y = ((ULong)imm->imm8 >> i) & 1;
            for (j = 0; j < 8; j++) {
               x = (x << 1) | y;
            }
         }
         return x;
      case 10:
         x |= (x & 0x80) << 5;
         x |= (~x & 0x40) << 5;
         x &= 0x187F; /* 0001 1000 0111 1111 */
         x |= (x & 0x40) << 4;
         x |= (x & 0x40) << 3;
         x |= (x & 0x40) << 2;
         x |= (x & 0x40) << 1;
         x = x << 19;
         x = (x << 32) | x;
         return x;
      default:
         vpanic("ARMNImm_to_Imm64");
   }
}
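
/* Worked examples (added for illustration): type 9 replicates each
   bit of imm8 across a full byte, so imm8 == 0xA5 (10100101b)
   expands to 0xFF00FF0000FF00FF.  Type 10 is the VFP-style 8-bit
   float immediate: imm8 == 0x70 yields the F32 pattern 0x3F800000
   (i.e. 1.0f) replicated into both halves of the returned 64-bit
   value. */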

ARMNImm* Imm64_to_ARMNImm ( ULong x ) {
   ARMNImm tmp;
   if ((x & 0xFFFFFFFF) == (x >> 32)) {
      if ((x & 0xFFFFFF00) == 0)
         return ARMNImm_TI(0, x & 0xFF);
      if ((x & 0xFFFF00FF) == 0)
         return ARMNImm_TI(1, (x >> 8) & 0xFF);
      if ((x & 0xFF00FFFF) == 0)
         return ARMNImm_TI(2, (x >> 16) & 0xFF);
      if ((x & 0x00FFFFFF) == 0)
         return ARMNImm_TI(3, (x >> 24) & 0xFF);
      if ((x & 0xFFFF00FF) == 0xFF)
         return ARMNImm_TI(7, (x >> 8) & 0xFF);
      if ((x & 0xFF00FFFF) == 0xFFFF)
         return ARMNImm_TI(8, (x >> 16) & 0xFF);
      if ((x & 0xFFFF) == ((x >> 16) & 0xFFFF)) {
         if ((x & 0xFF00) == 0)
            return ARMNImm_TI(4, x & 0xFF);
         if ((x & 0x00FF) == 0)
            return ARMNImm_TI(5, (x >> 8) & 0xFF);
         if ((x & 0xFF) == ((x >> 8) & 0xFF))
            return ARMNImm_TI(6, x & 0xFF);
      }
      if ((x & 0x7FFFF) == 0) {
         tmp.type = 10;
         tmp.imm8 = ((x >> 19) & 0x7F) | ((x >> 24) & 0x80);
         if (ARMNImm_to_Imm64(&tmp) == x)
            return ARMNImm_TI(tmp.type, tmp.imm8);
      }
   } else {
      /* This can only be type 9. */
      tmp.imm8 = (((x >> 56) & 1) << 7)
               | (((x >> 48) & 1) << 6)
               | (((x >> 40) & 1) << 5)
               | (((x >> 32) & 1) << 4)
               | (((x >> 24) & 1) << 3)
               | (((x >> 16) & 1) << 2)
               | (((x >>  8) & 1) << 1)
               | (((x >>  0) & 1) << 0);
      tmp.type = 9;
      if (ARMNImm_to_Imm64 (&tmp) == x)
         return ARMNImm_TI(tmp.type, tmp.imm8);
   }
   return NULL;
}
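
/* Usage sketch (added for illustration): a NULL result means the
   value has no VMOV-immediate representation, so callers must be
   prepared to materialise it some other way.

      ARMNImm* ni = Imm64_to_ARMNImm(0xFF00FF0000FF00FFULL);
      if (ni) {
         vassert(ni->type == 9 && ni->imm8 == 0xA5);
         vassert(ARMNImm_to_Imm64(ni) == 0xFF00FF0000FF00FFULL);
      }
*/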

void ppARMNImm (ARMNImm* i) {
   ULong x = ARMNImm_to_Imm64(i);
   /* The same 64-bit pattern occupies both halves of the 128-bit
      immediate, hence it is printed twice. */
   vex_printf("0x%llX%llX", x, x);
}

/* --------- Register or scalar operand --------- */

ARMNRS* mkARMNRS(ARMNRS_tag tag, HReg reg, UInt index)
{
   ARMNRS *p = LibVEX_Alloc_inline(sizeof(ARMNRS));
   p->tag = tag;
   p->reg = reg;
   p->index = index;
   return p;
}

void ppARMNRS(ARMNRS *p)
{
   ppHRegARM(p->reg);
   if (p->tag == ARMNRS_Scalar) {
      vex_printf("[%u]", p->index);
   }
}

/* --------- Instructions. --------- */

const HChar* showARMAluOp ( ARMAluOp op ) {
   switch (op) {
      case ARMalu_ADD:  return "add";
      case ARMalu_ADDS: return "adds";
      case ARMalu_ADC:  return "adc";
      case ARMalu_SUB:  return "sub";
      case ARMalu_SUBS: return "subs";
      case ARMalu_SBC:  return "sbc";
      case ARMalu_AND:  return "and";
      case ARMalu_BIC:  return "bic";
      case ARMalu_OR:   return "orr";
      case ARMalu_XOR:  return "xor";
      default: vpanic("showARMAluOp");
   }
}

const HChar* showARMShiftOp ( ARMShiftOp op ) {
   switch (op) {
      case ARMsh_SHL: return "shl";
      case ARMsh_SHR: return "shr";
      case ARMsh_SAR: return "sar";
      default: vpanic("showARMShiftOp");
   }
}

const HChar* showARMUnaryOp ( ARMUnaryOp op ) {
   switch (op) {
      case ARMun_NEG: return "neg";
      case ARMun_NOT: return "not";
      case ARMun_CLZ: return "clz";
      default: vpanic("showARMUnaryOp");
   }
}

const HChar* showARMMulOp ( ARMMulOp op ) {
   switch (op) {
      case ARMmul_PLAIN: return "mul";
      case ARMmul_ZX:    return "umull";
      case ARMmul_SX:    return "smull";
      default: vpanic("showARMMulOp");
   }
}

const HChar* showARMVfpOp ( ARMVfpOp op ) {
   switch (op) {
      case ARMvfp_ADD: return "add";
      case ARMvfp_SUB: return "sub";
      case ARMvfp_MUL: return "mul";
      case ARMvfp_DIV: return "div";
      default: vpanic("showARMVfpOp");
   }
}

const HChar* showARMVfpUnaryOp ( ARMVfpUnaryOp op ) {
   switch (op) {
      case ARMvfpu_COPY: return "cpy";
      case ARMvfpu_NEG:  return "neg";
      case ARMvfpu_ABS:  return "abs";
      case ARMvfpu_SQRT: return "sqrt";
      default: vpanic("showARMVfpUnaryOp");
   }
}

const HChar* showARMNeonBinOp ( ARMNeonBinOp op ) {
   switch (op) {
      case ARMneon_VAND: return "vand";
      case ARMneon_VORR: return "vorr";
      case ARMneon_VXOR: return "veor";
      case ARMneon_VADD: return "vadd";
      case ARMneon_VRHADDS: return "vrhadd";
      case ARMneon_VRHADDU: return "vrhadd";
      case ARMneon_VADDFP: return "vadd";
      case ARMneon_VPADDFP: return "vpadd";
      case ARMneon_VABDFP: return "vabd";
      case ARMneon_VSUB: return "vsub";
      case ARMneon_VSUBFP: return "vsub";
      case ARMneon_VMINU: return "vmin";
      case ARMneon_VMINS: return "vmin";
      case ARMneon_VMINF: return "vmin";
      case ARMneon_VMAXU: return "vmax";
      case ARMneon_VMAXS: return "vmax";
      case ARMneon_VMAXF: return "vmax";
      case ARMneon_VQADDU: return "vqadd";
      case ARMneon_VQADDS: return "vqadd";
      case ARMneon_VQSUBU: return "vqsub";
      case ARMneon_VQSUBS: return "vqsub";
      case ARMneon_VCGTU:  return "vcgt";
      case ARMneon_VCGTS:  return "vcgt";
      case ARMneon_VCGTF:  return "vcgt";
      case ARMneon_VCGEF:  return "vcge";
      case ARMneon_VCGEU:  return "vcge";
      case ARMneon_VCGES:  return "vcge";
      case ARMneon_VCEQ:  return "vceq";
      case ARMneon_VCEQF:  return "vceq";
      case ARMneon_VPADD:   return "vpadd";
      case ARMneon_VPMINU:   return "vpmin";
      case ARMneon_VPMINS:   return "vpmin";
      case ARMneon_VPMINF:   return "vpmin";
      case ARMneon_VPMAXU:   return "vpmax";
      case ARMneon_VPMAXS:   return "vpmax";
      case ARMneon_VPMAXF:   return "vpmax";
      case ARMneon_VEXT:   return "vext";
      case ARMneon_VMUL:   return "vmuli";
      case ARMneon_VMULLU:   return "vmull";
      case ARMneon_VMULLS:   return "vmull";
      case ARMneon_VMULP:  return "vmul";
      case ARMneon_VMULFP:  return "vmul";
      case ARMneon_VMULLP:  return "vmull";
      case ARMneon_VQDMULH: return "vqdmulh";
      case ARMneon_VQRDMULH: return "vqrdmulh";
      case ARMneon_VQDMULL: return "vqdmull";
      case ARMneon_VTBL: return "vtbl";
      case ARMneon_VRECPS: return "vrecps";
      case ARMneon_VRSQRTS: return "vrsqrts";
      case ARMneon_INVALID: return "??invalid??";
      /* ... */
      default: vpanic("showARMNeonBinOp");
   }
}

const HChar* showARMNeonBinOpDataType ( ARMNeonBinOp op ) {
   switch (op) {
      case ARMneon_VAND:
      case ARMneon_VORR:
      case ARMneon_VXOR:
         return "";
      case ARMneon_VADD:
      case ARMneon_VSUB:
      case ARMneon_VEXT:
      case ARMneon_VMUL:
      case ARMneon_VPADD:
      case ARMneon_VTBL:
      case ARMneon_VCEQ:
         return ".i";
      case ARMneon_VRHADDU:
      case ARMneon_VMINU:
      case ARMneon_VMAXU:
      case ARMneon_VQADDU:
      case ARMneon_VQSUBU:
      case ARMneon_VCGTU:
      case ARMneon_VCGEU:
      case ARMneon_VMULLU:
      case ARMneon_VPMINU:
      case ARMneon_VPMAXU:
         return ".u";
      case ARMneon_VRHADDS:
      case ARMneon_VMINS:
      case ARMneon_VMAXS:
      case ARMneon_VQADDS:
      case ARMneon_VQSUBS:
      case ARMneon_VCGTS:
      case ARMneon_VCGES:
      case ARMneon_VQDMULL:
      case ARMneon_VMULLS:
      case ARMneon_VPMINS:
      case ARMneon_VPMAXS:
      case ARMneon_VQDMULH:
      case ARMneon_VQRDMULH:
         return ".s";
      case ARMneon_VMULP:
      case ARMneon_VMULLP:
         return ".p";
      case ARMneon_VADDFP:
      case ARMneon_VABDFP:
      case ARMneon_VPADDFP:
      case ARMneon_VSUBFP:
      case ARMneon_VMULFP:
      case ARMneon_VMINF:
      case ARMneon_VMAXF:
      case ARMneon_VPMINF:
      case ARMneon_VPMAXF:
      case ARMneon_VCGTF:
      case ARMneon_VCGEF:
      case ARMneon_VCEQF:
      case ARMneon_VRECPS:
      case ARMneon_VRSQRTS:
         return ".f";
      /* ... */
      default: vpanic("showARMNeonBinOpDataType");
   }
}

const HChar* showARMNeonUnOp ( ARMNeonUnOp op ) {
   switch (op) {
      case ARMneon_COPY: return "vmov";
      case ARMneon_COPYLS: return "vmov";
      case ARMneon_COPYLU: return "vmov";
      case ARMneon_COPYN: return "vmov";
      case ARMneon_COPYQNSS: return "vqmovn";
      case ARMneon_COPYQNUS: return "vqmovun";
      case ARMneon_COPYQNUU: return "vqmovn";
      case ARMneon_NOT: return "vmvn";
      case ARMneon_EQZ: return "vceq";
      case ARMneon_CNT: return "vcnt";
      case ARMneon_CLS: return "vcls";
      case ARMneon_CLZ: return "vclz";
      case ARMneon_DUP: return "vdup";
      case ARMneon_PADDLS: return "vpaddl";
      case ARMneon_PADDLU: return "vpaddl";
      case ARMneon_VQSHLNSS: return "vqshl";
      case ARMneon_VQSHLNUU: return "vqshl";
      case ARMneon_VQSHLNUS: return "vqshlu";
      case ARMneon_REV16: return "vrev16";
      case ARMneon_REV32: return "vrev32";
      case ARMneon_REV64: return "vrev64";
      case ARMneon_VCVTFtoU: return "vcvt";
      case ARMneon_VCVTFtoS: return "vcvt";
      case ARMneon_VCVTUtoF: return "vcvt";
      case ARMneon_VCVTStoF: return "vcvt";
      case ARMneon_VCVTFtoFixedU: return "vcvt";
      case ARMneon_VCVTFtoFixedS: return "vcvt";
      case ARMneon_VCVTFixedUtoF: return "vcvt";
      case ARMneon_VCVTFixedStoF: return "vcvt";
      case ARMneon_VCVTF32toF16: return "vcvt";
      case ARMneon_VCVTF16toF32: return "vcvt";
      case ARMneon_VRECIP: return "vrecip";
      case ARMneon_VRECIPF: return "vrecipf";
      case ARMneon_VNEGF: return "vneg";
      case ARMneon_ABS: return "vabs";
      case ARMneon_VABSFP: return "vabsfp";
      case ARMneon_VRSQRTEFP: return "vrsqrtefp";
      case ARMneon_VRSQRTE: return "vrsqrte";
      /* ... */
      default: vpanic("showARMNeonUnOp");
   }
}

const HChar* showARMNeonUnOpDataType ( ARMNeonUnOp op ) {
   switch (op) {
      case ARMneon_COPY:
      case ARMneon_NOT:
         return "";
      case ARMneon_COPYN:
      case ARMneon_EQZ:
      case ARMneon_CNT:
      case ARMneon_DUP:
      case ARMneon_REV16:
      case ARMneon_REV32:
      case ARMneon_REV64:
         return ".i";
      case ARMneon_COPYLU:
      case ARMneon_PADDLU:
      case ARMneon_COPYQNUU:
      case ARMneon_VQSHLNUU:
      case ARMneon_VRECIP:
      case ARMneon_VRSQRTE:
         return ".u";
      case ARMneon_CLS:
      case ARMneon_CLZ:
      case ARMneon_COPYLS:
      case ARMneon_PADDLS:
      case ARMneon_COPYQNSS:
      case ARMneon_COPYQNUS:
      case ARMneon_VQSHLNSS:
      case ARMneon_VQSHLNUS:
      case ARMneon_ABS:
         return ".s";
      case ARMneon_VRECIPF:
      case ARMneon_VNEGF:
      case ARMneon_VABSFP:
      case ARMneon_VRSQRTEFP:
         return ".f";
      case ARMneon_VCVTFtoU: return ".u32.f32";
      case ARMneon_VCVTFtoS: return ".s32.f32";
      case ARMneon_VCVTUtoF: return ".f32.u32";
      case ARMneon_VCVTStoF: return ".f32.s32";
      case ARMneon_VCVTF16toF32: return ".f32.f16";
      case ARMneon_VCVTF32toF16: return ".f16.f32";
      case ARMneon_VCVTFtoFixedU: return ".u32.f32";
      case ARMneon_VCVTFtoFixedS: return ".s32.f32";
      case ARMneon_VCVTFixedUtoF: return ".f32.u32";
      case ARMneon_VCVTFixedStoF: return ".f32.s32";
      /* ... */
      default: vpanic("showARMNeonUnOpDataType");
   }
}

const HChar* showARMNeonUnOpS ( ARMNeonUnOpS op ) {
   switch (op) {
      case ARMneon_SETELEM: return "vmov";
      case ARMneon_GETELEMU: return "vmov";
      case ARMneon_GETELEMS: return "vmov";
      case ARMneon_VDUP: return "vdup";
      /* ... */
      default: vpanic("showARMNeonUnarySOp");
   }
}

const HChar* showARMNeonUnOpSDataType ( ARMNeonUnOpS op ) {
   switch (op) {
      case ARMneon_SETELEM:
      case ARMneon_VDUP:
         return ".i";
      case ARMneon_GETELEMS:
         return ".s";
      case ARMneon_GETELEMU:
         return ".u";
      /* ... */
      default: vpanic("showARMNeonUnarySOp");
   }
}

const HChar* showARMNeonShiftOp ( ARMNeonShiftOp op ) {
   switch (op) {
      case ARMneon_VSHL: return "vshl";
      case ARMneon_VSAL: return "vshl";
      case ARMneon_VQSHL: return "vqshl";
      case ARMneon_VQSAL: return "vqshl";
      /* ... */
      default: vpanic("showARMNeonShiftOp");
   }
}

const HChar* showARMNeonShiftOpDataType ( ARMNeonShiftOp op ) {
   switch (op) {
      case ARMneon_VSHL:
      case ARMneon_VQSHL:
         return ".u";
      case ARMneon_VSAL:
      case ARMneon_VQSAL:
         return ".s";
      /* ... */
      default: vpanic("showARMNeonShiftOpDataType");
   }
}

const HChar* showARMNeonDualOp ( ARMNeonDualOp op ) {
   switch (op) {
      case ARMneon_TRN: return "vtrn";
      case ARMneon_ZIP: return "vzip";
      case ARMneon_UZP: return "vuzp";
      /* ... */
      default: vpanic("showARMNeonDualOp");
   }
}

const HChar* showARMNeonDualOpDataType ( ARMNeonDualOp op ) {
   switch (op) {
      case ARMneon_TRN:
      case ARMneon_ZIP:
      case ARMneon_UZP:
         return "i";
      /* ... */
      default: vpanic("showARMNeonDualOpDataType");
   }
}

static const HChar* showARMNeonDataSize_wrk ( UInt size )
{
   switch (size) {
      case 0: return "8";
      case 1: return "16";
      case 2: return "32";
      case 3: return "64";
      default: vpanic("showARMNeonDataSize");
   }
}

static const HChar* showARMNeonDataSize ( const ARMInstr* i )
{
   switch (i->tag) {
      case ARMin_NBinary:
         if (i->ARMin.NBinary.op == ARMneon_VEXT)
            return "8";
         if (i->ARMin.NBinary.op == ARMneon_VAND ||
             i->ARMin.NBinary.op == ARMneon_VORR ||
             i->ARMin.NBinary.op == ARMneon_VXOR)
            return "";
         return showARMNeonDataSize_wrk(i->ARMin.NBinary.size);
      case ARMin_NUnary:
         if (i->ARMin.NUnary.op == ARMneon_COPY ||
             i->ARMin.NUnary.op == ARMneon_NOT ||
             i->ARMin.NUnary.op == ARMneon_VCVTF32toF16 ||
             i->ARMin.NUnary.op == ARMneon_VCVTF16toF32 ||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
             i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
             i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF ||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoS ||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoU ||
             i->ARMin.NUnary.op == ARMneon_VCVTStoF ||
             i->ARMin.NUnary.op == ARMneon_VCVTUtoF)
            return "";
         if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
             i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
             i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
            UInt size;
            size = i->ARMin.NUnary.size;
            if (size & 0x40)
               return "64";
            if (size & 0x20)
               return "32";
            if (size & 0x10)
               return "16";
            if (size & 0x08)
               return "8";
            vpanic("showARMNeonDataSize");
         }
         return showARMNeonDataSize_wrk(i->ARMin.NUnary.size);
      case ARMin_NUnaryS:
         if (i->ARMin.NUnaryS.op == ARMneon_VDUP) {
            int size;
            size = i->ARMin.NUnaryS.size;
            if ((size & 1) == 1)
               return "8";
            if ((size & 3) == 2)
               return "16";
            if ((size & 7) == 4)
               return "32";
            vpanic("showARMNeonDataSize");
         }
         return showARMNeonDataSize_wrk(i->ARMin.NUnaryS.size);
      case ARMin_NShift:
         return showARMNeonDataSize_wrk(i->ARMin.NShift.size);
      case ARMin_NDual:
         return showARMNeonDataSize_wrk(i->ARMin.NDual.size);
      default:
         vpanic("showARMNeonDataSize");
   }
}

ARMInstr* ARMInstr_Alu ( ARMAluOp op,
                         HReg dst, HReg argL, ARMRI84* argR ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag            = ARMin_Alu;
   i->ARMin.Alu.op   = op;
   i->ARMin.Alu.dst  = dst;
   i->ARMin.Alu.argL = argL;
   i->ARMin.Alu.argR = argR;
   return i;
}
ARMInstr* ARMInstr_Shift  ( ARMShiftOp op,
                            HReg dst, HReg argL, ARMRI5* argR ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag              = ARMin_Shift;
   i->ARMin.Shift.op   = op;
   i->ARMin.Shift.dst  = dst;
   i->ARMin.Shift.argL = argL;
   i->ARMin.Shift.argR = argR;
   return i;
}
ARMInstr* ARMInstr_Unary ( ARMUnaryOp op, HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag             = ARMin_Unary;
   i->ARMin.Unary.op  = op;
   i->ARMin.Unary.dst = dst;
   i->ARMin.Unary.src = src;
   return i;
}
ARMInstr* ARMInstr_CmpOrTst ( Bool isCmp, HReg argL, ARMRI84* argR ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag                  = ARMin_CmpOrTst;
   i->ARMin.CmpOrTst.isCmp = isCmp;
   i->ARMin.CmpOrTst.argL  = argL;
   i->ARMin.CmpOrTst.argR  = argR;
   return i;
}
ARMInstr* ARMInstr_Mov ( HReg dst, ARMRI84* src ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag           = ARMin_Mov;
   i->ARMin.Mov.dst = dst;
   i->ARMin.Mov.src = src;
   return i;
}
ARMInstr* ARMInstr_Imm32  ( HReg dst, UInt imm32 ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag               = ARMin_Imm32;
   i->ARMin.Imm32.dst   = dst;
   i->ARMin.Imm32.imm32 = imm32;
   return i;
}
ARMInstr* ARMInstr_LdSt32 ( ARMCondCode cc,
                            Bool isLoad, HReg rD, ARMAMode1* amode ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag                 = ARMin_LdSt32;
   i->ARMin.LdSt32.cc     = cc;
   i->ARMin.LdSt32.isLoad = isLoad;
   i->ARMin.LdSt32.rD     = rD;
   i->ARMin.LdSt32.amode  = amode;
   vassert(cc != ARMcc_NV);
   return i;
}
ARMInstr* ARMInstr_LdSt16 ( ARMCondCode cc,
                            Bool isLoad, Bool signedLoad,
                            HReg rD, ARMAMode2* amode ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag                     = ARMin_LdSt16;
   i->ARMin.LdSt16.cc         = cc;
   i->ARMin.LdSt16.isLoad     = isLoad;
   i->ARMin.LdSt16.signedLoad = signedLoad;
   i->ARMin.LdSt16.rD         = rD;
   i->ARMin.LdSt16.amode      = amode;
   vassert(cc != ARMcc_NV);
   return i;
}
ARMInstr* ARMInstr_LdSt8U ( ARMCondCode cc,
                            Bool isLoad, HReg rD, ARMAMode1* amode ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag                 = ARMin_LdSt8U;
   i->ARMin.LdSt8U.cc     = cc;
   i->ARMin.LdSt8U.isLoad = isLoad;
   i->ARMin.LdSt8U.rD     = rD;
   i->ARMin.LdSt8U.amode  = amode;
   vassert(cc != ARMcc_NV);
   return i;
}
ARMInstr* ARMInstr_Ld8S ( ARMCondCode cc, HReg rD, ARMAMode2* amode ) {
   ARMInstr* i         = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag              = ARMin_Ld8S;
   i->ARMin.Ld8S.cc    = cc;
   i->ARMin.Ld8S.rD    = rD;
   i->ARMin.Ld8S.amode = amode;
   vassert(cc != ARMcc_NV);
   return i;
}
ARMInstr* ARMInstr_XDirect ( Addr32 dstGA, ARMAMode1* amR15T,
                             ARMCondCode cond, Bool toFastEP ) {
   ARMInstr* i               = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag                    = ARMin_XDirect;
   i->ARMin.XDirect.dstGA    = dstGA;
   i->ARMin.XDirect.amR15T   = amR15T;
   i->ARMin.XDirect.cond     = cond;
   i->ARMin.XDirect.toFastEP = toFastEP;
   return i;
}
ARMInstr* ARMInstr_XIndir ( HReg dstGA, ARMAMode1* amR15T,
                            ARMCondCode cond ) {
   ARMInstr* i            = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag                 = ARMin_XIndir;
   i->ARMin.XIndir.dstGA  = dstGA;
   i->ARMin.XIndir.amR15T = amR15T;
   i->ARMin.XIndir.cond   = cond;
   return i;
}
ARMInstr* ARMInstr_XAssisted ( HReg dstGA, ARMAMode1* amR15T,
                               ARMCondCode cond, IRJumpKind jk ) {
   ARMInstr* i               = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag                    = ARMin_XAssisted;
   i->ARMin.XAssisted.dstGA  = dstGA;
   i->ARMin.XAssisted.amR15T = amR15T;
   i->ARMin.XAssisted.cond   = cond;
   i->ARMin.XAssisted.jk     = jk;
   return i;
}
ARMInstr* ARMInstr_CMov ( ARMCondCode cond, HReg dst, ARMRI84* src ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag             = ARMin_CMov;
   i->ARMin.CMov.cond = cond;
   i->ARMin.CMov.dst  = dst;
   i->ARMin.CMov.src  = src;
   vassert(cond != ARMcc_AL);
   return i;
}
ARMInstr* ARMInstr_Call ( ARMCondCode cond, Addr32 target, Int nArgRegs,
                          RetLoc rloc ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag                 = ARMin_Call;
   i->ARMin.Call.cond     = cond;
   i->ARMin.Call.target   = target;
   i->ARMin.Call.nArgRegs = nArgRegs;
   i->ARMin.Call.rloc     = rloc;
   vassert(is_sane_RetLoc(rloc));
   return i;
}
ARMInstr* ARMInstr_Mul ( ARMMulOp op ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag          = ARMin_Mul;
   i->ARMin.Mul.op = op;
   return i;
}
ARMInstr* ARMInstr_LdrEX ( Int szB ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag             = ARMin_LdrEX;
   i->ARMin.LdrEX.szB = szB;
   vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
   return i;
}
ARMInstr* ARMInstr_StrEX ( Int szB ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag             = ARMin_StrEX;
   i->ARMin.StrEX.szB = szB;
   vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
   return i;
}
ARMInstr* ARMInstr_VLdStD ( Bool isLoad, HReg dD, ARMAModeV* am ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag                 = ARMin_VLdStD;
   i->ARMin.VLdStD.isLoad = isLoad;
   i->ARMin.VLdStD.dD     = dD;
   i->ARMin.VLdStD.amode  = am;
   return i;
}
ARMInstr* ARMInstr_VLdStS ( Bool isLoad, HReg fD, ARMAModeV* am ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag                 = ARMin_VLdStS;
   i->ARMin.VLdStS.isLoad = isLoad;
   i->ARMin.VLdStS.fD     = fD;
   i->ARMin.VLdStS.amode  = am;
   return i;
}
ARMInstr* ARMInstr_VAluD ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag              = ARMin_VAluD;
   i->ARMin.VAluD.op   = op;
   i->ARMin.VAluD.dst  = dst;
   i->ARMin.VAluD.argL = argL;
   i->ARMin.VAluD.argR = argR;
   return i;
}
ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag              = ARMin_VAluS;
   i->ARMin.VAluS.op   = op;
   i->ARMin.VAluS.dst  = dst;
   i->ARMin.VAluS.argL = argL;
   i->ARMin.VAluS.argR = argR;
   return i;
}
ARMInstr* ARMInstr_VUnaryD ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag               = ARMin_VUnaryD;
   i->ARMin.VUnaryD.op  = op;
   i->ARMin.VUnaryD.dst = dst;
   i->ARMin.VUnaryD.src = src;
   return i;
}
ARMInstr* ARMInstr_VUnaryS ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag               = ARMin_VUnaryS;
   i->ARMin.VUnaryS.op  = op;
   i->ARMin.VUnaryS.dst = dst;
   i->ARMin.VUnaryS.src = src;
   return i;
}
ARMInstr* ARMInstr_VCmpD ( HReg argL, HReg argR ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag              = ARMin_VCmpD;
   i->ARMin.VCmpD.argL = argL;
   i->ARMin.VCmpD.argR = argR;
   return i;
}
ARMInstr* ARMInstr_VCMovD ( ARMCondCode cond, HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag               = ARMin_VCMovD;
   i->ARMin.VCMovD.cond = cond;
   i->ARMin.VCMovD.dst  = dst;
   i->ARMin.VCMovD.src  = src;
   vassert(cond != ARMcc_AL);
   return i;
}
ARMInstr* ARMInstr_VCMovS ( ARMCondCode cond, HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag               = ARMin_VCMovS;
   i->ARMin.VCMovS.cond = cond;
   i->ARMin.VCMovS.dst  = dst;
   i->ARMin.VCMovS.src  = src;
   vassert(cond != ARMcc_AL);
   return i;
}
ARMInstr* ARMInstr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag               = ARMin_VCvtSD;
   i->ARMin.VCvtSD.sToD = sToD;
   i->ARMin.VCvtSD.dst  = dst;
   i->ARMin.VCvtSD.src  = src;
   return i;
}
ARMInstr* ARMInstr_VXferQ ( Bool toQ, HReg qD, HReg dHi, HReg dLo ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag              = ARMin_VXferQ;
   i->ARMin.VXferQ.toQ = toQ;
   i->ARMin.VXferQ.qD  = qD;
   i->ARMin.VXferQ.dHi = dHi;
   i->ARMin.VXferQ.dLo = dLo;
   return i;
}
ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag              = ARMin_VXferD;
   i->ARMin.VXferD.toD = toD;
   i->ARMin.VXferD.dD  = dD;
   i->ARMin.VXferD.rHi = rHi;
   i->ARMin.VXferD.rLo = rLo;
   return i;
}
ARMInstr* ARMInstr_VXferS ( Bool toS, HReg fD, HReg rLo ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag              = ARMin_VXferS;
   i->ARMin.VXferS.toS = toS;
   i->ARMin.VXferS.fD  = fD;
   i->ARMin.VXferS.rLo = rLo;
   return i;
}
ARMInstr* ARMInstr_VCvtID ( Bool iToD, Bool syned,
                            HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag                = ARMin_VCvtID;
   i->ARMin.VCvtID.iToD  = iToD;
   i->ARMin.VCvtID.syned = syned;
   i->ARMin.VCvtID.dst   = dst;
   i->ARMin.VCvtID.src   = src;
   return i;
}
ARMInstr* ARMInstr_VRIntR ( Bool isF64, HReg dst, HReg src )
{
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag                = ARMin_VRIntR;
   i->ARMin.VRIntR.isF64 = isF64;
   i->ARMin.VRIntR.dst   = dst;
   i->ARMin.VRIntR.src   = src;
   return i;
}
ARMInstr* ARMInstr_VMinMaxNum ( Bool isF64, Bool isMax,
                                HReg dst, HReg srcL, HReg srcR )
{
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag = ARMin_VMinMaxNum;
   i->ARMin.VMinMaxNum.isF64 = isF64;
   i->ARMin.VMinMaxNum.isMax = isMax;
   i->ARMin.VMinMaxNum.dst   = dst;
   i->ARMin.VMinMaxNum.srcL  = srcL;
   i->ARMin.VMinMaxNum.srcR  = srcR;
   return i;
}
ARMInstr* ARMInstr_FPSCR ( Bool toFPSCR, HReg iReg ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag                 = ARMin_FPSCR;
   i->ARMin.FPSCR.toFPSCR = toFPSCR;
   i->ARMin.FPSCR.iReg    = iReg;
   return i;
}
ARMInstr* ARMInstr_MFence ( void ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag      = ARMin_MFence;
   return i;
}
ARMInstr* ARMInstr_CLREX ( void ) {
   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   i->tag      = ARMin_CLREX;
   return i;
}
   1406 
   1407 ARMInstr* ARMInstr_NLdStQ ( Bool isLoad, HReg dQ, ARMAModeN *amode ) {
   1408    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   1409    i->tag                  = ARMin_NLdStQ;
   1410    i->ARMin.NLdStQ.isLoad  = isLoad;
   1411    i->ARMin.NLdStQ.dQ      = dQ;
   1412    i->ARMin.NLdStQ.amode   = amode;
   1413    return i;
   1414 }
   1415 
   1416 ARMInstr* ARMInstr_NLdStD ( Bool isLoad, HReg dD, ARMAModeN *amode ) {
   1417    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   1418    i->tag                  = ARMin_NLdStD;
   1419    i->ARMin.NLdStD.isLoad  = isLoad;
   1420    i->ARMin.NLdStD.dD      = dD;
   1421    i->ARMin.NLdStD.amode   = amode;
   1422    return i;
   1423 }
   1424 
   1425 ARMInstr* ARMInstr_NUnary ( ARMNeonUnOp op, HReg dQ, HReg nQ,
   1426                             UInt size, Bool Q ) {
   1427    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   1428    i->tag                = ARMin_NUnary;
   1429    i->ARMin.NUnary.op   = op;
   1430    i->ARMin.NUnary.src  = nQ;
   1431    i->ARMin.NUnary.dst  = dQ;
   1432    i->ARMin.NUnary.size = size;
   1433    i->ARMin.NUnary.Q    = Q;
   1434    return i;
   1435 }
   1436 
   1437 ARMInstr* ARMInstr_NUnaryS ( ARMNeonUnOpS op, ARMNRS* dst, ARMNRS* src,
   1438                              UInt size, Bool Q ) {
   1439    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   1440    i->tag                = ARMin_NUnaryS;
   1441    i->ARMin.NUnaryS.op   = op;
   1442    i->ARMin.NUnaryS.src  = src;
   1443    i->ARMin.NUnaryS.dst  = dst;
   1444    i->ARMin.NUnaryS.size = size;
   1445    i->ARMin.NUnaryS.Q    = Q;
   1446    return i;
   1447 }
   1448 
   1449 ARMInstr* ARMInstr_NDual ( ARMNeonDualOp op, HReg nQ, HReg mQ,
   1450                            UInt size, Bool Q ) {
   1451    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   1452    i->tag                = ARMin_NDual;
   1453    i->ARMin.NDual.op   = op;
   1454    i->ARMin.NDual.arg1 = nQ;
   1455    i->ARMin.NDual.arg2 = mQ;
   1456    i->ARMin.NDual.size = size;
   1457    i->ARMin.NDual.Q    = Q;
   1458    return i;
   1459 }
   1460 
   1461 ARMInstr* ARMInstr_NBinary ( ARMNeonBinOp op,
   1462                              HReg dst, HReg argL, HReg argR,
   1463                              UInt size, Bool Q ) {
   1464    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   1465    i->tag                = ARMin_NBinary;
   1466    i->ARMin.NBinary.op   = op;
   1467    i->ARMin.NBinary.argL = argL;
   1468    i->ARMin.NBinary.argR = argR;
   1469    i->ARMin.NBinary.dst  = dst;
   1470    i->ARMin.NBinary.size = size;
   1471    i->ARMin.NBinary.Q    = Q;
   1472    return i;
   1473 }
   1474 
   1475 ARMInstr* ARMInstr_NeonImm ( HReg dst, ARMNImm* imm ) {
   1476    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   1477    i->tag         = ARMin_NeonImm;
   1478    i->ARMin.NeonImm.dst = dst;
   1479    i->ARMin.NeonImm.imm = imm;
   1480    return i;
   1481 }
   1482 
   1483 ARMInstr* ARMInstr_NCMovQ ( ARMCondCode cond, HReg dst, HReg src ) {
   1484    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   1485    i->tag               = ARMin_NCMovQ;
   1486    i->ARMin.NCMovQ.cond = cond;
   1487    i->ARMin.NCMovQ.dst  = dst;
   1488    i->ARMin.NCMovQ.src  = src;
   1489    vassert(cond != ARMcc_AL);
   1490    return i;
   1491 }
   1492 
   1493 ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp op,
   1494                             HReg dst, HReg argL, HReg argR,
   1495                             UInt size, Bool Q ) {
   1496    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   1497    i->tag                = ARMin_NShift;
   1498    i->ARMin.NShift.op   = op;
   1499    i->ARMin.NShift.argL = argL;
   1500    i->ARMin.NShift.argR = argR;
   1501    i->ARMin.NShift.dst  = dst;
   1502    i->ARMin.NShift.size = size;
   1503    i->ARMin.NShift.Q    = Q;
   1504    return i;
   1505 }
   1506 
   1507 ARMInstr* ARMInstr_NShl64 ( HReg dst, HReg src, UInt amt )
   1508 {
   1509    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   1510    i->tag              = ARMin_NShl64;
   1511    i->ARMin.NShl64.dst = dst;
   1512    i->ARMin.NShl64.src = src;
   1513    i->ARMin.NShl64.amt = amt;
   1514    vassert(amt >= 1 && amt <= 63);
   1515    return i;
   1516 }
   1517 
   1518 /* Helper copy-pasted from isel.c: test whether u can be encoded as
           an 8-bit value rotated right by an even amount (2 * u4) -- ARM's
           standard imm-8x4 operand encoding. */
   1519 static Bool fitsIn8x4 ( UInt* u8, UInt* u4, UInt u )
   1520 {
   1521    UInt i;
   1522    for (i = 0; i < 16; i++) {
   1523       if (0 == (u & 0xFFFFFF00)) {
   1524          *u8 = u;
   1525          *u4 = i;
   1526          return True;
   1527       }
   1528       u = ROR32(u, 30);
   1529    }
   1530    vassert(i == 16);
   1531    return False;
   1532 }
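        /* Worked example (illustrative): 0x2D00 == 0x2D ROR (2*12), so
           fitsIn8x4(&u8, &u4, 0x2D00) succeeds with u8 = 0x2D and u4 = 12.
           The loop rotates u left by 2 bits per iteration (ROR32(u, 30))
           until the value fits in the low 8 bits, recording the iteration
           count as the rotation field.  By contrast 0x102 fails: it would
           need an odd rotation, which imm-8x4 cannot express. */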
   1533 
   1534 ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 ) {
   1535    UInt u8, u4;
   1536    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   1537    /* Try to generate a single ADD if possible. */
   1538    if (fitsIn8x4(&u8, &u4, imm32)) {
   1539       i->tag            = ARMin_Alu;
   1540       i->ARMin.Alu.op   = ARMalu_ADD;
   1541       i->ARMin.Alu.dst  = rD;
   1542       i->ARMin.Alu.argL = rN;
   1543       i->ARMin.Alu.argR = ARMRI84_I84(u8, u4);
   1544    } else {
   1545       i->tag               = ARMin_Add32;
   1546       i->ARMin.Add32.rD    = rD;
   1547       i->ARMin.Add32.rN    = rN;
   1548       i->ARMin.Add32.imm32 = imm32;
   1549    }
   1550    return i;
   1551 }
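        /* Example (illustrative): ARMInstr_Add32(rD, rN, 0x1000) passes
           the imm-8x4 test (0x1000 == 0x10 ROR 24) and so yields a single
           ARMin_Alu ADD, whereas ARMInstr_Add32(rD, rN, 0x12345) cannot
           be encoded that way and falls back to the ARMin_Add32 form,
           which is left for the emitter to expand (an immediate load plus
           a register-register add). */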
   1552 
   1553 ARMInstr* ARMInstr_EvCheck ( ARMAMode1* amCounter,
   1554                              ARMAMode1* amFailAddr ) {
   1555    ARMInstr* i                 = LibVEX_Alloc_inline(sizeof(ARMInstr));
   1556    i->tag                      = ARMin_EvCheck;
   1557    i->ARMin.EvCheck.amCounter  = amCounter;
   1558    i->ARMin.EvCheck.amFailAddr = amFailAddr;
   1559    return i;
   1560 }
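        /* Note: ARMin_EvCheck is a pseudo-instruction.  As the
           pretty-printer below shows, it expands to
              ldr  r12, <amCounter>
              subs r12, r12, #1
              str  r12, <amCounter>
              bpl  nofail
              ldr  r12, <amFailAddr>
              bx   r12
             nofail:
           that is, it decrements the event counter and diverts control to
           the fail address once the counter goes negative. */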
   1561 
   1562 ARMInstr* ARMInstr_ProfInc ( void ) {
   1563    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
   1564    i->tag      = ARMin_ProfInc;
   1565    return i;
   1566 }
   1567 
   1568 /* ... */
   1569 
   1570 void ppARMInstr ( const ARMInstr* i ) {
   1571    switch (i->tag) {
   1572       case ARMin_Alu:
   1573          vex_printf("%-4s  ", showARMAluOp(i->ARMin.Alu.op));
   1574          ppHRegARM(i->ARMin.Alu.dst);
   1575          vex_printf(", ");
   1576          ppHRegARM(i->ARMin.Alu.argL);
   1577          vex_printf(", ");
   1578          ppARMRI84(i->ARMin.Alu.argR);
   1579          return;
   1580       case ARMin_Shift:
   1581          vex_printf("%s   ", showARMShiftOp(i->ARMin.Shift.op));
   1582          ppHRegARM(i->ARMin.Shift.dst);
   1583          vex_printf(", ");
   1584          ppHRegARM(i->ARMin.Shift.argL);
   1585          vex_printf(", ");
   1586          ppARMRI5(i->ARMin.Shift.argR);
   1587          return;
   1588       case ARMin_Unary:
   1589          vex_printf("%s   ", showARMUnaryOp(i->ARMin.Unary.op));
   1590          ppHRegARM(i->ARMin.Unary.dst);
   1591          vex_printf(", ");
   1592          ppHRegARM(i->ARMin.Unary.src);
   1593          return;
   1594       case ARMin_CmpOrTst:
   1595          vex_printf("%s   ", i->ARMin.CmpOrTst.isCmp ? "cmp" : "tst");
   1596          ppHRegARM(i->ARMin.CmpOrTst.argL);
   1597          vex_printf(", ");
   1598          ppARMRI84(i->ARMin.CmpOrTst.argR);
   1599          return;
   1600       case ARMin_Mov:
   1601          vex_printf("mov   ");
   1602          ppHRegARM(i->ARMin.Mov.dst);
   1603          vex_printf(", ");
   1604          ppARMRI84(i->ARMin.Mov.src);
   1605          return;
   1606       case ARMin_Imm32:
   1607          vex_printf("imm   ");
   1608          ppHRegARM(i->ARMin.Imm32.dst);
   1609          vex_printf(", 0x%x", i->ARMin.Imm32.imm32);
   1610          return;
   1611       case ARMin_LdSt32:
   1612          if (i->ARMin.LdSt32.isLoad) {
   1613             vex_printf("ldr%s ", i->ARMin.LdSt32.cc == ARMcc_AL ? "  "
   1614                                     : showARMCondCode(i->ARMin.LdSt32.cc));
   1615             ppHRegARM(i->ARMin.LdSt32.rD);
   1616             vex_printf(", ");
   1617             ppARMAMode1(i->ARMin.LdSt32.amode);
   1618          } else {
   1619             vex_printf("str%s ", i->ARMin.LdSt32.cc == ARMcc_AL ? "  "
   1620                                     : showARMCondCode(i->ARMin.LdSt32.cc));
   1621             ppARMAMode1(i->ARMin.LdSt32.amode);
   1622             vex_printf(", ");
   1623             ppHRegARM(i->ARMin.LdSt32.rD);
   1624          }
   1625          return;
   1626       case ARMin_LdSt16:
   1627          if (i->ARMin.LdSt16.isLoad) {
   1628             vex_printf("%s%s%s",
   1629                        i->ARMin.LdSt16.signedLoad ? "ldrsh" : "ldrh",
   1630                        i->ARMin.LdSt16.cc == ARMcc_AL ? "  "
   1631                           : showARMCondCode(i->ARMin.LdSt16.cc),
   1632                        i->ARMin.LdSt16.signedLoad ? " " : "  ");
   1633             ppHRegARM(i->ARMin.LdSt16.rD);
   1634             vex_printf(", ");
   1635             ppARMAMode2(i->ARMin.LdSt16.amode);
   1636          } else {
   1637             vex_printf("strh%s  ",
   1638                        i->ARMin.LdSt16.cc == ARMcc_AL ? "  "
   1639                           : showARMCondCode(i->ARMin.LdSt16.cc));
   1640             ppARMAMode2(i->ARMin.LdSt16.amode);
   1641             vex_printf(", ");
   1642             ppHRegARM(i->ARMin.LdSt16.rD);
   1643          }
   1644          return;
   1645       case ARMin_LdSt8U:
   1646          if (i->ARMin.LdSt8U.isLoad) {
   1647             vex_printf("ldrb%s  ", i->ARMin.LdSt8U.cc == ARMcc_AL ? "  "
   1648                                       : showARMCondCode(i->ARMin.LdSt8U.cc));
   1649             ppHRegARM(i->ARMin.LdSt8U.rD);
   1650             vex_printf(", ");
   1651             ppARMAMode1(i->ARMin.LdSt8U.amode);
   1652          } else {
   1653             vex_printf("strb%s  ", i->ARMin.LdSt8U.cc == ARMcc_AL ? "  "
   1654                                       : showARMCondCode(i->ARMin.LdSt8U.cc));
   1655             ppARMAMode1(i->ARMin.LdSt8U.amode);
   1656             vex_printf(", ");
   1657             ppHRegARM(i->ARMin.LdSt8U.rD);
   1658          }
   1659          return;
   1660       case ARMin_Ld8S:
   1661          vex_printf("ldrsb%s ", i->ARMin.Ld8S.cc == ARMcc_AL ? "  "
   1662                                    : showARMCondCode(i->ARMin.Ld8S.cc));
   1663          ppARMAMode2(i->ARMin.Ld8S.amode);
   1664          vex_printf(", ");
   1665          ppHRegARM(i->ARMin.Ld8S.rD);
   1666          return;
   1667       case ARMin_XDirect:
   1668          vex_printf("(xDirect) ");
   1669          vex_printf("if (%%cpsr.%s) { ",
   1670                     showARMCondCode(i->ARMin.XDirect.cond));
   1671          vex_printf("movw r12,0x%x; ",
   1672                     (UInt)(i->ARMin.XDirect.dstGA & 0xFFFF));
   1673          vex_printf("movt r12,0x%x; ",
   1674                     (UInt)((i->ARMin.XDirect.dstGA >> 16) & 0xFFFF));
   1675          vex_printf("str r12,");
   1676          ppARMAMode1(i->ARMin.XDirect.amR15T);
   1677          vex_printf("; movw r12,LO16($disp_cp_chain_me_to_%sEP); ",
   1678                     i->ARMin.XDirect.toFastEP ? "fast" : "slow");
   1679          vex_printf("movt r12,HI16($disp_cp_chain_me_to_%sEP); ",
   1680                     i->ARMin.XDirect.toFastEP ? "fast" : "slow");
   1681          vex_printf("blx r12 }");
   1682          return;
   1683       case ARMin_XIndir:
   1684          vex_printf("(xIndir) ");
   1685          vex_printf("if (%%cpsr.%s) { ",
   1686                     showARMCondCode(i->ARMin.XIndir.cond));
   1687          vex_printf("str ");
   1688          ppHRegARM(i->ARMin.XIndir.dstGA);
   1689          vex_printf(",");
   1690          ppARMAMode1(i->ARMin.XIndir.amR15T);
   1691          vex_printf("; movw r12,LO16($disp_cp_xindir); ");
   1692          vex_printf("movt r12,HI16($disp_cp_xindir); ");
   1693          vex_printf("blx r12 }");
   1694          return;
   1695       case ARMin_XAssisted:
   1696          vex_printf("(xAssisted) ");
   1697          vex_printf("if (%%cpsr.%s) { ",
   1698                     showARMCondCode(i->ARMin.XAssisted.cond));
   1699          vex_printf("str ");
   1700          ppHRegARM(i->ARMin.XAssisted.dstGA);
   1701          vex_printf(",");
   1702          ppARMAMode1(i->ARMin.XAssisted.amR15T);
   1703          vex_printf("movw r8,$IRJumpKind_to_TRCVAL(%d); ",
   1704                     (Int)i->ARMin.XAssisted.jk);
   1705          vex_printf("movw r12,LO16($disp_cp_xassisted); ");
   1706          vex_printf("movt r12,HI16($disp_cp_xassisted); ");
   1707          vex_printf("blx r12 }");
   1708          return;
   1709       case ARMin_CMov:
   1710          vex_printf("mov%s ", showARMCondCode(i->ARMin.CMov.cond));
   1711          ppHRegARM(i->ARMin.CMov.dst);
   1712          vex_printf(", ");
   1713          ppARMRI84(i->ARMin.CMov.src);
   1714          return;
   1715       case ARMin_Call:
   1716          vex_printf("call%s  ",
   1717                     i->ARMin.Call.cond==ARMcc_AL
   1718                        ? "" : showARMCondCode(i->ARMin.Call.cond));
   1719          vex_printf("0x%x [nArgRegs=%d, ",
   1720                     i->ARMin.Call.target, i->ARMin.Call.nArgRegs);
   1721          ppRetLoc(i->ARMin.Call.rloc);
   1722          vex_printf("]");
   1723          return;
   1724       case ARMin_Mul:
   1725          vex_printf("%-5s ", showARMMulOp(i->ARMin.Mul.op));
   1726          if (i->ARMin.Mul.op == ARMmul_PLAIN) {
   1727             vex_printf("r0, r2, r3");
   1728          } else {
   1729             vex_printf("r1:r0, r2, r3");
   1730          }
   1731          return;
   1732       case ARMin_LdrEX: {
   1733          const HChar* sz = "";
   1734          switch (i->ARMin.LdrEX.szB) {
   1735             case 1: sz = "b"; break; case 2: sz = "h"; break;
   1736             case 8: sz = "d"; break; case 4: break;
   1737             default: vassert(0);
   1738          }
   1739          vex_printf("ldrex%s %sr2, [r4]",
   1740                     sz, i->ARMin.LdrEX.szB == 8 ? "r3:" : "");
   1741          return;
   1742       }
   1743       case ARMin_StrEX: {
   1744          const HChar* sz = "";
   1745          switch (i->ARMin.StrEX.szB) {
   1746             case 1: sz = "b"; break; case 2: sz = "h"; break;
   1747             case 8: sz = "d"; break; case 4: break;
   1748             default: vassert(0);
   1749          }
   1750          vex_printf("strex%s r0, %sr2, [r4]",
   1751                     sz, i->ARMin.StrEX.szB == 8 ? "r3:" : "");
   1752          return;
   1753       }
   1754       case ARMin_VLdStD:
   1755          if (i->ARMin.VLdStD.isLoad) {
   1756             vex_printf("fldd  ");
   1757             ppHRegARM(i->ARMin.VLdStD.dD);
   1758             vex_printf(", ");
   1759             ppARMAModeV(i->ARMin.VLdStD.amode);
   1760          } else {
   1761             vex_printf("fstd  ");
   1762             ppARMAModeV(i->ARMin.VLdStD.amode);
   1763             vex_printf(", ");
   1764             ppHRegARM(i->ARMin.VLdStD.dD);
   1765          }
   1766          return;
   1767       case ARMin_VLdStS:
   1768          if (i->ARMin.VLdStS.isLoad) {
   1769             vex_printf("flds  ");
   1770             ppHRegARM(i->ARMin.VLdStS.fD);
   1771             vex_printf(", ");
   1772             ppARMAModeV(i->ARMin.VLdStS.amode);
   1773          } else {
   1774             vex_printf("fsts  ");
   1775             ppARMAModeV(i->ARMin.VLdStS.amode);
   1776             vex_printf(", ");
   1777             ppHRegARM(i->ARMin.VLdStS.fD);
   1778          }
   1779          return;
   1780       case ARMin_VAluD:
   1781          vex_printf("f%-3sd ", showARMVfpOp(i->ARMin.VAluD.op));
   1782          ppHRegARM(i->ARMin.VAluD.dst);
   1783          vex_printf(", ");
   1784          ppHRegARM(i->ARMin.VAluD.argL);
   1785          vex_printf(", ");
   1786          ppHRegARM(i->ARMin.VAluD.argR);
   1787          return;
   1788       case ARMin_VAluS:
   1789          vex_printf("f%-3ss ", showARMVfpOp(i->ARMin.VAluS.op));
   1790          ppHRegARM(i->ARMin.VAluS.dst);
   1791          vex_printf(", ");
   1792          ppHRegARM(i->ARMin.VAluS.argL);
   1793          vex_printf(", ");
   1794          ppHRegARM(i->ARMin.VAluS.argR);
   1795          return;
   1796       case ARMin_VUnaryD:
   1797          vex_printf("f%-3sd ", showARMVfpUnaryOp(i->ARMin.VUnaryD.op));
   1798          ppHRegARM(i->ARMin.VUnaryD.dst);
   1799          vex_printf(", ");
   1800          ppHRegARM(i->ARMin.VUnaryD.src);
   1801          return;
   1802       case ARMin_VUnaryS:
   1803          vex_printf("f%-3ss ", showARMVfpUnaryOp(i->ARMin.VUnaryS.op));
   1804          ppHRegARM(i->ARMin.VUnaryS.dst);
   1805          vex_printf(", ");
   1806          ppHRegARM(i->ARMin.VUnaryS.src);
   1807          return;
   1808       case ARMin_VCmpD:
   1809          vex_printf("fcmpd ");
   1810          ppHRegARM(i->ARMin.VCmpD.argL);
   1811          vex_printf(", ");
   1812          ppHRegARM(i->ARMin.VCmpD.argR);
   1813          vex_printf(" ; fmstat");
   1814          return;
   1815       case ARMin_VCMovD:
   1816          vex_printf("fcpyd%s ", showARMCondCode(i->ARMin.VCMovD.cond));
   1817          ppHRegARM(i->ARMin.VCMovD.dst);
   1818          vex_printf(", ");
   1819          ppHRegARM(i->ARMin.VCMovD.src);
   1820          return;
   1821       case ARMin_VCMovS:
   1822          vex_printf("fcpys%s ", showARMCondCode(i->ARMin.VCMovS.cond));
   1823          ppHRegARM(i->ARMin.VCMovS.dst);
   1824          vex_printf(", ");
   1825          ppHRegARM(i->ARMin.VCMovS.src);
   1826          return;
   1827       case ARMin_VCvtSD:
   1828          vex_printf("fcvt%s ", i->ARMin.VCvtSD.sToD ? "ds" : "sd");
   1829          ppHRegARM(i->ARMin.VCvtSD.dst);
   1830          vex_printf(", ");
   1831          ppHRegARM(i->ARMin.VCvtSD.src);
   1832          return;
   1833       case ARMin_VXferQ:
   1834          if (i->ARMin.VXferQ.toQ) {
   1835             vex_printf("vmov ");
   1836             ppHRegARM(i->ARMin.VXferQ.qD);
   1837             vex_printf("-lo64, ");
   1838             ppHRegARM(i->ARMin.VXferQ.dLo);
   1839             vex_printf(" ; vmov ");
   1840             ppHRegARM(i->ARMin.VXferQ.qD);
   1841             vex_printf("-hi64, ");
   1842             ppHRegARM(i->ARMin.VXferQ.dHi);
   1843          } else {
   1844             vex_printf("vmov ");
   1845             ppHRegARM(i->ARMin.VXferQ.dLo);
   1846             vex_printf(", ");
   1847             ppHRegARM(i->ARMin.VXferQ.qD);
   1848             vex_printf("-lo64");
   1849             vex_printf(" ; vmov ");
   1850             ppHRegARM(i->ARMin.VXferQ.dHi);
   1851             vex_printf(", ");
   1852             ppHRegARM(i->ARMin.VXferQ.qD);
   1853             vex_printf("-hi64");
   1854          }
   1855          return;
   1856       case ARMin_VXferD:
   1857          vex_printf("vmov  ");
   1858          if (i->ARMin.VXferD.toD) {
   1859             ppHRegARM(i->ARMin.VXferD.dD);
   1860             vex_printf(", ");
   1861             ppHRegARM(i->ARMin.VXferD.rLo);
   1862             vex_printf(", ");
   1863             ppHRegARM(i->ARMin.VXferD.rHi);
   1864          } else {
   1865             ppHRegARM(i->ARMin.VXferD.rLo);
   1866             vex_printf(", ");
   1867             ppHRegARM(i->ARMin.VXferD.rHi);
   1868             vex_printf(", ");
   1869             ppHRegARM(i->ARMin.VXferD.dD);
   1870          }
   1871          return;
   1872       case ARMin_VXferS:
   1873          vex_printf("vmov  ");
   1874          if (i->ARMin.VXferS.toS) {
   1875             ppHRegARM(i->ARMin.VXferS.fD);
   1876             vex_printf(", ");
   1877             ppHRegARM(i->ARMin.VXferS.rLo);
   1878          } else {
   1879             ppHRegARM(i->ARMin.VXferS.rLo);
   1880             vex_printf(", ");
   1881             ppHRegARM(i->ARMin.VXferS.fD);
   1882          }
   1883          return;
   1884       case ARMin_VCvtID: {
   1885          const HChar* nm = "?";
   1886          if (i->ARMin.VCvtID.iToD) {
   1887             nm = i->ARMin.VCvtID.syned ? "fsitod" : "fuitod";
   1888          } else {
   1889             nm = i->ARMin.VCvtID.syned ? "ftosid" : "ftouid";
   1890          }
   1891          vex_printf("%s ", nm);
   1892          ppHRegARM(i->ARMin.VCvtID.dst);
   1893          vex_printf(", ");
   1894          ppHRegARM(i->ARMin.VCvtID.src);
   1895          return;
   1896       }
   1897       case ARMin_VRIntR: {
   1898          const HChar* sz = i->ARMin.VRIntR.isF64 ? "f64" : "f32";
   1899          vex_printf("vrintr.%s.%s ", sz, sz);
   1900          ppHRegARM(i->ARMin.VRIntR.dst);
   1901          vex_printf(", ");
   1902          ppHRegARM(i->ARMin.VRIntR.src);
   1903          return;
   1904       }
   1905       case ARMin_VMinMaxNum: {
   1906          const HChar* sz = i->ARMin.VMinMaxNum.isF64 ? "f64" : "f32";
   1907          const HChar* nm = i->ARMin.VMinMaxNum.isMax ? "vmaxnm" : "vminnm";
   1908          vex_printf("%s.%s ", nm, sz);
   1909          ppHRegARM(i->ARMin.VMinMaxNum.dst);
   1910          vex_printf(", ");
   1911          ppHRegARM(i->ARMin.VMinMaxNum.srcL);
   1912          vex_printf(", ");
   1913          ppHRegARM(i->ARMin.VMinMaxNum.srcR);
   1914          return;
   1915       }
   1916       case ARMin_FPSCR:
   1917          if (i->ARMin.FPSCR.toFPSCR) {
   1918             vex_printf("fmxr  fpscr, ");
   1919             ppHRegARM(i->ARMin.FPSCR.iReg);
   1920          } else {
   1921             vex_printf("fmrx  ");
   1922             ppHRegARM(i->ARMin.FPSCR.iReg);
   1923             vex_printf(", fpscr");
   1924          }
   1925          return;
   1926       case ARMin_MFence:
   1927          vex_printf("(mfence) dsb sy; dmb sy; isb");
   1928          return;
   1929       case ARMin_CLREX:
   1930          vex_printf("clrex");
   1931          return;
   1932       case ARMin_NLdStQ:
   1933          if (i->ARMin.NLdStQ.isLoad)
   1934             vex_printf("vld1.32 {");
   1935          else
   1936             vex_printf("vst1.32 {");
   1937          ppHRegARM(i->ARMin.NLdStQ.dQ);
   1938          vex_printf("} ");
   1939          ppARMAModeN(i->ARMin.NLdStQ.amode);
   1940          return;
   1941       case ARMin_NLdStD:
   1942          if (i->ARMin.NLdStD.isLoad)
   1943             vex_printf("vld1.32 {");
   1944          else
   1945             vex_printf("vst1.32 {");
   1946          ppHRegARM(i->ARMin.NLdStD.dD);
   1947          vex_printf("} ");
   1948          ppARMAModeN(i->ARMin.NLdStD.amode);
   1949          return;
   1950       case ARMin_NUnary:
   1951          vex_printf("%s%s%s  ",
   1952                     showARMNeonUnOp(i->ARMin.NUnary.op),
   1953                     showARMNeonUnOpDataType(i->ARMin.NUnary.op),
   1954                     showARMNeonDataSize(i));
   1955          ppHRegARM(i->ARMin.NUnary.dst);
   1956          vex_printf(", ");
   1957          ppHRegARM(i->ARMin.NUnary.src);
   1958          if (i->ARMin.NUnary.op == ARMneon_EQZ)
   1959             vex_printf(", #0");
   1960          if (i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
   1961              i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
   1962              i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
   1963              i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF) {
   1964             vex_printf(", #%u", i->ARMin.NUnary.size);
   1965          }
   1966          if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
   1967              i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
   1968              i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
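                    /* For the VQSHLN* ops, the size field packs both the
                       lane width and the shift amount: bit 6 set means
                       64-bit lanes with imm = size-64, bit 5 means 32-bit
                       (size-32), bit 4 means 16-bit (size-16), and bit 3
                       means 8-bit (size-8). */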
   1969             UInt size;
   1970             size = i->ARMin.NUnary.size;
   1971             if (size & 0x40) {
   1972                vex_printf(", #%u", size - 64);
   1973             } else if (size & 0x20) {
   1974                vex_printf(", #%u", size - 32);
   1975             } else if (size & 0x10) {
   1976                vex_printf(", #%u", size - 16);
   1977             } else if (size & 0x08) {
   1978                vex_printf(", #%u", size - 8);
   1979             }
   1980          }
   1981          return;
   1982       case ARMin_NUnaryS:
   1983          vex_printf("%s%s%s  ",
   1984                     showARMNeonUnOpS(i->ARMin.NUnaryS.op),
   1985                     showARMNeonUnOpSDataType(i->ARMin.NUnaryS.op),
   1986                     showARMNeonDataSize(i));
   1987          ppARMNRS(i->ARMin.NUnaryS.dst);
   1988          vex_printf(", ");
   1989          ppARMNRS(i->ARMin.NUnaryS.src);
   1990          return;
   1991       case ARMin_NShift:
   1992          vex_printf("%s%s%s  ",
   1993                     showARMNeonShiftOp(i->ARMin.NShift.op),
   1994                     showARMNeonShiftOpDataType(i->ARMin.NShift.op),
   1995                     showARMNeonDataSize(i));
   1996          ppHRegARM(i->ARMin.NShift.dst);
   1997          vex_printf(", ");
   1998          ppHRegARM(i->ARMin.NShift.argL);
   1999          vex_printf(", ");
   2000          ppHRegARM(i->ARMin.NShift.argR);
   2001          return;
   2002       case ARMin_NShl64:
   2003          vex_printf("vshl.i64 ");
   2004          ppHRegARM(i->ARMin.NShl64.dst);
   2005          vex_printf(", ");
   2006          ppHRegARM(i->ARMin.NShl64.src);
   2007          vex_printf(", #%u", i->ARMin.NShl64.amt);
   2008          return;
   2009       case ARMin_NDual:
   2010          vex_printf("%s%s%s  ",
   2011                     showARMNeonDualOp(i->ARMin.NDual.op),
   2012                     showARMNeonDualOpDataType(i->ARMin.NDual.op),
   2013                     showARMNeonDataSize(i));
   2014          ppHRegARM(i->ARMin.NDual.arg1);
   2015          vex_printf(", ");
   2016          ppHRegARM(i->ARMin.NDual.arg2);
   2017          return;
   2018       case ARMin_NBinary:
   2019          vex_printf("%s%s%s",
   2020                     showARMNeonBinOp(i->ARMin.NBinary.op),
   2021                     showARMNeonBinOpDataType(i->ARMin.NBinary.op),
   2022                     showARMNeonDataSize(i));
   2023          vex_printf("  ");
   2024          ppHRegARM(i->ARMin.NBinary.dst);
   2025          vex_printf(", ");
   2026          ppHRegARM(i->ARMin.NBinary.argL);
   2027          vex_printf(", ");
   2028          ppHRegARM(i->ARMin.NBinary.argR);
   2029          return;
   2030       case ARMin_NeonImm:
   2031          vex_printf("vmov  ");
   2032          ppHRegARM(i->ARMin.NeonImm.dst);
   2033          vex_printf(", ");
   2034          ppARMNImm(i->ARMin.NeonImm.imm);
   2035          return;
   2036       case ARMin_NCMovQ:
   2037          vex_printf("vmov%s ", showARMCondCode(i->ARMin.NCMovQ.cond));
   2038          ppHRegARM(i->ARMin.NCMovQ.dst);
   2039          vex_printf(", ");
   2040          ppHRegARM(i->ARMin.NCMovQ.src);
   2041          return;
   2042       case ARMin_Add32:
   2043          vex_printf("add32 ");
   2044          ppHRegARM(i->ARMin.Add32.rD);
   2045          vex_printf(", ");
   2046          ppHRegARM(i->ARMin.Add32.rN);
   2047          vex_printf(", ");
   2048          vex_printf("%u", i->ARMin.Add32.imm32);
   2049          return;
   2050       case ARMin_EvCheck:
   2051          vex_printf("(evCheck) ldr r12,");
   2052          ppARMAMode1(i->ARMin.EvCheck.amCounter);
   2053          vex_printf("; subs r12,r12,$1; str r12,");
   2054          ppARMAMode1(i->ARMin.EvCheck.amCounter);
   2055          vex_printf("; bpl nofail; ldr r12,");
   2056          ppARMAMode1(i->ARMin.EvCheck.amFailAddr);
   2057          vex_printf("; bx r12; nofail:");
   2058          return;
   2059       case ARMin_ProfInc:
   2060          vex_printf("(profInc) movw r12,LO16($NotKnownYet); "
   2061                     "movw r12,HI16($NotKnownYet); "
   2062                     "ldr r11,[r12]; "
   2063                     "adds r11,r11,$1; "
   2064                     "str r11,[r12]; "
   2065                     "ldr r11,[r12+4]; "
   2066                     "adc r11,r11,$0; "
   2067                     "str r11,[r12+4]");
   2068          return;
   2069       default:
   2070          vex_printf("ppARMInstr: unhandled case (tag %d)", (Int)i->tag);
   2071          vpanic("ppARMInstr(1)");
   2072          return;
   2073    }
   2074 }
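        /* Usage sketch (illustrative): the printer has no side effects on
           the instruction itself, e.g.
              ppARMInstr(ARMInstr_MFence());
           prints "(mfence) dsb sy; dmb sy; isb". */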
   2075 
   2076 
   2077 /* --------- Helpers for register allocation. --------- */
   2078 
   2079 void getRegUsage_ARMInstr ( HRegUsage* u, const ARMInstr* i, Bool mode64 )
   2080 {
   2081    vassert(mode64 == False);
   2082    initHRegUsage(u);
   2083    switch (i->tag) {
   2084       case ARMin_Alu:
   2085          addHRegUse(u, HRmWrite, i->ARMin.Alu.dst);
   2086          addHRegUse(u, HRmRead, i->ARMin.Alu.argL);
   2087          addRegUsage_ARMRI84(u, i->ARMin.Alu.argR);
   2088          return;
   2089       case ARMin_Shift:
   2090          addHRegUse(u, HRmWrite, i->ARMin.Shift.dst);
   2091          addHRegUse(u, HRmRead, i->ARMin.Shift.argL);
   2092          addRegUsage_ARMRI5(u, i->ARMin.Shift.argR);
   2093          return;
   2094       case ARMin_Unary:
   2095          addHRegUse(u, HRmWrite, i->ARMin.Unary.dst);
   2096          addHRegUse(u, HRmRead, i->ARMin.Unary.src);
   2097          return;
   2098       case ARMin_CmpOrTst:
   2099          addHRegUse(u, HRmRead, i->ARMin.CmpOrTst.argL);
   2100          addRegUsage_ARMRI84(u, i->ARMin.CmpOrTst.argR);
   2101          return;
   2102       case ARMin_Mov:
   2103          addHRegUse(u, HRmWrite, i->ARMin.Mov.dst);
   2104          addRegUsage_ARMRI84(u, i->ARMin.Mov.src);
   2105          return;
   2106       case ARMin_Imm32:
   2107          addHRegUse(u, HRmWrite, i->ARMin.Imm32.dst);
   2108          return;
   2109       case ARMin_LdSt32:
   2110          addRegUsage_ARMAMode1(u, i->ARMin.LdSt32.amode);
   2111          if (i->ARMin.LdSt32.isLoad) {
   2112             addHRegUse(u, HRmWrite, i->ARMin.LdSt32.rD);
   2113             if (i->ARMin.LdSt32.cc != ARMcc_AL)
   2114                addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
   2115          } else {
   2116             addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
   2117          }
   2118          return;
   2119       case ARMin_LdSt16:
   2120          addRegUsage_ARMAMode2(u, i->ARMin.LdSt16.amode);
   2121          if (i->ARMin.LdSt16.isLoad) {
   2122             addHRegUse(u, HRmWrite, i->ARMin.LdSt16.rD);
   2123             if (i->ARMin.LdSt16.cc != ARMcc_AL)
   2124                addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
   2125          } else {
   2126             addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
   2127          }
   2128          return;
   2129       case ARMin_LdSt8U:
   2130          addRegUsage_ARMAMode1(u, i->ARMin.LdSt8U.amode);
   2131          if (i->ARMin.LdSt8U.isLoad) {
   2132             addHRegUse(u, HRmWrite, i->ARMin.LdSt8U.rD);
   2133             if (i->ARMin.LdSt8U.cc != ARMcc_AL)
   2134                addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
   2135          } else {
   2136             addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
   2137          }
   2138          return;
   2139       case ARMin_Ld8S:
   2140          addRegUsage_ARMAMode2(u, i->ARMin.Ld8S.amode);
   2141          addHRegUse(u, HRmWrite, i->ARMin.Ld8S.rD);
   2142          if (i->ARMin.Ld8S.cc != ARMcc_AL)
   2143             addHRegUse(u, HRmRead, i->ARMin.Ld8S.rD);
   2144          return;
   2145       /* XDirect/XIndir/XAssisted are also a bit subtle.  They
   2146          conditionally exit the block.  Hence we only need to list (1)
   2147          the registers that they read, and (2) the registers that they
   2148          write in the case where the block is not exited.  (2) is
   2149          empty, hence only (1) is relevant here. */
   2150       case ARMin_XDirect:
   2151          addRegUsage_ARMAMode1(u, i->ARMin.XDirect.amR15T);
   2152          return;
   2153       case ARMin_XIndir:
   2154          addHRegUse(u, HRmRead, i->ARMin.XIndir.dstGA);
   2155          addRegUsage_ARMAMode1(u, i->ARMin.XIndir.amR15T);
   2156          return;
   2157       case ARMin_XAssisted:
   2158          addHRegUse(u, HRmRead, i->ARMin.XAssisted.dstGA);
   2159          addRegUsage_ARMAMode1(u, i->ARMin.XAssisted.amR15T);
   2160          return;
   2161       case ARMin_CMov:
   2162          addHRegUse(u, HRmWrite, i->ARMin.CMov.dst);
   2163          addHRegUse(u, HRmRead,  i->ARMin.CMov.dst);
   2164          addRegUsage_ARMRI84(u, i->ARMin.CMov.src);
   2165          return;
   2166       case ARMin_Call:
   2167          /* logic and comments copied/modified from x86 back end */
   2168          /* This is a bit subtle. */
   2169          /* First off, claim it trashes all the caller-saved regs
   2170             which fall within the register allocator's jurisdiction.
   2171             These I believe to be r0,1,2,3.  If it turns out that r9
   2172             is also caller-saved, then we'll have to add that here
   2173             too. */
   2174          addHRegUse(u, HRmWrite, hregARM_R0());
   2175          addHRegUse(u, HRmWrite, hregARM_R1());
   2176          addHRegUse(u, HRmWrite, hregARM_R2());
   2177          addHRegUse(u, HRmWrite, hregARM_R3());
   2178          /* Now we have to state any parameter-carrying registers
   2179             which might be read.  This depends on nArgRegs. */
   2180          switch (i->ARMin.Call.nArgRegs) {
   2181             case 4: addHRegUse(u, HRmRead, hregARM_R3()); /*fallthru*/
   2182             case 3: addHRegUse(u, HRmRead, hregARM_R2()); /*fallthru*/
   2183             case 2: addHRegUse(u, HRmRead, hregARM_R1()); /*fallthru*/
   2184             case 1: addHRegUse(u, HRmRead, hregARM_R0()); break;
   2185             case 0: break;
   2186             default: vpanic("getRegUsage_ARM:Call:regparms");
   2187          }
   2188          /* Finally, there is the issue that the insn trashes a
   2189             register because the literal target address has to be
   2190             loaded into a register.  Fortunately, for the nArgRegs=
   2191             0/1/2/3 case, we can use r0, r1, r2 or r3 respectively, so
   2192             this does not cause any further damage.  For the
   2193             nArgRegs=4 case, we'll have to choose another register
   2194             arbitrarily since all the caller saved regs are used for
   2195             parameters, and so we might as well choose r11.
   2196             */
   2197          if (i->ARMin.Call.nArgRegs == 4)
   2198             addHRegUse(u, HRmWrite, hregARM_R11());
   2199          /* Upshot of this is that the assembler really must observe
   2200             the here-stated convention of which register to use as an
   2201             address temporary, depending on nArgRegs: 0==r0,
   2202             1==r1, 2==r2, 3==r3, 4==r11 */
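                 /* Example (illustrative): for nArgRegs == 2 the recorded
                    usage is r0..r3 written (the caller-saved trash) plus
                    r0 and r1 read (the two arguments), and per the
                    convention above the emitter will use r2 as the
                    address temporary. */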
   2203          return;
   2204       case ARMin_Mul:
   2205          addHRegUse(u, HRmRead, hregARM_R2());
   2206          addHRegUse(u, HRmRead, hregARM_R3());
   2207          addHRegUse(u, HRmWrite, hregARM_R0());
   2208          if (i->ARMin.Mul.op != ARMmul_PLAIN)
   2209             addHRegUse(u, HRmWrite, hregARM_R1());
   2210          return;
   2211       case ARMin_LdrEX:
   2212          addHRegUse(u, HRmRead, hregARM_R4());
   2213          addHRegUse(u, HRmWrite, hregARM_R2());
   2214          if (i->ARMin.LdrEX.szB == 8)
   2215             addHRegUse(u, HRmWrite, hregARM_R3());
   2216          return;
   2217       case ARMin_StrEX:
   2218          addHRegUse(u, HRmRead, hregARM_R4());
   2219          addHRegUse(u, HRmWrite, hregARM_R0());
   2220          addHRegUse(u, HRmRead, hregARM_R2());
   2221          if (i->ARMin.StrEX.szB == 8)
   2222             addHRegUse(u, HRmRead, hregARM_R3());
   2223          return;
   2224       case ARMin_VLdStD:
   2225          addRegUsage_ARMAModeV(u, i->ARMin.VLdStD.amode);
   2226          if (i->ARMin.VLdStD.isLoad) {
   2227             addHRegUse(u, HRmWrite, i->ARMin.VLdStD.dD);
   2228          } else {
   2229             addHRegUse(u, HRmRead, i->ARMin.VLdStD.dD);
   2230          }
   2231          return;
   2232       case ARMin_VLdStS:
   2233          addRegUsage_ARMAModeV(u, i->ARMin.VLdStS.amode);
   2234          if (i->ARMin.VLdStS.isLoad) {
   2235             addHRegUse(u, HRmWrite, i->ARMin.VLdStS.fD);
   2236          } else {
   2237             addHRegUse(u, HRmRead, i->ARMin.VLdStS.fD);
   2238          }
   2239          return;
   2240       case ARMin_VAluD:
   2241          addHRegUse(u, HRmWrite, i->ARMin.VAluD.dst);
   2242          addHRegUse(u, HRmRead, i->ARMin.VAluD.argL);
   2243          addHRegUse(u, HRmRead, i->ARMin.VAluD.argR);
   2244          return;
   2245       case ARMin_VAluS:
   2246          addHRegUse(u, HRmWrite, i->ARMin.VAluS.dst);
   2247          addHRegUse(u, HRmRead, i->ARMin.VAluS.argL);
   2248          addHRegUse(u, HRmRead, i->ARMin.VAluS.argR);
   2249          return;
   2250       case ARMin_VUnaryD:
   2251          addHRegUse(u, HRmWrite, i->ARMin.VUnaryD.dst);
   2252          addHRegUse(u, HRmRead, i->ARMin.VUnaryD.src);
   2253          return;
   2254       case ARMin_VUnaryS:
   2255          addHRegUse(u, HRmWrite, i->ARMin.VUnaryS.dst);
   2256          addHRegUse(u, HRmRead, i->ARMin.VUnaryS.src);
   2257          return;
   2258       case ARMin_VCmpD:
   2259          addHRegUse(u, HRmRead, i->ARMin.VCmpD.argL);
   2260          addHRegUse(u, HRmRead, i->ARMin.VCmpD.argR);
   2261          return;
   2262       case ARMin_VCMovD:
   2263          addHRegUse(u, HRmWrite, i->ARMin.VCMovD.dst);
   2264          addHRegUse(u, HRmRead,  i->ARMin.VCMovD.dst);
   2265          addHRegUse(u, HRmRead,  i->ARMin.VCMovD.src);
   2266          return;
   2267       case ARMin_VCMovS:
   2268          addHRegUse(u, HRmWrite, i->ARMin.VCMovS.dst);
   2269          addHRegUse(u, HRmRead,  i->ARMin.VCMovS.dst);
   2270          addHRegUse(u, HRmRead,  i->ARMin.VCMovS.src);
   2271          return;
   2272       case ARMin_VCvtSD:
   2273          addHRegUse(u, HRmWrite, i->ARMin.VCvtSD.dst);
   2274          addHRegUse(u, HRmRead,  i->ARMin.VCvtSD.src);
   2275          return;
   2276       case ARMin_VXferQ:
   2277          if (i->ARMin.VXferQ.toQ) {
   2278             addHRegUse(u, HRmWrite, i->ARMin.VXferQ.qD);
   2279             addHRegUse(u, HRmRead,  i->ARMin.VXferQ.dHi);
   2280             addHRegUse(u, HRmRead,  i->ARMin.VXferQ.dLo);
   2281          } else {
   2282             addHRegUse(u, HRmRead,  i->ARMin.VXferQ.qD);
   2283             addHRegUse(u, HRmWrite, i->ARMin.VXferQ.dHi);
   2284             addHRegUse(u, HRmWrite, i->ARMin.VXferQ.dLo);
   2285          }
   2286          return;
   2287       case ARMin_VXferD:
   2288          if (i->ARMin.VXferD.toD) {
   2289             addHRegUse(u, HRmWrite, i->ARMin.VXferD.dD);
   2290             addHRegUse(u, HRmRead,  i->ARMin.VXferD.rHi);
   2291             addHRegUse(u, HRmRead,  i->ARMin.VXferD.rLo);
   2292          } else {
   2293             addHRegUse(u, HRmRead,  i->ARMin.VXferD.dD);
   2294             addHRegUse(u, HRmWrite, i->ARMin.VXferD.rHi);
   2295             addHRegUse(u, HRmWrite, i->ARMin.VXferD.rLo);
   2296          }
   2297          return;
   2298       case ARMin_VXferS:
   2299          if (i->ARMin.VXferS.toS) {
   2300             addHRegUse(u, HRmWrite, i->ARMin.VXferS.fD);
   2301             addHRegUse(u, HRmRead,  i->ARMin.VXferS.rLo);
   2302          } else {
   2303             addHRegUse(u, HRmRead,  i->ARMin.VXferS.fD);
   2304             addHRegUse(u, HRmWrite, i->ARMin.VXferS.rLo);
   2305          }
   2306          return;
   2307       case ARMin_VCvtID:
   2308          addHRegUse(u, HRmWrite, i->ARMin.VCvtID.dst);
   2309          addHRegUse(u, HRmRead,  i->ARMin.VCvtID.src);
   2310          return;
   2311       case ARMin_VRIntR:
   2312          addHRegUse(u, HRmWrite, i->ARMin.VRIntR.dst);
   2313          addHRegUse(u, HRmRead,  i->ARMin.VRIntR.src);
   2314          return;
   2315       case ARMin_VMinMaxNum:
   2316          addHRegUse(u, HRmWrite, i->ARMin.VMinMaxNum.dst);
   2317          addHRegUse(u, HRmRead,  i->ARMin.VMinMaxNum.srcL);
   2318          addHRegUse(u, HRmRead,  i->ARMin.VMinMaxNum.srcR);
   2319          return;
   2320       case ARMin_FPSCR:
   2321          if (i->ARMin.FPSCR.toFPSCR)
   2322             addHRegUse(u, HRmRead, i->ARMin.FPSCR.iReg);
   2323          else
   2324             addHRegUse(u, HRmWrite, i->ARMin.FPSCR.iReg);
   2325          return;
   2326       case ARMin_MFence:
   2327          return;
   2328       case ARMin_CLREX:
   2329          return;
   2330       case ARMin_NLdStQ:
   2331          if (i->ARMin.NLdStQ.isLoad)
   2332             addHRegUse(u, HRmWrite, i->ARMin.NLdStQ.dQ);
   2333          else
   2334             addHRegUse(u, HRmRead, i->ARMin.NLdStQ.dQ);
   2335          addRegUsage_ARMAModeN(u, i->ARMin.NLdStQ.amode);
   2336          return;
   2337       case ARMin_NLdStD:
   2338          if (i->ARMin.NLdStD.isLoad)
   2339             addHRegUse(u, HRmWrite, i->ARMin.NLdStD.dD);
   2340          else
   2341             addHRegUse(u, HRmRead, i->ARMin.NLdStD.dD);
   2342          addRegUsage_ARMAModeN(u, i->ARMin.NLdStD.amode);
   2343          return;
   2344       case ARMin_NUnary:
   2345          addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst);
   2346          addHRegUse(u, HRmRead, i->ARMin.NUnary.src);
   2347          return;
   2348       case ARMin_NUnaryS:
   2349          addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg);
   2350          addHRegUse(u, HRmRead, i->ARMin.NUnaryS.src->reg);
   2351          return;
   2352       case ARMin_NShift:
   2353          addHRegUse(u, HRmWrite, i->ARMin.NShift.dst);
   2354          addHRegUse(u, HRmRead, i->ARMin.NShift.argL);
   2355          addHRegUse(u, HRmRead, i->ARMin.NShift.argR);
   2356          return;
   2357       case ARMin_NShl64:
   2358          addHRegUse(u, HRmWrite, i->ARMin.NShl64.dst);
   2359          addHRegUse(u, HRmRead, i->ARMin.NShl64.src);
   2360          return;
   2361       case ARMin_NDual:
   2362          addHRegUse(u, HRmWrite, i->ARMin.NDual.arg1);
   2363          addHRegUse(u, HRmWrite, i->ARMin.NDual.arg2);
   2364          addHRegUse(u, HRmRead, i->ARMin.NDual.arg1);
   2365          addHRegUse(u, HRmRead, i->ARMin.NDual.arg2);
   2366          return;
   2367       case ARMin_NBinary:
   2368          addHRegUse(u, HRmWrite, i->ARMin.NBinary.dst);
   2369          /* TODO: sometimes dst is also being read!  XXX fix this. */
   2371          addHRegUse(u, HRmRead, i->ARMin.NBinary.argL);
   2372          addHRegUse(u, HRmRead, i->ARMin.NBinary.argR);
   2373          return;
   2374       case ARMin_NeonImm:
   2375          addHRegUse(u, HRmWrite, i->ARMin.NeonImm.dst);
   2376          return;
   2377       case ARMin_NCMovQ:
   2378          addHRegUse(u, HRmWrite, i->ARMin.NCMovQ.dst);
   2379          addHRegUse(u, HRmRead,  i->ARMin.NCMovQ.dst);
   2380          addHRegUse(u, HRmRead,  i->ARMin.NCMovQ.src);
   2381          return;
   2382       case ARMin_Add32:
   2383          addHRegUse(u, HRmWrite, i->ARMin.Add32.rD);
   2384          addHRegUse(u, HRmRead, i->ARMin.Add32.rN);
   2385          return;
   2386       case ARMin_EvCheck:
   2387          /* We expect both amodes to mention only r8, so this is in
   2388             fact pointless (r8 isn't allocatable), but we do it
   2389             anyway for completeness. */
   2390          addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amCounter);
   2391          addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amFailAddr);
   2392          addHRegUse(u, HRmWrite, hregARM_R12()); /* also unavail to RA */
   2393          return;
   2394       case ARMin_ProfInc:
   2395          addHRegUse(u, HRmWrite, hregARM_R12());
   2396          addHRegUse(u, HRmWrite, hregARM_R11());
   2397          return;
   2398       default:
   2399          ppARMInstr(i);
   2400          vpanic("getRegUsage_ARMInstr");
   2401    }
   2402 }
   2403 
   2404 
   2405 void mapRegs_ARMInstr ( HRegRemap* m, ARMInstr* i, Bool mode64 )
   2406 {
   2407    vassert(mode64 == False);
   2408    switch (i->tag) {
   2409       case ARMin_Alu:
   2410          i->ARMin.Alu.dst = lookupHRegRemap(m, i->ARMin.Alu.dst);
   2411          i->ARMin.Alu.argL = lookupHRegRemap(m, i->ARMin.Alu.argL);
   2412          mapRegs_ARMRI84(m, i->ARMin.Alu.argR);
   2413          return;
   2414       case ARMin_Shift:
   2415          i->ARMin.Shift.dst = lookupHRegRemap(m, i->ARMin.Shift.dst);
   2416          i->ARMin.Shift.argL = lookupHRegRemap(m, i->ARMin.Shift.argL);
   2417          mapRegs_ARMRI5(m, i->ARMin.Shift.argR);
   2418          return;
   2419       case ARMin_Unary:
   2420          i->ARMin.Unary.dst = lookupHRegRemap(m, i->ARMin.Unary.dst);
   2421          i->ARMin.Unary.src = lookupHRegRemap(m, i->ARMin.Unary.src);
   2422          return;
   2423       case ARMin_CmpOrTst:
   2424          i->ARMin.CmpOrTst.argL = lookupHRegRemap(m, i->ARMin.CmpOrTst.argL);
   2425          mapRegs_ARMRI84(m, i->ARMin.CmpOrTst.argR);
   2426          return;
   2427       case ARMin_Mov:
   2428          i->ARMin.Mov.dst = lookupHRegRemap(m, i->ARMin.Mov.dst);
   2429          mapRegs_ARMRI84(m, i->ARMin.Mov.src);
   2430          return;
   2431       case ARMin_Imm32:
   2432          i->ARMin.Imm32.dst = lookupHRegRemap(m, i->ARMin.Imm32.dst);
   2433          return;
   2434       case ARMin_LdSt32:
   2435          i->ARMin.LdSt32.rD = lookupHRegRemap(m, i->ARMin.LdSt32.rD);
   2436          mapRegs_ARMAMode1(m, i->ARMin.LdSt32.amode);
   2437          return;
   2438       case ARMin_LdSt16:
   2439          i->ARMin.LdSt16.rD = lookupHRegRemap(m, i->ARMin.LdSt16.rD);
   2440          mapRegs_ARMAMode2(m, i->ARMin.LdSt16.amode);
   2441          return;
   2442       case ARMin_LdSt8U:
   2443          i->ARMin.LdSt8U.rD = lookupHRegRemap(m, i->ARMin.LdSt8U.rD);
   2444          mapRegs_ARMAMode1(m, i->ARMin.LdSt8U.amode);
   2445          return;
   2446       case ARMin_Ld8S:
   2447          i->ARMin.Ld8S.rD = lookupHRegRemap(m, i->ARMin.Ld8S.rD);
   2448          mapRegs_ARMAMode2(m, i->ARMin.Ld8S.amode);
   2449          return;
   2450       case ARMin_XDirect:
   2451          mapRegs_ARMAMode1(m, i->ARMin.XDirect.amR15T);
   2452          return;
   2453       case ARMin_XIndir:
   2454          i->ARMin.XIndir.dstGA
   2455             = lookupHRegRemap(m, i->ARMin.XIndir.dstGA);
   2456          mapRegs_ARMAMode1(m, i->ARMin.XIndir.amR15T);
   2457          return;
   2458       case ARMin_XAssisted:
   2459          i->ARMin.XAssisted.dstGA
   2460             = lookupHRegRemap(m, i->ARMin.XAssisted.dstGA);
   2461          mapRegs_ARMAMode1(m, i->ARMin.XAssisted.amR15T);
   2462          return;
   2463       case ARMin_CMov:
   2464          i->ARMin.CMov.dst = lookupHRegRemap(m, i->ARMin.CMov.dst);
   2465          mapRegs_ARMRI84(m, i->ARMin.CMov.src);
   2466          return;
   2467       case ARMin_Call:
   2468          return;
   2469       case ARMin_Mul:
   2470          return;
   2471       case ARMin_LdrEX:
   2472          return;
   2473       case ARMin_StrEX:
   2474          return;
   2475       case ARMin_VLdStD:
   2476          i->ARMin.VLdStD.dD = lookupHRegRemap(m, i->ARMin.VLdStD.dD);
   2477          mapRegs_ARMAModeV(m, i->ARMin.VLdStD.amode);
   2478          return;
   2479       case ARMin_VLdStS:
   2480          i->ARMin.VLdStS.fD = lookupHRegRemap(m, i->ARMin.VLdStS.fD);
   2481          mapRegs_ARMAModeV(m, i->ARMin.VLdStS.amode);
   2482          return;
   2483       case ARMin_VAluD:
   2484          i->ARMin.VAluD.dst  = lookupHRegRemap(m, i->ARMin.VAluD.dst);
   2485          i->ARMin.VAluD.argL = lookupHRegRemap(m, i->ARMin.VAluD.argL);
   2486          i->ARMin.VAluD.argR = lookupHRegRemap(m, i->ARMin.VAluD.argR);
   2487          return;
   2488       case ARMin_VAluS:
   2489          i->ARMin.VAluS.dst  = lookupHRegRemap(m, i->ARMin.VAluS.dst);
   2490          i->ARMin.VAluS.argL = lookupHRegRemap(m, i->ARMin.VAluS.argL);
   2491          i->ARMin.VAluS.argR = lookupHRegRemap(m, i->ARMin.VAluS.argR);
   2492          return;
   2493       case ARMin_VUnaryD:
   2494          i->ARMin.VUnaryD.dst = lookupHRegRemap(m, i->ARMin.VUnaryD.dst);
   2495          i->ARMin.VUnaryD.src = lookupHRegRemap(m, i->ARMin.VUnaryD.src);
   2496          return;
   2497       case ARMin_VUnaryS:
   2498          i->ARMin.VUnaryS.dst = lookupHRegRemap(m, i->ARMin.VUnaryS.dst);
   2499          i->ARMin.VUnaryS.src = lookupHRegRemap(m, i->ARMin.VUnaryS.src);
   2500          return;
   2501       case ARMin_VCmpD:
   2502          i->ARMin.VCmpD.argL = lookupHRegRemap(m, i->ARMin.VCmpD.argL);
   2503          i->ARMin.VCmpD.argR = lookupHRegRemap(m, i->ARMin.VCmpD.argR);
   2504          return;
   2505       case ARMin_VCMovD:
   2506          i->ARMin.VCMovD.dst = lookupHRegRemap(m, i->ARMin.VCMovD.dst);
   2507          i->ARMin.VCMovD.src = lookupHRegRemap(m, i->ARMin.VCMovD.src);
   2508          return;
   2509       case ARMin_VCMovS:
   2510          i->ARMin.VCMovS.dst = lookupHRegRemap(m, i->ARMin.VCMovS.dst);
   2511          i->ARMin.VCMovS.src = lookupHRegRemap(m, i->ARMin.VCMovS.src);
   2512          return;
   2513       case ARMin_VCvtSD:
   2514          i->ARMin.VCvtSD.dst = lookupHRegRemap(m, i->ARMin.VCvtSD.dst);
   2515          i->ARMin.VCvtSD.src = lookupHRegRemap(m, i->ARMin.VCvtSD.src);
   2516          return;
   2517       case ARMin_VXferQ:
   2518          i->ARMin.VXferQ.qD  = lookupHRegRemap(m, i->ARMin.VXferQ.qD);
   2519          i->ARMin.VXferQ.dHi = lookupHRegRemap(m, i->ARMin.VXferQ.dHi);
   2520          i->ARMin.VXferQ.dLo = lookupHRegRemap(m, i->ARMin.VXferQ.dLo);
   2521          return;
   2522       case ARMin_VXferD:
   2523          i->ARMin.VXferD.dD  = lookupHRegRemap(m, i->ARMin.VXferD.dD);
   2524          i->ARMin.VXferD.rHi = lookupHRegRemap(m, i->ARMin.VXferD.rHi);
   2525          i->ARMin.VXferD.rLo = lookupHRegRemap(m, i->ARMin.VXferD.rLo);
   2526          return;
   2527       case ARMin_VXferS:
   2528          i->ARMin.VXferS.fD  = lookupHRegRemap(m, i->ARMin.VXferS.fD);
   2529          i->ARMin.VXferS.rLo = lookupHRegRemap(m, i->ARMin.VXferS.rLo);
   2530          return;
   2531       case ARMin_VCvtID:
   2532          i->ARMin.VCvtID.dst = lookupHRegRemap(m, i->ARMin.VCvtID.dst);
   2533          i->ARMin.VCvtID.src = lookupHRegRemap(m, i->ARMin.VCvtID.src);
   2534          return;
   2535       case ARMin_VRIntR:
   2536          i->ARMin.VRIntR.dst = lookupHRegRemap(m, i->ARMin.VRIntR.dst);
   2537          i->ARMin.VRIntR.src = lookupHRegRemap(m, i->ARMin.VRIntR.src);
   2538          return;
   2539       case ARMin_VMinMaxNum:
   2540          i->ARMin.VMinMaxNum.dst
   2541             = lookupHRegRemap(m, i->ARMin.VMinMaxNum.dst);
   2542          i->ARMin.VMinMaxNum.srcL
   2543             = lookupHRegRemap(m, i->ARMin.VMinMaxNum.srcL);
   2544          i->ARMin.VMinMaxNum.srcR
   2545             = lookupHRegRemap(m, i->ARMin.VMinMaxNum.srcR);
   2546          return;
   2547       case ARMin_FPSCR:
   2548          i->ARMin.FPSCR.iReg = lookupHRegRemap(m, i->ARMin.FPSCR.iReg);
   2549          return;
   2550       case ARMin_MFence:
   2551          return;
   2552       case ARMin_CLREX:
   2553          return;
   2554       case ARMin_NLdStQ:
   2555          i->ARMin.NLdStQ.dQ = lookupHRegRemap(m, i->ARMin.NLdStQ.dQ);
   2556          mapRegs_ARMAModeN(m, i->ARMin.NLdStQ.amode);
   2557          return;
   2558       case ARMin_NLdStD:
   2559          i->ARMin.NLdStD.dD = lookupHRegRemap(m, i->ARMin.NLdStD.dD);
   2560          mapRegs_ARMAModeN(m, i->ARMin.NLdStD.amode);
   2561          return;
   2562       case ARMin_NUnary:
   2563          i->ARMin.NUnary.src = lookupHRegRemap(m, i->ARMin.NUnary.src);
   2564          i->ARMin.NUnary.dst = lookupHRegRemap(m, i->ARMin.NUnary.dst);
   2565          return;
   2566       case ARMin_NUnaryS:
   2567          i->ARMin.NUnaryS.src->reg
   2568             = lookupHRegRemap(m, i->ARMin.NUnaryS.src->reg);
   2569          i->ARMin.NUnaryS.dst->reg
   2570             = lookupHRegRemap(m, i->ARMin.NUnaryS.dst->reg);
   2571          return;
   2572       case ARMin_NShift:
   2573          i->ARMin.NShift.dst = lookupHRegRemap(m, i->ARMin.NShift.dst);
   2574          i->ARMin.NShift.argL = lookupHRegRemap(m, i->ARMin.NShift.argL);
   2575          i->ARMin.NShift.argR = lookupHRegRemap(m, i->ARMin.NShift.argR);
   2576          return;
   2577       case ARMin_NShl64:
   2578          i->ARMin.NShl64.dst = lookupHRegRemap(m, i->ARMin.NShl64.dst);
   2579          i->ARMin.NShl64.src = lookupHRegRemap(m, i->ARMin.NShl64.src);
   2580          return;
   2581       case ARMin_NDual:
   2582          i->ARMin.NDual.arg1 = lookupHRegRemap(m, i->ARMin.NDual.arg1);
   2583          i->ARMin.NDual.arg2 = lookupHRegRemap(m, i->ARMin.NDual.arg2);
   2584          return;
   2585       case ARMin_NBinary:
   2586          i->ARMin.NBinary.argL = lookupHRegRemap(m, i->ARMin.NBinary.argL);
   2587          i->ARMin.NBinary.argR = lookupHRegRemap(m, i->ARMin.NBinary.argR);
   2588          i->ARMin.NBinary.dst  = lookupHRegRemap(m, i->ARMin.NBinary.dst);
   2589          return;
   2590       case ARMin_NeonImm:
   2591          i->ARMin.NeonImm.dst = lookupHRegRemap(m, i->ARMin.NeonImm.dst);
   2592          return;
   2593       case ARMin_NCMovQ:
   2594          i->ARMin.NCMovQ.dst = lookupHRegRemap(m, i->ARMin.NCMovQ.dst);
   2595          i->ARMin.NCMovQ.src = lookupHRegRemap(m, i->ARMin.NCMovQ.src);
   2596          return;
   2597       case ARMin_Add32:
   2598          i->ARMin.Add32.rD = lookupHRegRemap(m, i->ARMin.Add32.rD);
   2599          i->ARMin.Add32.rN = lookupHRegRemap(m, i->ARMin.Add32.rN);
   2600          return;
   2601       case ARMin_EvCheck:
   2602          /* We expect both amodes to mention only r8, so this is in
   2603             fact pointless (r8 isn't allocatable), but we do it
   2604             anyway for completeness. */
   2605          mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amCounter);
   2606          mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amFailAddr);
   2607          return;
   2608       case ARMin_ProfInc:
   2609          /* hardwires r11 and r12 -- nothing to modify. */
   2610          return;
   2611       default:
   2612          ppARMInstr(i);
   2613          vpanic("mapRegs_ARMInstr");
   2614    }
   2615 }
   2616 
   2617 /* Figure out if i represents a reg-reg move, and if so assign the
   2618    source and destination to *src and *dst.  If in doubt say No.  Used
   2619    by the register allocator to do move coalescing.
   2620 */
   2621 Bool isMove_ARMInstr ( const ARMInstr* i, HReg* src, HReg* dst )
   2622 {
   2623    /* Moves between integer regs */
   2624    switch (i->tag) {
   2625       case ARMin_Mov:
   2626          if (i->ARMin.Mov.src->tag == ARMri84_R) {
   2627             *src = i->ARMin.Mov.src->ARMri84.R.reg;
   2628             *dst = i->ARMin.Mov.dst;
   2629             return True;
   2630          }
   2631          break;
   2632       case ARMin_VUnaryD:
   2633          if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) {
   2634             *src = i->ARMin.VUnaryD.src;
   2635             *dst = i->ARMin.VUnaryD.dst;
   2636             return True;
   2637          }
   2638          break;
   2639       case ARMin_VUnaryS:
   2640          if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) {
   2641             *src = i->ARMin.VUnaryS.src;
   2642             *dst = i->ARMin.VUnaryS.dst;
   2643             return True;
   2644          }
   2645          break;
   2646       case ARMin_NUnary:
   2647          if (i->ARMin.NUnary.op == ARMneon_COPY) {
   2648             *src = i->ARMin.NUnary.src;
   2649             *dst = i->ARMin.NUnary.dst;
   2650             return True;
   2651          }
   2652          break;
   2653       default:
   2654          break;
   2655    }
   2656 
   2657    return False;
   2658 }
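        /* For instance (illustrative): a register-to-register
           ARMInstr_Mov(dst, ARMRI84_R(src)) is reported as a move and is
           therefore a candidate for coalescing, whereas a Mov whose
           source is an 8x4 immediate is not. */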
   2659 
   2660 
   2661 /* Generate arm spill/reload instructions under the direction of the
   2662    register allocator.  Note it's critical these don't write the
   2663    condition codes. */
   2664 
   2665 void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
   2666                     HReg rreg, Int offsetB, Bool mode64 )
   2667 {
   2668    HRegClass rclass;
   2669    vassert(offsetB >= 0);
   2670    vassert(!hregIsVirtual(rreg));
   2671    vassert(mode64 == False);
   2672    *i1 = *i2 = NULL;
   2673    rclass = hregClass(rreg);
   2674    switch (rclass) {
   2675       case HRcInt32:
   2676          vassert(offsetB <= 4095);
   2677          *i1 = ARMInstr_LdSt32( ARMcc_AL, False/*!isLoad*/,
   2678                                 rreg,
   2679                                 ARMAMode1_RI(hregARM_R8(), offsetB) );
   2680          return;
   2681       case HRcFlt32:
   2682       case HRcFlt64: {
   2683          HReg r8   = hregARM_R8();  /* baseblock */
   2684          HReg r12  = hregARM_R12(); /* spill temp */
   2685          HReg base = r8;
   2686          vassert(0 == (offsetB & 3));
   2687          if (offsetB >= 1024) {
   2688             Int offsetKB = offsetB / 1024;
   2689             /* r12 = r8 + (1024 * offsetKB) */
   2690             *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
   2691                                ARMRI84_I84(offsetKB, 11));
   2692             offsetB -= (1024 * offsetKB);
   2693             base = r12;
   2694          }
   2695          vassert(offsetB <= 1020);
   2696          if (rclass == HRcFlt32) {
   2697             *i2 = ARMInstr_VLdStS( False/*!isLoad*/,
   2698                                    rreg,
   2699                                    mkARMAModeV(base, offsetB) );
   2700          } else {
   2701             *i2 = ARMInstr_VLdStD( False/*!isLoad*/,
   2702                                    rreg,
   2703                                    mkARMAModeV(base, offsetB) );
   2704          }
   2705          return;
   2706       }
   2707       case HRcVec128: {
   2708          HReg r8  = hregARM_R8();
   2709          HReg r12 = hregARM_R12();
   2710          *i1 = ARMInstr_Add32(r12, r8, offsetB);
   2711          *i2 = ARMInstr_NLdStQ(False, rreg, mkARMAModeN_R(r12));
   2712          return;
   2713       }
   2714       default:
   2715          ppHRegClass(rclass);
   2716          vpanic("genSpill_ARM: unimplemented regclass");
   2717    }
   2718 }
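        /* Worked example (illustrative): spilling an HRcFlt64 register at
           offsetB == 1028 produces
              i1:  add r12, r8, #1024    -- 1024 == 1 ROR 22, I84(1,11)
              i2:  fstd dD, [r12, #+4]
           since VFP load/store offsets can only reach 1020 bytes from the
           base register. */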
   2719 
   2720 void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
   2721                      HReg rreg, Int offsetB, Bool mode64 )
   2722 {
   2723    HRegClass rclass;
   2724    vassert(offsetB >= 0);
   2725    vassert(!hregIsVirtual(rreg));
   2726    vassert(mode64 == False);
   2727    *i1 = *i2 = NULL;
   2728    rclass = hregClass(rreg);
   2729    switch (rclass) {
   2730       case HRcInt32:
   2731          vassert(offsetB <= 4095);
   2732          *i1 = ARMInstr_LdSt32( ARMcc_AL, True/*isLoad*/,
   2733                                 rreg,
   2734                                 ARMAMode1_RI(hregARM_R8(), offsetB) );
   2735          return;
   2736       case HRcFlt32:
   2737       case HRcFlt64: {
   2738          HReg r8   = hregARM_R8();  /* baseblock */
   2739          HReg r12  = hregARM_R12(); /* spill temp */
   2740          HReg base = r8;
   2741          vassert(0 == (offsetB & 3));
   2742          if (offsetB >= 1024) {
   2743             Int offsetKB = offsetB / 1024;
   2744             /* r12 = r8 + (1024 * offsetKB) */
   2745             *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
   2746                                ARMRI84_I84(offsetKB, 11));
   2747             offsetB -= (1024 * offsetKB);
   2748             base = r12;
   2749          }
   2750          vassert(offsetB <= 1020);
   2751          if (rclass == HRcFlt32) {
   2752             *i2 = ARMInstr_VLdStS( True/*isLoad*/,
   2753                                    rreg,
   2754                                    mkARMAModeV(base, offsetB) );
   2755          } else {
   2756             *i2 = ARMInstr_VLdStD( True/*isLoad*/,
   2757                                    rreg,
   2758                                    mkARMAModeV(base, offsetB) );
   2759          }
   2760          return;
   2761       }
   2762       case HRcVec128: {
   2763          HReg r8  = hregARM_R8();
   2764          HReg r12 = hregARM_R12();
   2765          *i1 = ARMInstr_Add32(r12, r8, offsetB);
   2766          *i2 = ARMInstr_NLdStQ(True, rreg, mkARMAModeN_R(r12));
   2767          return;
   2768       }
   2769       default:
   2770          ppHRegClass(rclass);
   2771          vpanic("genReload_ARM: unimplemented regclass");
   2772    }
   2773 }
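
/* Worked example (illustrative only): spilling a D-class register at
   baseblock offset 4132.  Since 4132 > 1020, genSpill_ARM first emits
      add r12, r8, #4096   -- ARMRI84_I84(4, 11), i.e. 4 ror 22 == 4 << 10
   and then
      vstr dD, [r12, #36]  -- the residue 4132 - 4096 == 36
   which fits the +/-1020 range of the VFP addressing mode.  Reloads are
   identical except that the second insn is a vldr. */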
   2774 
   2775 
   2776 /* Emit an instruction into buf and return the number of bytes used.
   2777    Note that buf is not the insn's final place, and therefore it is
   2778    imperative to emit position-independent code. */
   2779 
   2780 static inline UInt iregEnc ( HReg r )
   2781 {
   2782    UInt n;
   2783    vassert(hregClass(r) == HRcInt32);
   2784    vassert(!hregIsVirtual(r));
   2785    n = hregEncoding(r);
   2786    vassert(n <= 15);
   2787    return n;
   2788 }
   2789 
   2790 static inline UInt dregEnc ( HReg r )
   2791 {
   2792    UInt n;
   2793    vassert(hregClass(r) == HRcFlt64);
   2794    vassert(!hregIsVirtual(r));
   2795    n = hregEncoding(r);
   2796    vassert(n <= 31);
   2797    return n;
   2798 }
   2799 
   2800 static inline UInt fregEnc ( HReg r )
   2801 {
   2802    UInt n;
   2803    vassert(hregClass(r) == HRcFlt32);
   2804    vassert(!hregIsVirtual(r));
   2805    n = hregEncoding(r);
   2806    vassert(n <= 31);
   2807    return n;
   2808 }
   2809 
   2810 static inline UInt qregEnc ( HReg r )
   2811 {
   2812    UInt n;
   2813    vassert(hregClass(r) == HRcVec128);
   2814    vassert(!hregIsVirtual(r));
   2815    n = hregEncoding(r);
   2816    vassert(n <= 15);
   2817    return n;
   2818 }
   2819 
   2820 #define BITS4(zzb3,zzb2,zzb1,zzb0) \
   2821    (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
   2822 #define X0000  BITS4(0,0,0,0)
   2823 #define X0001  BITS4(0,0,0,1)
   2824 #define X0010  BITS4(0,0,1,0)
   2825 #define X0011  BITS4(0,0,1,1)
   2826 #define X0100  BITS4(0,1,0,0)
   2827 #define X0101  BITS4(0,1,0,1)
   2828 #define X0110  BITS4(0,1,1,0)
   2829 #define X0111  BITS4(0,1,1,1)
   2830 #define X1000  BITS4(1,0,0,0)
   2831 #define X1001  BITS4(1,0,0,1)
   2832 #define X1010  BITS4(1,0,1,0)
   2833 #define X1011  BITS4(1,0,1,1)
   2834 #define X1100  BITS4(1,1,0,0)
   2835 #define X1101  BITS4(1,1,0,1)
   2836 #define X1110  BITS4(1,1,1,0)
   2837 #define X1111  BITS4(1,1,1,1)
   2838 
   2839 #define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
   2840    (((((UInt)(zzx7)) & 0xF) << 28) | \
   2841     (((zzx6) & 0xF) << 24) |  \
   2842     (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
   2843     (((zzx3) & 0xF) << 12))
   2844 
   2845 #define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2)        \
   2846    (((((UInt)(zzx7)) & 0xF) << 28) | \
   2847     (((zzx6) & 0xF) << 24) |  \
   2848     (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
   2849     (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8))
   2850 
   2851 #define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0)        \
   2852    (((((UInt)(zzx7)) & 0xF) << 28) | \
   2853     (((zzx6) & 0xF) << 24) |  \
   2854     (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
   2855     (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) <<  0))
   2856 
   2857 #define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
   2858   (((((UInt)(zzx7)) & 0xF) << 28) | \
   2859    (((zzx6) & 0xF) << 24) | \
   2860    (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
   2861    (((zzx0) & 0xF) << 0))
   2862 
   2863 #define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0)  \
   2864    (((((UInt)(zzx7)) & 0xF) << 28) | \
   2865     (((zzx6) & 0xF) << 24) |  \
   2866     (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
   2867     (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8) |  \
   2868     (((zzx1) & 0xF) <<  4) | (((zzx0) & 0xF) <<  0))
   2869 
   2870 #define XX______(zzx7,zzx6) \
   2871    (((((UInt)(zzx7)) & 0xF) << 28) | (((zzx6) & 0xF) << 24))
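
/* Example (illustrative only): XXXXXXXX(0xE,0x0,0x8,0x1,0x0,0x0,0x0,0x2)
   places each 4-bit field at bits 31..28, 27..24, .., 3..0 in turn,
   giving the word 0xE0810002, which is "add r0, r1, r2". */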
   2872 
   2873 /* Generate a skeletal insn that involves an RI84 shifter operand.
   2874    Returns a word which is all zeroes apart from bits 25 and 11..0,
   2875    since it is those that encode the shifter operand (at least to the
   2876    extent that we care about it.) */
   2877 static UInt skeletal_RI84 ( ARMRI84* ri )
   2878 {
   2879    UInt instr;
   2880    if (ri->tag == ARMri84_I84) {
   2881       vassert(0 == (ri->ARMri84.I84.imm4 & ~0x0F));
   2882       vassert(0 == (ri->ARMri84.I84.imm8 & ~0xFF));
   2883       instr = 1 << 25;
   2884       instr |= (ri->ARMri84.I84.imm4 << 8);
   2885       instr |= ri->ARMri84.I84.imm8;
   2886    } else {
   2887       instr = 0 << 25;
   2888       instr |= iregEnc(ri->ARMri84.R.reg);
   2889    }
   2890    return instr;
   2891 }
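
/* In the I84 case the denoted value is imm8 rotated right by 2*imm4.
   For instance the ARMRI84_I84(offsetKB, 11) operand used by
   genSpill_ARM above denotes offsetKB ror 22, that is,
   offsetKB << 10 == offsetKB * 1024, since offsetKB fits in 8 bits. */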
   2892 
   2893 /* Ditto for RI5.  Resulting word is zeroes apart from bit 4 and bits
   2894    11..7. */
   2895 static UInt skeletal_RI5 ( ARMRI5* ri )
   2896 {
   2897    UInt instr;
   2898    if (ri->tag == ARMri5_I5) {
   2899       UInt imm5 = ri->ARMri5.I5.imm5;
   2900       vassert(imm5 >= 1 && imm5 <= 31);
   2901       instr = 0 << 4;
   2902       instr |= imm5 << 7;
   2903    } else {
   2904       instr = 1 << 4;
   2905       instr |= iregEnc(ri->ARMri5.R.reg) << 8;
   2906    }
   2907    return instr;
   2908 }
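
/* Example: an ARMri5_I5 operand with imm5 == 3 sets bits 11..7 to 3 and
   leaves bit 4 clear, so when ORed into the ARMin_Shift skeleton below
   it produces e.g. "mov rD, rM, LSL #3".  The register case instead
   sets bit 4 and puts the shift-amount register in bits 11..8. */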
   2909 
   2910 
   2911 /* Get an immediate into a register, using only that
   2912    register.  (very lame..) */
   2913 static UInt* imm32_to_ireg ( UInt* p, Int rD, UInt imm32 )
   2914 {
   2915    UInt instr;
   2916    vassert(rD >= 0 && rD <= 14); // r15 not good to mess with!
   2917 #if 0
   2918    if (0 == (imm32 & ~0xFF)) {
   2919       /* mov with an immediate shifter operand of (0, imm32) (??) */
   2920       instr = XXXXXX__(X1110,X0011,X1010,X0000,rD,X0000);
   2921       instr |= imm32;
   2922       *p++ = instr;
   2923    } else {
   2924       // this is very bad; causes Dcache pollution
   2925       // ldr  rD, [pc]
   2926       instr = XXXXX___(X1110,X0101,X1001,X1111,rD);
   2927       *p++ = instr;
   2928       // b .+8
   2929       instr = 0xEA000000;
   2930       *p++ = instr;
   2931       // .word imm32
   2932       *p++ = imm32;
   2933    }
   2934 #else
   2935    if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
   2936       /* Generate movw rD, #low16.  Then, if the high 16 are
   2937          nonzero, generate movt rD, #high16. */
   2938       UInt lo16 = imm32 & 0xFFFF;
   2939       UInt hi16 = (imm32 >> 16) & 0xFFFF;
   2940       instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
   2941                        (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
   2942                        lo16 & 0xF);
   2943       *p++ = instr;
   2944       if (hi16 != 0) {
   2945          instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
   2946                           (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
   2947                           hi16 & 0xF);
   2948          *p++ = instr;
   2949       }
   2950    } else {
   2951       UInt imm, rot;
   2952       UInt op = X1010;
   2953       UInt rN = 0;
   2954       if ((imm32 & 0xFF) || (imm32 == 0)) {
   2955          imm = imm32 & 0xFF;
   2956          rot = 0;
   2957          instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
   2958          *p++ = instr;
   2959          op = X1000;
   2960          rN = rD;
   2961       }
   2962       if (imm32 & 0xFF000000) {
   2963          imm = (imm32 >> 24) & 0xFF;
   2964          rot = 4;
   2965          instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
   2966          *p++ = instr;
   2967          op = X1000;
   2968          rN = rD;
   2969       }
   2970       if (imm32 & 0xFF0000) {
   2971          imm = (imm32 >> 16) & 0xFF;
   2972          rot = 8;
   2973          instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
   2974          *p++ = instr;
   2975          op = X1000;
   2976          rN = rD;
   2977       }
   2978       if (imm32 & 0xFF00) {
   2979          imm = (imm32 >> 8) & 0xFF;
   2980          rot = 12;
   2981          instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
   2982          *p++ = instr;
   2983          op = X1000;
   2984          rN = rD;
   2985       }
   2986    }
   2987 #endif
   2988    return p;
   2989 }
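
/* Worked example for the pre-v7 path above (illustrative only):
   imm32 == 0x00FF0012 produces
      mov rD, #0x12            -- low byte, rot == 0
      orr rD, rD, #0x00FF0000  -- imm8 == 0xFF with rot == 8 (ror 16)
   whereas on v7+ the same value becomes
      movw rD, #0x0012 ; movt rD, #0x00FF. */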
   2990 
   2991 /* Get an immediate into a register, using only that register, and
   2992    generating exactly 2 instructions, regardless of the value of the
   2993    immediate. This is used when generating sections of code that need
   2994    to be patched later, so as to guarantee a specific size. */
   2995 static UInt* imm32_to_ireg_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
   2996 {
   2997    if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
   2998       /* Generate movw rD, #low16 ;  movt rD, #high16. */
   2999       UInt lo16 = imm32 & 0xFFFF;
   3000       UInt hi16 = (imm32 >> 16) & 0xFFFF;
   3001       UInt instr;
   3002       instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
   3003                        (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
   3004                        lo16 & 0xF);
   3005       *p++ = instr;
   3006       instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
   3007                        (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
   3008                        hi16 & 0xF);
   3009       *p++ = instr;
   3010    } else {
   3011       vassert(0); /* lose */
   3012    }
   3013    return p;
   3014 }
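
/* E.g. imm32 == 0xDEADBEEF yields the pair
      movw rD, #0xBEEF ; movt rD, #0xDEAD.
   Because the sequence is always exactly two words, the patching
   machinery can later find and rewrite it in place. */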
   3015 
   3016 /* Check whether p points at a 2-insn sequence cooked up by
   3017    imm32_to_ireg_EXACTLY2(). */
   3018 static Bool is_imm32_to_ireg_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
   3019 {
   3020    if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
   3021       /* Generate movw rD, #low16 ;  movt rD, #high16. */
   3022       UInt lo16 = imm32 & 0xFFFF;
   3023       UInt hi16 = (imm32 >> 16) & 0xFFFF;
   3024       UInt i0, i1;
   3025       i0 = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
   3026                     (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
   3027                     lo16 & 0xF);
   3028       i1 = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
   3029                     (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
   3030                     hi16 & 0xF);
   3031       return p[0] == i0 && p[1] == i1;
   3032    } else {
   3033       vassert(0); /* lose */
   3034    }
   3035 }
   3036 
   3037 
   3038 static UInt* do_load_or_store32 ( UInt* p,
   3039                                   Bool isLoad, UInt rD, ARMAMode1* am )
   3040 {
   3041    vassert(rD <= 12);
   3042    vassert(am->tag == ARMam1_RI); // RR case is not handled
   3043    UInt bB = 0;
   3044    UInt bL = isLoad ? 1 : 0;
   3045    Int  simm12;
   3046    UInt instr, bP;
   3047    if (am->ARMam1.RI.simm13 < 0) {
   3048       bP = 0;
   3049       simm12 = -am->ARMam1.RI.simm13;
   3050    } else {
   3051       bP = 1;
   3052       simm12 = am->ARMam1.RI.simm13;
   3053    }
   3054    vassert(simm12 >= 0 && simm12 <= 4095);
   3055    instr = XXXXX___(X1110,X0101,BITS4(bP,bB,0,bL),
   3056                     iregEnc(am->ARMam1.RI.reg),
   3057                     rD);
   3058    instr |= simm12;
   3059    *p++ = instr;
   3060    return p;
   3061 }
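
/* Example: for am == (r8, simm13 == -4) this emits "str rD, [r8, #-4]"
   when isLoad is False: the negative offset is encoded as simm12 == 4
   with the U bit (bP here) cleared to select the subtract form. */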
   3062 
   3063 
   3064 /* Emit an instruction into buf and return the number of bytes used.
   3065    Note that buf is not the insn's final place, and therefore it is
   3066    imperative to emit position-independent code.  If the emitted
   3067    instruction was a profiler inc, set *is_profInc to True, else
   3068    leave it unchanged. */
   3069 
   3070 Int emit_ARMInstr ( /*MB_MOD*/Bool* is_profInc,
   3071                     UChar* buf, Int nbuf, const ARMInstr* i,
   3072                     Bool mode64, VexEndness endness_host,
   3073                     const void* disp_cp_chain_me_to_slowEP,
   3074                     const void* disp_cp_chain_me_to_fastEP,
   3075                     const void* disp_cp_xindir,
   3076                     const void* disp_cp_xassisted )
   3077 {
   3078    UInt* p = (UInt*)buf;
   3079    vassert(nbuf >= 32);
   3080    vassert(mode64 == False);
   3081    vassert(0 == (((HWord)buf) & 3));
   3082 
   3083    switch (i->tag) {
   3084       case ARMin_Alu: {
   3085          UInt     instr, subopc;
   3086          UInt     rD   = iregEnc(i->ARMin.Alu.dst);
   3087          UInt     rN   = iregEnc(i->ARMin.Alu.argL);
   3088          ARMRI84* argR = i->ARMin.Alu.argR;
   3089          switch (i->ARMin.Alu.op) {
   3090             case ARMalu_ADDS: /* fallthru */
   3091             case ARMalu_ADD:  subopc = X0100; break;
   3092             case ARMalu_ADC:  subopc = X0101; break;
   3093             case ARMalu_SUBS: /* fallthru */
   3094             case ARMalu_SUB:  subopc = X0010; break;
   3095             case ARMalu_SBC:  subopc = X0110; break;
   3096             case ARMalu_AND:  subopc = X0000; break;
   3097             case ARMalu_BIC:  subopc = X1110; break;
   3098             case ARMalu_OR:   subopc = X1100; break;
   3099             case ARMalu_XOR:  subopc = X0001; break;
   3100             default: goto bad;
   3101          }
   3102          instr = skeletal_RI84(argR);
   3103          instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
   3104                            (subopc << 1) & 0xF, rN, rD);
   3105          if (i->ARMin.Alu.op == ARMalu_ADDS
   3106              || i->ARMin.Alu.op == ARMalu_SUBS) {
   3107             instr |= 1<<20;  /* set the S bit */
   3108          }
   3109          *p++ = instr;
   3110          goto done;
   3111       }
   3112       case ARMin_Shift: {
   3113          UInt    instr, subopc;
   3114          UInt    rD   = iregEnc(i->ARMin.Shift.dst);
   3115          UInt    rM   = iregEnc(i->ARMin.Shift.argL);
   3116          ARMRI5* argR = i->ARMin.Shift.argR;
   3117          switch (i->ARMin.Shift.op) {
   3118             case ARMsh_SHL: subopc = X0000; break;
   3119             case ARMsh_SHR: subopc = X0001; break;
   3120             case ARMsh_SAR: subopc = X0010; break;
   3121             default: goto bad;
   3122          }
   3123          instr = skeletal_RI5(argR);
   3124          instr |= XXXXX__X(X1110,X0001,X1010,X0000,rD, /* _ _ */ rM);
   3125          instr |= (subopc & 3) << 5;
   3126          *p++ = instr;
   3127          goto done;
   3128       }
   3129       case ARMin_Unary: {
   3130          UInt instr;
   3131          UInt rDst = iregEnc(i->ARMin.Unary.dst);
   3132          UInt rSrc = iregEnc(i->ARMin.Unary.src);
   3133          switch (i->ARMin.Unary.op) {
   3134             case ARMun_CLZ:
   3135                instr = XXXXXXXX(X1110,X0001,X0110,X1111,
   3136                                 rDst,X1111,X0001,rSrc);
   3137                *p++ = instr;
   3138                goto done;
   3139             case ARMun_NEG: /* RSB rD,rS,#0 */
   3140                instr = XXXXX___(X1110,0x2,0x6,rSrc,rDst);
   3141                *p++ = instr;
   3142                goto done;
   3143             case ARMun_NOT: {
   3144                UInt subopc = X1111; /* MVN */
   3145                instr = rSrc;
   3146                instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
   3147                                  (subopc << 1) & 0xF, 0, rDst);
   3148                *p++ = instr;
   3149                goto done;
   3150             }
   3151             default:
   3152                break;
   3153          }
   3154          goto bad;
   3155       }
   3156       case ARMin_CmpOrTst: {
   3157          UInt instr  = skeletal_RI84(i->ARMin.CmpOrTst.argR);
   3158          UInt subopc = i->ARMin.CmpOrTst.isCmp ? X1010 : X1000;
   3159          UInt SBZ    = 0;
   3160          instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
   3161                            ((subopc << 1) & 0xF) | 1,
   3162                            iregEnc(i->ARMin.CmpOrTst.argL), SBZ );
   3163          *p++ = instr;
   3164          goto done;
   3165       }
   3166       case ARMin_Mov: {
   3167          UInt instr  = skeletal_RI84(i->ARMin.Mov.src);
   3168          UInt subopc = X1101; /* MOV */
   3169          UInt SBZ    = 0;
   3170          instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
   3171                            (subopc << 1) & 0xF, SBZ,
   3172                            iregEnc(i->ARMin.Mov.dst));
   3173          *p++ = instr;
   3174          goto done;
   3175       }
   3176       case ARMin_Imm32: {
   3177          p = imm32_to_ireg( (UInt*)p, iregEnc(i->ARMin.Imm32.dst),
   3178                                       i->ARMin.Imm32.imm32 );
   3179          goto done;
   3180       }
   3181       case ARMin_LdSt32:
   3182       case ARMin_LdSt8U: {
   3183          UInt        bL, bB;
   3184          HReg        rD;
   3185          ARMAMode1*  am;
   3186          ARMCondCode cc;
   3187          if (i->tag == ARMin_LdSt32) {
   3188             bB = 0;
   3189             bL = i->ARMin.LdSt32.isLoad ? 1 : 0;
   3190             am = i->ARMin.LdSt32.amode;
   3191             rD = i->ARMin.LdSt32.rD;
   3192             cc = i->ARMin.LdSt32.cc;
   3193          } else {
   3194             bB = 1;
   3195             bL = i->ARMin.LdSt8U.isLoad ? 1 : 0;
   3196             am = i->ARMin.LdSt8U.amode;
   3197             rD = i->ARMin.LdSt8U.rD;
   3198             cc = i->ARMin.LdSt8U.cc;
   3199          }
   3200          vassert(cc != ARMcc_NV);
   3201          if (am->tag == ARMam1_RI) {
   3202             Int  simm12;
   3203             UInt instr, bP;
   3204             if (am->ARMam1.RI.simm13 < 0) {
   3205                bP = 0;
   3206                simm12 = -am->ARMam1.RI.simm13;
   3207             } else {
   3208                bP = 1;
   3209                simm12 = am->ARMam1.RI.simm13;
   3210             }
   3211             vassert(simm12 >= 0 && simm12 <= 4095);
   3212             instr = XXXXX___(cc,X0101,BITS4(bP,bB,0,bL),
   3213                              iregEnc(am->ARMam1.RI.reg),
   3214                              iregEnc(rD));
   3215             instr |= simm12;
   3216             *p++ = instr;
   3217             goto done;
   3218          } else {
   3219             // RR case
   3220             goto bad;
   3221          }
   3222       }
   3223       case ARMin_LdSt16: {
   3224          HReg        rD = i->ARMin.LdSt16.rD;
   3225          UInt        bS = i->ARMin.LdSt16.signedLoad ? 1 : 0;
   3226          UInt        bL = i->ARMin.LdSt16.isLoad ? 1 : 0;
   3227          ARMAMode2*  am = i->ARMin.LdSt16.amode;
   3228          ARMCondCode cc = i->ARMin.LdSt16.cc;
   3229          vassert(cc != ARMcc_NV);
   3230          if (am->tag == ARMam2_RI) {
   3231             HReg rN = am->ARMam2.RI.reg;
   3232             Int  simm8;
   3233             UInt bP, imm8hi, imm8lo, instr;
   3234             if (am->ARMam2.RI.simm9 < 0) {
   3235                bP = 0;
   3236                simm8 = -am->ARMam2.RI.simm9;
   3237             } else {
   3238                bP = 1;
   3239                simm8 = am->ARMam2.RI.simm9;
   3240             }
   3241             vassert(simm8 >= 0 && simm8 <= 255);
   3242             imm8hi = (simm8 >> 4) & 0xF;
   3243             imm8lo = simm8 & 0xF;
   3244             vassert(!(bL == 0 && bS == 1)); // "! signed store"
   3245             /**/ if (bL == 0 && bS == 0) {
   3246                // strh
   3247                instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,0), iregEnc(rN),
   3248                                 iregEnc(rD), imm8hi, X1011, imm8lo);
   3249                *p++ = instr;
   3250                goto done;
   3251             }
   3252             else if (bL == 1 && bS == 0) {
   3253                // ldrh
   3254                instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregEnc(rN),
   3255                                 iregEnc(rD), imm8hi, X1011, imm8lo);
   3256                *p++ = instr;
   3257                goto done;
   3258             }
   3259             else if (bL == 1 && bS == 1) {
   3260                // ldrsh
   3261                instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregEnc(rN),
   3262                                 iregEnc(rD), imm8hi, X1111, imm8lo);
   3263                *p++ = instr;
   3264                goto done;
   3265             }
   3266             else vassert(0); // ill-constructed insn
   3267          } else {
   3268             // RR case
   3269             goto bad;
   3270          }
   3271       }
   3272       case ARMin_Ld8S: {
   3273          HReg        rD = i->ARMin.Ld8S.rD;
   3274          ARMAMode2*  am = i->ARMin.Ld8S.amode;
   3275          ARMCondCode cc = i->ARMin.Ld8S.cc;
   3276          vassert(cc != ARMcc_NV);
   3277          if (am->tag == ARMam2_RI) {
   3278             HReg rN = am->ARMam2.RI.reg;
   3279             Int  simm8;
   3280             UInt bP, imm8hi, imm8lo, instr;
   3281             if (am->ARMam2.RI.simm9 < 0) {
   3282                bP = 0;
   3283                simm8 = -am->ARMam2.RI.simm9;
   3284             } else {
   3285                bP = 1;
   3286                simm8 = am->ARMam2.RI.simm9;
   3287             }
   3288             vassert(simm8 >= 0 && simm8 <= 255);
   3289             imm8hi = (simm8 >> 4) & 0xF;
   3290             imm8lo = simm8 & 0xF;
   3291             // ldrsb
   3292             instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregEnc(rN),
   3293                              iregEnc(rD), imm8hi, X1101, imm8lo);
   3294             *p++ = instr;
   3295             goto done;
   3296          } else {
   3297             // RR case
   3298             goto bad;
   3299          }
   3300       }
   3301 
   3302       case ARMin_XDirect: {
   3303          /* NB: what goes on here has to be very closely coordinated
   3304             with the chainXDirect_ARM and unchainXDirect_ARM below. */
   3305          /* We're generating chain-me requests here, so we need to be
   3306             sure this is actually allowed -- no-redir translations
   3307             can't use chain-me's.  Hence: */
   3308          vassert(disp_cp_chain_me_to_slowEP != NULL);
   3309          vassert(disp_cp_chain_me_to_fastEP != NULL);
   3310 
   3311          /* Use ptmp for backpatching conditional jumps. */
   3312          UInt* ptmp = NULL;
   3313 
   3314          /* First off, if this is conditional, create a conditional
   3315             jump over the rest of it.  Or at least, leave a space for
   3316             it that we will shortly fill in. */
   3317          if (i->ARMin.XDirect.cond != ARMcc_AL) {
   3318             vassert(i->ARMin.XDirect.cond != ARMcc_NV);
   3319             ptmp = p;
   3320             *p++ = 0;
   3321          }
   3322 
   3323          /* Update the guest R15T. */
   3324          /* movw r12, lo16(dstGA) */
   3325          /* movt r12, hi16(dstGA) */
   3326          /* str r12, amR15T */
   3327          p = imm32_to_ireg(p, /*r*/12, i->ARMin.XDirect.dstGA);
   3328          p = do_load_or_store32(p, False/*!isLoad*/,
   3329                                 /*r*/12, i->ARMin.XDirect.amR15T);
   3330 
   3331          /* --- FIRST PATCHABLE BYTE follows --- */
   3332          /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
   3333             calling to) backs up the return address, so as to find the
   3334             address of the first patchable byte.  So: don't change the
   3335             number of instructions (3) below. */
   3336          /* movw r12, lo16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
   3337          /* movt r12, hi16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
   3338          /* blx  r12  (A1) */
   3339          const void* disp_cp_chain_me
   3340                   = i->ARMin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
   3341                                               : disp_cp_chain_me_to_slowEP;
   3342          p = imm32_to_ireg_EXACTLY2(p, /*r*/12,
   3343                                     (UInt)(Addr)disp_cp_chain_me);
   3344          *p++ = 0xE12FFF3C;
   3345          /* --- END of PATCHABLE BYTES --- */
   3346 
   3347          /* Fix up the conditional jump, if there was one. */
   3348          if (i->ARMin.XDirect.cond != ARMcc_AL) {
   3349             Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
   3350             vassert(delta > 0 && delta < 40);
   3351             vassert((delta & 3) == 0);
   3352             UInt notCond = 1 ^ (UInt)i->ARMin.XDirect.cond;
   3353             vassert(notCond <= 13); /* Neither AL nor NV */
   3354             delta = (delta >> 2) - 2;
   3355             *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
   3356          }
   3357          goto done;
   3358       }
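
      /* The branch-offset arithmetic just above (also used by the
         XIndir and XAssisted cases below), illustratively: if the
         conditional skip covers 6 words then delta == 24, the imm24
         field is 24/4 - 2 == 4, and B{!cond} lands at
         ptmp + 8 + 4*4 == ptmp + 24 == p, since an ARM branch target
         is PC+8 plus 4*imm24. */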
   3359 
   3360       case ARMin_XIndir: {
   3361          /* We're generating transfers that could lead indirectly to a
   3362             chain-me, so we need to be sure this is actually allowed
   3363             -- no-redir translations are not allowed to reach normal
   3364             translations without going through the scheduler.  That
   3365             means no XDirects or XIndirs out from no-redir
   3366             translations.  Hence: */
   3367          vassert(disp_cp_xindir != NULL);
   3368 
   3369          /* Use ptmp for backpatching conditional jumps. */
   3370          UInt* ptmp = NULL;
   3371 
   3372          /* First off, if this is conditional, create a conditional
   3373             jump over the rest of it.  Or at least, leave a space for
   3374             it that we will shortly fill in. */
   3375          if (i->ARMin.XIndir.cond != ARMcc_AL) {
   3376             vassert(i->ARMin.XIndir.cond != ARMcc_NV);
   3377             ptmp = p;
   3378             *p++ = 0;
   3379          }
   3380 
   3381          /* Update the guest R15T. */
   3382          /* str r-dstGA, amR15T */
   3383          p = do_load_or_store32(p, False/*!isLoad*/,
   3384                                 iregEnc(i->ARMin.XIndir.dstGA),
   3385                                 i->ARMin.XIndir.amR15T);
   3386 
   3387          /* movw r12, lo16(VG_(disp_cp_xindir)) */
   3388          /* movt r12, hi16(VG_(disp_cp_xindir)) */
   3389          /* bx   r12  (A1) */
   3390          p = imm32_to_ireg(p, /*r*/12, (UInt)(Addr)disp_cp_xindir);
   3391          *p++ = 0xE12FFF1C;
   3392 
   3393          /* Fix up the conditional jump, if there was one. */
   3394          if (i->ARMin.XIndir.cond != ARMcc_AL) {
   3395             Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
   3396             vassert(delta > 0 && delta < 40);
   3397             vassert((delta & 3) == 0);
   3398             UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
   3399             vassert(notCond <= 13); /* Neither AL nor NV */
   3400             delta = (delta >> 2) - 2;
   3401             *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
   3402          }
   3403          goto done;
   3404       }
   3405 
   3406       case ARMin_XAssisted: {
   3407          /* Use ptmp for backpatching conditional jumps. */
   3408          UInt* ptmp = NULL;
   3409 
   3410          /* First off, if this is conditional, create a conditional
   3411             jump over the rest of it.  Or at least, leave a space for
   3412             it that we will shortly fill in. */
   3413          if (i->ARMin.XAssisted.cond != ARMcc_AL) {
   3414             vassert(i->ARMin.XAssisted.cond != ARMcc_NV);
   3415             ptmp = p;
   3416             *p++ = 0;
   3417          }
   3418 
   3419          /* Update the guest R15T. */
   3420          /* str r-dstGA, amR15T */
   3421          p = do_load_or_store32(p, False/*!isLoad*/,
   3422                                 iregEnc(i->ARMin.XAssisted.dstGA),
   3423                                 i->ARMin.XAssisted.amR15T);
   3424 
   3425          /* movw r8,  $magic_number */
   3426          UInt trcval = 0;
   3427          switch (i->ARMin.XAssisted.jk) {
   3428             case Ijk_ClientReq:   trcval = VEX_TRC_JMP_CLIENTREQ;   break;
   3429             case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
   3430             //case Ijk_Sys_int128:  trcval = VEX_TRC_JMP_SYS_INT128;  break;
   3431             case Ijk_Yield:       trcval = VEX_TRC_JMP_YIELD;       break;
   3432             //case Ijk_EmWarn:      trcval = VEX_TRC_JMP_EMWARN;      break;
   3433             //case Ijk_MapFail:     trcval = VEX_TRC_JMP_MAPFAIL;     break;
   3434             case Ijk_NoDecode:    trcval = VEX_TRC_JMP_NODECODE;    break;
   3435             case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
   3436             case Ijk_NoRedir:     trcval = VEX_TRC_JMP_NOREDIR;     break;
   3437             //case Ijk_SigTRAP:     trcval = VEX_TRC_JMP_SIGTRAP;     break;
   3438             //case Ijk_SigSEGV:     trcval = VEX_TRC_JMP_SIGSEGV;     break;
   3439             case Ijk_Boring:      trcval = VEX_TRC_JMP_BORING;      break;
   3440             /* We don't expect to see the following being assisted. */
   3441             //case Ijk_Ret:
   3442             //case Ijk_Call:
   3443             /* fallthrough */
   3444             default:
   3445                ppIRJumpKind(i->ARMin.XAssisted.jk);
   3446                vpanic("emit_ARMInstr.ARMin_XAssisted: unexpected jump kind");
   3447          }
   3448          vassert(trcval != 0);
   3449          p = imm32_to_ireg(p, /*r*/8, trcval);
   3450 
   3451          /* movw r12, lo16(VG_(disp_cp_xassisted)) */
   3452          /* movt r12, hi16(VG_(disp_cp_xassisted)) */
   3453          /* bx   r12  (A1) */
   3454          p = imm32_to_ireg(p, /*r*/12, (UInt)(Addr)disp_cp_xassisted);
   3455          *p++ = 0xE12FFF1C;
   3456 
   3457          /* Fix up the conditional jump, if there was one. */
   3458          if (i->ARMin.XAssisted.cond != ARMcc_AL) {
   3459             Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
   3460             vassert(delta > 0 && delta < 40);
   3461             vassert((delta & 3) == 0);
   3462             UInt notCond = 1 ^ (UInt)i->ARMin.XAssisted.cond;
   3463             vassert(notCond <= 13); /* Neither AL nor NV */
   3464             delta = (delta >> 2) - 2;
   3465             *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
   3466          }
   3467          goto done;
   3468       }
   3469 
   3470       case ARMin_CMov: {
   3471          UInt instr  = skeletal_RI84(i->ARMin.CMov.src);
   3472          UInt subopc = X1101; /* MOV */
   3473          UInt SBZ    = 0;
   3474          instr |= XXXXX___(i->ARMin.CMov.cond, (1 & (subopc >> 3)),
   3475                            (subopc << 1) & 0xF, SBZ,
   3476                            iregEnc(i->ARMin.CMov.dst));
   3477          *p++ = instr;
   3478          goto done;
   3479       }
   3480 
   3481       case ARMin_Call: {
   3482          UInt instr;
   3483          /* Decide on a scratch reg used to hold the call address.
   3484             This has to be done as per the comments in getRegUsage. */
   3485          Int scratchNo;
   3486          switch (i->ARMin.Call.nArgRegs) {
   3487             case 0:  scratchNo = 0;  break;
   3488             case 1:  scratchNo = 1;  break;
   3489             case 2:  scratchNo = 2;  break;
   3490             case 3:  scratchNo = 3;  break;
   3491             case 4:  scratchNo = 11; break;
   3492             default: vassert(0);
   3493          }
   3494          /* If we don't need to do any fixup actions in the case that
   3495             the call doesn't happen, just do the simple thing and emit
   3496             straight-line code.  We hope this is the common case. */
   3497          if (i->ARMin.Call.cond == ARMcc_AL/*call always happens*/
   3498              || i->ARMin.Call.rloc.pri == RLPri_None/*no fixup action*/) {
   3499             // r"scratchNo" = &target
   3500             p = imm32_to_ireg( (UInt*)p,
   3501                                scratchNo, (UInt)i->ARMin.Call.target );
   3502             // blx{cond} r"scratchNo"
   3503             instr = XXX___XX(i->ARMin.Call.cond, X0001, X0010, /*___*/
   3504                              X0011, scratchNo);
   3505             instr |= 0xFFF << 8; // stick in the SBOnes
   3506             *p++ = instr;
   3507          } else {
   3508             Int delta;
   3509             /* Complex case.  We have to generate an if-then-else
   3510                diamond. */
   3511             // before:
   3512             //   b{!cond} else:
   3513             //   r"scratchNo" = &target
   3514             //   blx{AL} r"scratchNo"
   3515             // preElse:
   3516             //   b after:
   3517             // else:
   3518             //   mov r0, #0x55555555  // possibly
   3519             //   mov r1, r0           // possibly
   3520             // after:
   3521 
   3522             // before:
   3523             UInt* pBefore = p;
   3524 
   3525             //   b{!cond} else:  // ptmp1 points here
   3526             *p++ = 0; // filled in later
   3527 
   3528             //   r"scratchNo" = &target
   3529             p = imm32_to_ireg( (UInt*)p,
   3530                                scratchNo, (UInt)i->ARMin.Call.target );
   3531 
   3532             //   blx{AL} r"scratchNo"
   3533             instr = XXX___XX(ARMcc_AL, X0001, X0010, /*___*/
   3534                              X0011, scratchNo);
   3535             instr |= 0xFFF << 8; // stick in the SBOnes
   3536             *p++ = instr;
   3537 
   3538             // preElse:
   3539             UInt* pPreElse = p;
   3540 
   3541             //   b after:
   3542             *p++ = 0; // filled in later
   3543 
   3544             // else:
   3545             delta = (UChar*)p - (UChar*)pBefore;
   3546             delta = (delta >> 2) - 2;
   3547             *pBefore
   3548                = XX______(1 ^ i->ARMin.Call.cond, X1010) | (delta & 0xFFFFFF);
   3549 
   3550             /* Do the 'else' actions */
   3551             switch (i->ARMin.Call.rloc.pri) {
   3552                case RLPri_Int:
   3553                   p = imm32_to_ireg_EXACTLY2(p, /*r*/0, 0x55555555);
   3554                   break;
   3555                case RLPri_2Int:
   3556                   vassert(0); //ATC
   3557                   p = imm32_to_ireg_EXACTLY2(p, /*r*/0, 0x55555555);
   3558                   /* mov r1, r0 */
   3559                   *p++ = 0xE1A01000;
   3560                   break;
   3561                case RLPri_None: case RLPri_INVALID: default:
   3562                   vassert(0);
   3563             }
   3564 
   3565             // after:
   3566             delta = (UChar*)p - (UChar*)pPreElse;
   3567             delta = (delta >> 2) - 2;
   3568             *pPreElse = XX______(ARMcc_AL, X1010) | (delta & 0xFFFFFF);
   3569          }
   3570 
   3571          goto done;
   3572       }
   3573 
   3574       case ARMin_Mul: {
   3575          /* E0000392   mul     r0, r2, r3
   3576             E0810392   umull   r0(LO), r1(HI), r2, r3
   3577             E0C10392   smull   r0(LO), r1(HI), r2, r3
   3578          */
   3579          switch (i->ARMin.Mul.op) {
   3580             case ARMmul_PLAIN: *p++ = 0xE0000392; goto done;
   3581             case ARMmul_ZX:    *p++ = 0xE0810392; goto done;
   3582             case ARMmul_SX:    *p++ = 0xE0C10392; goto done;
   3583             default: vassert(0);
   3584          }
   3585          goto bad;
   3586       }
   3587       case ARMin_LdrEX: {
   3588          /* E1D42F9F   ldrexb r2, [r4]
   3589             E1F42F9F   ldrexh r2, [r4]
   3590             E1942F9F   ldrex  r2, [r4]
   3591             E1B42F9F   ldrexd r2, r3, [r4]
   3592          */
   3593          switch (i->ARMin.LdrEX.szB) {
   3594             case 1: *p++ = 0xE1D42F9F; goto done;
   3595             case 2: *p++ = 0xE1F42F9F; goto done;
   3596             case 4: *p++ = 0xE1942F9F; goto done;
   3597             case 8: *p++ = 0xE1B42F9F; goto done;
   3598             default: break;
   3599          }
   3600          goto bad;
   3601       }
   3602       case ARMin_StrEX: {
   3603          /* E1C40F92   strexb r0, r2, [r4]
   3604             E1E40F92   strexh r0, r2, [r4]
   3605             E1840F92   strex  r0, r2, [r4]
   3606             E1A40F92   strexd r0, r2, r3, [r4]
   3607          */
   3608          switch (i->ARMin.StrEX.szB) {
   3609             case 1: *p++ = 0xE1C40F92; goto done;
   3610             case 2: *p++ = 0xE1E40F92; goto done;
   3611             case 4: *p++ = 0xE1840F92; goto done;
   3612             case 8: *p++ = 0xE1A40F92; goto done;
   3613             default: break;
   3614          }
   3615          goto bad;
   3616       }
   3617       case ARMin_VLdStD: {
   3618          UInt dD     = dregEnc(i->ARMin.VLdStD.dD);
   3619          UInt rN     = iregEnc(i->ARMin.VLdStD.amode->reg);
   3620          Int  simm11 = i->ARMin.VLdStD.amode->simm11;
   3621          UInt off8   = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
   3622          UInt bU     = simm11 >= 0 ? 1 : 0;
   3623          UInt bL     = i->ARMin.VLdStD.isLoad ? 1 : 0;
   3624          UInt insn;
   3625          vassert(0 == (off8 & 3));
   3626          off8 >>= 2;
   3627          vassert(0 == (off8 & 0xFFFFFF00));
   3628          insn = XXXXXX__(0xE,X1101,BITS4(bU,0,0,bL),rN,dD,X1011);
   3629          insn |= off8;
   3630          *p++ = insn;
   3631          goto done;
   3632       }
   3633       case ARMin_VLdStS: {
   3634          UInt fD     = fregEnc(i->ARMin.VLdStS.fD);
   3635          UInt rN     = iregEnc(i->ARMin.VLdStS.amode->reg);
   3636          Int  simm11 = i->ARMin.VLdStS.amode->simm11;
   3637          UInt off8   = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
   3638          UInt bU     = simm11 >= 0 ? 1 : 0;
   3639          UInt bL     = i->ARMin.VLdStS.isLoad ? 1 : 0;
   3640          UInt bD     = fD & 1;
   3641          UInt insn;
   3642          vassert(0 == (off8 & 3));
   3643          off8 >>= 2;
   3644          vassert(0 == (off8 & 0xFFFFFF00));
   3645          insn = XXXXXX__(0xE,X1101,BITS4(bU,bD,0,bL),rN, (fD >> 1), X1010);
   3646          insn |= off8;
   3647          *p++ = insn;
   3648          goto done;
   3649       }
   3650       case ARMin_VAluD: {
   3651          UInt dN = dregEnc(i->ARMin.VAluD.argL);
   3652          UInt dD = dregEnc(i->ARMin.VAluD.dst);
   3653          UInt dM = dregEnc(i->ARMin.VAluD.argR);
   3654          UInt pqrs = X1111; /* undefined */
   3655          switch (i->ARMin.VAluD.op) {
   3656             case ARMvfp_ADD: pqrs = X0110; break;
   3657             case ARMvfp_SUB: pqrs = X0111; break;
   3658             case ARMvfp_MUL: pqrs = X0100; break;
   3659             case ARMvfp_DIV: pqrs = X1000; break;
   3660             default: goto bad;
   3661          }
   3662          vassert(pqrs != X1111);
   3663          UInt bP  = (pqrs >> 3) & 1;
   3664          UInt bQ  = (pqrs >> 2) & 1;
   3665          UInt bR  = (pqrs >> 1) & 1;
   3666          UInt bS  = (pqrs >> 0) & 1;
   3667          UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,0,bQ,bR), dN, dD,
   3668                               X1011, BITS4(0,bS,0,0), dM);
   3669          *p++ = insn;
   3670          goto done;
   3671       }
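      /* E.g. ARMvfp_ADD has pqrs == 0110, i.e. (bP,bQ,bR,bS) ==
         (0,1,1,0), so with dD == 0, dN == 1, dM == 2 the word above
         assembles to 0xEE310B02, "vadd.f64 d0, d1, d2". */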
   3672       case ARMin_VAluS: {
   3673          UInt dN = fregEnc(i->ARMin.VAluS.argL);
   3674          UInt dD = fregEnc(i->ARMin.VAluS.dst);
   3675          UInt dM = fregEnc(i->ARMin.VAluS.argR);
   3676          UInt bN = dN & 1;
   3677          UInt bD = dD & 1;
   3678          UInt bM = dM & 1;
   3679          UInt pqrs = X1111; /* undefined */
   3680          switch (i->ARMin.VAluS.op) {
   3681             case ARMvfp_ADD: pqrs = X0110; break;
   3682             case ARMvfp_SUB: pqrs = X0111; break;
   3683             case ARMvfp_MUL: pqrs = X0100; break;
   3684             case ARMvfp_DIV: pqrs = X1000; break;
   3685             default: goto bad;
   3686          }
   3687          vassert(pqrs != X1111);
   3688          UInt bP  = (pqrs >> 3) & 1;
   3689          UInt bQ  = (pqrs >> 2) & 1;
   3690          UInt bR  = (pqrs >> 1) & 1;
   3691          UInt bS  = (pqrs >> 0) & 1;
   3692          UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,bD,bQ,bR),
   3693                               (dN >> 1), (dD >> 1),
   3694                               X1010, BITS4(bN,bS,bM,0), (dM >> 1));
   3695          *p++ = insn;
   3696          goto done;
   3697       }
   3698       case ARMin_VUnaryD: {
   3699          UInt dD   = dregEnc(i->ARMin.VUnaryD.dst);
   3700          UInt dM   = dregEnc(i->ARMin.VUnaryD.src);
   3701          UInt insn = 0;
   3702          switch (i->ARMin.VUnaryD.op) {
   3703             case ARMvfpu_COPY:
   3704                insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X0100,dM);
   3705                break;
   3706             case ARMvfpu_ABS:
   3707                insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X1100,dM);
   3708                break;
   3709             case ARMvfpu_NEG:
   3710                insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X0100,dM);
   3711                break;
   3712             case ARMvfpu_SQRT:
   3713                insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X1100,dM);
   3714                break;
   3715             default:
   3716                goto bad;
   3717          }
   3718          *p++ = insn;
   3719          goto done;
   3720       }
   3721       case ARMin_VUnaryS: {
   3722          UInt fD   = fregEnc(i->ARMin.VUnaryS.dst);
   3723          UInt fM   = fregEnc(i->ARMin.VUnaryS.src);
   3724          UInt insn = 0;
   3725          switch (i->ARMin.VUnaryS.op) {
   3726             case ARMvfpu_COPY:
   3727                insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
   3728                                (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
   3729                                (fM >> 1));
   3730                break;
   3731             case ARMvfpu_ABS:
   3732                insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
   3733                                (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
   3734                                (fM >> 1));
   3735                break;
   3736             case ARMvfpu_NEG:
   3737                insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
   3738                                (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
   3739                                (fM >> 1));
   3740                break;
   3741             case ARMvfpu_SQRT:
   3742                insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
   3743                                (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
   3744                                (fM >> 1));
   3745                break;
   3746             default:
   3747                goto bad;
   3748          }
   3749          *p++ = insn;
   3750          goto done;
   3751       }
   3752       case ARMin_VCmpD: {
   3753          UInt dD   = dregEnc(i->ARMin.VCmpD.argL);
   3754          UInt dM   = dregEnc(i->ARMin.VCmpD.argR);
   3755          UInt insn = XXXXXXXX(0xE, X1110, X1011, X0100, dD, X1011, X0100, dM);
   3756          *p++ = insn;       /* FCMPD dD, dM */
   3757          *p++ = 0xEEF1FA10; /* FMSTAT */
   3758          goto done;
   3759       }
   3760       case ARMin_VCMovD: {
   3761          UInt cc = (UInt)i->ARMin.VCMovD.cond;
   3762          UInt dD = dregEnc(i->ARMin.VCMovD.dst);
   3763          UInt dM = dregEnc(i->ARMin.VCMovD.src);
   3764          vassert(cc < 16 && cc != ARMcc_AL);
   3765          UInt insn = XXXXXXXX(cc, X1110,X1011,X0000,dD,X1011,X0100,dM);
   3766          *p++ = insn;
   3767          goto done;
   3768       }
   3769       case ARMin_VCMovS: {
   3770          UInt cc = (UInt)i->ARMin.VCMovS.cond;
   3771          UInt fD = fregEnc(i->ARMin.VCMovS.dst);
   3772          UInt fM = fregEnc(i->ARMin.VCMovS.src);
   3773          vassert(cc < 16 && cc != ARMcc_AL);
   3774          UInt insn = XXXXXXXX(cc, X1110, BITS4(1,(fD & 1),1,1),
   3775                               X0000,(fD >> 1),X1010,
   3776                               BITS4(0,1,(fM & 1),0), (fM >> 1));
   3777          *p++ = insn;
   3778          goto done;
   3779       }
   3780       case ARMin_VCvtSD: {
   3781          if (i->ARMin.VCvtSD.sToD) {
   3782             UInt dD = dregEnc(i->ARMin.VCvtSD.dst);
   3783             UInt fM = fregEnc(i->ARMin.VCvtSD.src);
   3784             UInt insn = XXXXXXXX(0xE, X1110, X1011, X0111, dD, X1010,
   3785                                  BITS4(1,1, (fM & 1), 0),
   3786                                  (fM >> 1));
   3787             *p++ = insn;
   3788             goto done;
   3789          } else {
   3790             UInt fD = fregEnc(i->ARMin.VCvtSD.dst);
   3791             UInt dM = dregEnc(i->ARMin.VCvtSD.src);
   3792             UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1),
   3793                                  X0111, (fD >> 1),
   3794                                  X1011, X1100, dM);
   3795             *p++ = insn;
   3796             goto done;
   3797          }
   3798       }
   3799       case ARMin_VXferQ: {
   3800          UInt insn;
   3801          UInt qD  = qregEnc(i->ARMin.VXferQ.qD);
   3802          UInt dHi = dregEnc(i->ARMin.VXferQ.dHi);
   3803          UInt dLo = dregEnc(i->ARMin.VXferQ.dLo);
   3804          /* This is a bit tricky.  We need to make 2 D-D moves and we rely
   3805             on the fact that the Q register can be treated as two D registers.
   3806             We also rely on the fact that the register allocator will allocate
   3807             the two D's and the Q to disjoint parts of the register file,
   3808             and so we don't have to worry about the first move's destination
   3809             being the same as the second move's source, etc.  We do have
   3810             assertions though. */
   3811          /* The ARM ARM specifies that
   3812               D<2n>   maps to the least significant half of Q<n>
   3813               D<2n+1> maps to the most  significant half of Q<n>
   3814             So there are no issues with endianness here.
   3815          */
   3816          UInt qDlo = 2 * qD + 0;
   3817          UInt qDhi = 2 * qD + 1;
   3818          /* Stay sane .. */
   3819          vassert(qDhi != dHi && qDhi != dLo);
   3820          vassert(qDlo != dHi && qDlo != dLo);
   3821          /* vmov dX, dY is
   3822             F 2 (0,dX[4],1,0) dY[3:0] dX[3:0] 1 (dY[4],0,dY[4],1) dY[3:0]
   3823          */
   3824 #        define VMOV_D_D(_xx,_yy) \
   3825             XXXXXXXX( 0xF, 0x2, BITS4(0, (((_xx) >> 4) & 1), 1, 0), \
   3826                       ((_yy) & 0xF), ((_xx) & 0xF), 0x1, \
   3827                       BITS4( (((_yy) >> 4) & 1), 0, (((_yy) >> 4) & 1), 1), \
   3828                       ((_yy) & 0xF) )
   3829          if (i->ARMin.VXferQ.toQ) {
   3830             insn = VMOV_D_D(qDlo, dLo); *p++ = insn;
   3831             insn = VMOV_D_D(qDhi, dHi); *p++ = insn;
   3832          } else {
   3833             insn = VMOV_D_D(dLo, qDlo); *p++ = insn;
   3834             insn = VMOV_D_D(dHi, qDhi); *p++ = insn;
   3835          }
   3836 #        undef VMOV_D_D
   3837          goto done;
   3838       }
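      /* Illustration of VMOV_D_D above: VMOV_D_D(2, 3) assembles to
         0xF2232113, i.e. "vorr d2, d3, d3", the canonical encoding of
         "vmov d2, d3". */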
   3839       case ARMin_VXferD: {
   3840          UInt dD  = dregEnc(i->ARMin.VXferD.dD);
   3841          UInt rHi = iregEnc(i->ARMin.VXferD.rHi);
   3842          UInt rLo = iregEnc(i->ARMin.VXferD.rLo);
   3843          /* vmov dD, rLo, rHi is
   3844             E C 4 rHi rLo B (0,0,dD[4],1) dD[3:0]
   3845             vmov rLo, rHi, dD is
   3846             E C 5 rHi rLo B (0,0,dD[4],1) dD[3:0]
   3847          */
   3848          UInt insn
   3849             = XXXXXXXX(0xE, 0xC, i->ARMin.VXferD.toD ? 4 : 5,
   3850                        rHi, rLo, 0xB,
   3851                        BITS4(0,0, ((dD >> 4) & 1), 1), (dD & 0xF));
   3852          *p++ = insn;
   3853          goto done;
   3854       }
   3855       case ARMin_VXferS: {
   3856          UInt fD  = fregEnc(i->ARMin.VXferS.fD);
   3857          UInt rLo = iregEnc(i->ARMin.VXferS.rLo);
   3858          /* vmov fD, rLo is
   3859             E E 0 fD[4:1] rLo A (fD[0],0,0,1) 0
   3860             vmov rLo, fD is
   3861             E E 1 fD[4:1] rLo A (fD[0],0,0,1) 0
   3862          */
   3863          UInt insn
   3864             = XXXXXXXX(0xE, 0xE, i->ARMin.VXferS.toS ? 0 : 1,
   3865                        (fD >> 1) & 0xF, rLo, 0xA,
   3866                        BITS4((fD & 1),0,0,1), 0);
   3867          *p++ = insn;
   3868          goto done;
   3869       }
   3870       case ARMin_VCvtID: {
   3871          Bool iToD = i->ARMin.VCvtID.iToD;
   3872          Bool syned = i->ARMin.VCvtID.syned;
   3873          if (iToD && syned) {
   3874             // FSITOD: I32S-in-freg to F64-in-dreg
   3875             UInt regF = fregEnc(i->ARMin.VCvtID.src);
   3876             UInt regD = dregEnc(i->ARMin.VCvtID.dst);
   3877             UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
   3878                                  X1011, BITS4(1,1,(regF & 1),0),
   3879                                  (regF >> 1) & 0xF);
   3880             *p++ = insn;
   3881             goto done;
   3882          }
   3883          if (iToD && (!syned)) {
   3884             // FUITOD: I32U-in-freg to F64-in-dreg
   3885             UInt regF = fregEnc(i->ARMin.VCvtID.src);
   3886             UInt regD = dregEnc(i->ARMin.VCvtID.dst);
   3887             UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
   3888                                  X1011, BITS4(0,1,(regF & 1),0),
   3889                                  (regF >> 1) & 0xF);
   3890             *p++ = insn;
   3891             goto done;
   3892          }
   3893          if ((!iToD) && syned) {
   3894             // FTOSID: F64-in-dreg to I32S-in-freg
   3895             UInt regD = dregEnc(i->ARMin.VCvtID.src);
   3896             UInt regF = fregEnc(i->ARMin.VCvtID.dst);
   3897             UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
   3898                                  X1101, (regF >> 1) & 0xF,
   3899                                  X1011, X0100, regD);
   3900             *p++ = insn;
   3901             goto done;
   3902          }
   3903          if ((!iToD) && (!syned)) {
   3904             // FTOUID: F64-in-dreg to I32U-in-freg
   3905             UInt regD = dregEnc(i->ARMin.VCvtID.src);
   3906             UInt regF = fregEnc(i->ARMin.VCvtID.dst);
   3907             UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
   3908                                  X1100, (regF >> 1) & 0xF,
   3909                                  X1011, X0100, regD);
   3910             *p++ = insn;
   3911             goto done;
   3912          }
   3913          /*UNREACHED*/
   3914          vassert(0);
   3915       }
   3916       case ARMin_VRIntR: { /* NB: ARM v8 and above only */
   3917          Bool isF64 = i->ARMin.VRIntR.isF64;
   3918          UInt rDst  = (isF64 ? dregEnc : fregEnc)(i->ARMin.VRIntR.dst);
   3919          UInt rSrc  = (isF64 ? dregEnc : fregEnc)(i->ARMin.VRIntR.src);
   3920          /* The encoding of registers here differs strangely for the
   3921             F32 and F64 cases. */
   3922          UInt D, Vd, M, Vm;
   3923          if (isF64) {
   3924             D  = (rDst >> 4) & 1;
   3925             Vd = rDst & 0xF;
   3926             M  = (rSrc >> 4) & 1;
   3927             Vm = rSrc & 0xF;
   3928          } else {
   3929             Vd = (rDst >> 1) & 0xF;
   3930             D  = rDst & 1;
   3931             Vm = (rSrc >> 1) & 0xF;
   3932             M  = rSrc & 1;
   3933          }
   3934          vassert(D <= 1 && Vd <= 15 && M <= 1 && Vm <= 15);
   3935          *p++ = XXXXXXXX(0xE, X1110, X1011 | (D << 2), X0110, Vd,
   3936                          isF64 ? X1011 : X1010, X0100 | (M << 1), Vm);
   3937          goto done;
   3938       }
   3939       case ARMin_VMinMaxNum: {
   3940          Bool isF64 = i->ARMin.VMinMaxNum.isF64;
   3941          Bool isMax = i->ARMin.VMinMaxNum.isMax;
   3942          UInt rDst  = (isF64 ? dregEnc : fregEnc)(i->ARMin.VMinMaxNum.dst);
   3943          UInt rSrcL = (isF64 ? dregEnc : fregEnc)(i->ARMin.VMinMaxNum.srcL);
   3944          UInt rSrcR = (isF64 ? dregEnc : fregEnc)(i->ARMin.VMinMaxNum.srcR);
   3945          /* The encoding of registers here differs strangely for the
   3946             F32 and F64 cases. */
   3947          UInt D, Vd, N, Vn, M, Vm;
   3948          if (isF64) {
   3949             D  = (rDst >> 4) & 1;
   3950             Vd = rDst & 0xF;
   3951             N  = (rSrcL >> 4) & 1;
   3952             Vn = rSrcL & 0xF;
   3953             M  = (rSrcR >> 4) & 1;
   3954             Vm = rSrcR & 0xF;
   3955          } else {
   3956             Vd = (rDst >> 1) & 0xF;
   3957             D  = rDst & 1;
   3958             Vn = (rSrcL >> 1) & 0xF;
   3959             N  = rSrcL & 1;
   3960             Vm = (rSrcR >> 1) & 0xF;
   3961             M  = rSrcR & 1;
   3962          }
   3963          vassert(D <= 1 && Vd <= 15 && M <= 1 && Vm <= 15 && N <= 1
   3964                  && Vn <= 15);
   3965          *p++ = XXXXXXXX(X1111,X1110, X1000 | (D << 2), Vn, Vd,
   3966                          X1010 | (isF64 ? 1 : 0),
   3967                          (N << 3) | ((isMax ? 0 : 1) << 2) | (M << 1) | 0,
   3968                          Vm);
   3969          goto done;
   3970       }
   3971       case ARMin_FPSCR: {
   3972          Bool toFPSCR = i->ARMin.FPSCR.toFPSCR;
   3973          UInt iReg    = iregEnc(i->ARMin.FPSCR.iReg);
   3974          if (toFPSCR) {
   3975             /* fmxr fpscr, iReg is EEE1 iReg A10 */
   3976             *p++ = 0xEEE10A10 | ((iReg & 0xF) << 12);
   3977             goto done;
   3978          }
   3979          goto bad; // FPSCR -> iReg case currently ATC
   3980       }
   3981       case ARMin_MFence: {
   3982          // It's not clear (to me) how the older MCR-based barriers
   3983          // below relate to the ARMv7 versions, so let's just use the
   3984          // v7 versions, as they are at least well documented.
   3985          //*p++ = 0xEE070F9A; /* mcr 15,0,r0,c7,c10,4 (DSB) */
   3986          //*p++ = 0xEE070FBA; /* mcr 15,0,r0,c7,c10,5 (DMB) */
   3987          //*p++ = 0xEE070F95; /* mcr 15,0,r0,c7,c5,4  (ISB) */
   3988          *p++ = 0xF57FF04F; /* DSB sy */
   3989          *p++ = 0xF57FF05F; /* DMB sy */
   3990          *p++ = 0xF57FF06F; /* ISB */
   3991          goto done;
   3992       }
   3993       case ARMin_CLREX: {
   3994          *p++ = 0xF57FF01F; /* clrex */
   3995          goto done;
   3996       }
   3997 
   3998       case ARMin_NLdStQ: {
   3999          UInt regD = qregEnc(i->ARMin.NLdStQ.dQ) << 1;
   4000          UInt regN, regM;
   4001          UInt D = regD >> 4;
   4002          UInt bL = i->ARMin.NLdStQ.isLoad ? 1 : 0;
   4003          UInt insn;
   4004          vassert(hregClass(i->ARMin.NLdStQ.dQ) == HRcVec128);
   4005          regD &= 0xF;
   4006          if (i->ARMin.NLdStQ.amode->tag == ARMamN_RR) {
   4007             regN = iregEnc(i->ARMin.NLdStQ.amode->ARMamN.RR.rN);
   4008             regM = iregEnc(i->ARMin.NLdStQ.amode->ARMamN.RR.rM);
   4009          } else {
   4010             regN = iregEnc(i->ARMin.NLdStQ.amode->ARMamN.R.rN);
   4011             regM = 15;
   4012          }
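         /* In the NEON structure load/store encodings, Rm == 15 (PC)
            selects the plain no-writeback [Rn] addressing form, which
            is what the register-only amode needs. */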
   4013          insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
   4014                               regN, regD, X1010, X1000, regM);
   4015          *p++ = insn;
   4016          goto done;
   4017       }
   4018       case ARMin_NLdStD: {
   4019          UInt regD = dregEnc(i->ARMin.NLdStD.dD);
   4020          UInt regN, regM;
   4021          UInt D = regD >> 4;
   4022          UInt bL = i->ARMin.NLdStD.isLoad ? 1 : 0;
   4023          UInt insn;
   4024          vassert(hregClass(i->ARMin.NLdStD.dD) == HRcFlt64);
   4025          regD &= 0xF;
   4026          if (i->ARMin.NLdStD.amode->tag == ARMamN_RR) {
   4027             regN = iregEnc(i->ARMin.NLdStD.amode->ARMamN.RR.rN);
   4028             regM = iregEnc(i->ARMin.NLdStD.amode->ARMamN.RR.rM);
   4029          } else {
   4030             regN = iregEnc(i->ARMin.NLdStD.amode->ARMamN.R.rN);
   4031             regM = 15;
   4032          }
   4033          insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
   4034                               regN, regD, X0111, X1000, regM);
   4035          *p++ = insn;
   4036          goto done;
   4037       }
   4038       case ARMin_NUnaryS: {
   4039          UInt Q = i->ARMin.NUnaryS.Q ? 1 : 0;
   4040          UInt regD, D;
   4041          UInt regM, M;
   4042          UInt size = i->ARMin.NUnaryS.size;
   4043          UInt insn;
   4044          UInt opc, opc1, opc2;
   4045          switch (i->ARMin.NUnaryS.op) {
    4046             case ARMneon_VDUP:
   4047                if (i->ARMin.NUnaryS.size >= 16)
   4048                   goto bad;
   4049                if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Reg)
   4050                   goto bad;
   4051                if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
   4052                   goto bad;
   4053                regD = (hregClass(i->ARMin.NUnaryS.dst->reg) == HRcVec128)
   4054                         ? (qregEnc(i->ARMin.NUnaryS.dst->reg) << 1)
   4055                         : dregEnc(i->ARMin.NUnaryS.dst->reg);
   4056                regM = (hregClass(i->ARMin.NUnaryS.src->reg) == HRcVec128)
   4057                         ? (qregEnc(i->ARMin.NUnaryS.src->reg) << 1)
   4058                         : dregEnc(i->ARMin.NUnaryS.src->reg);
   4059                D = regD >> 4;
   4060                M = regM >> 4;
   4061                regD &= 0xf;
   4062                regM &= 0xf;
   4063                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1),
   4064                                (i->ARMin.NUnaryS.size & 0xf), regD,
   4065                                X1100, BITS4(0,Q,M,0), regM);
   4066                *p++ = insn;
   4067                goto done;
   4068             case ARMneon_SETELEM:
   4069                regD = Q ? (qregEnc(i->ARMin.NUnaryS.dst->reg) << 1) :
   4070                                 dregEnc(i->ARMin.NUnaryS.dst->reg);
   4071                regM = iregEnc(i->ARMin.NUnaryS.src->reg);
   4072                M = regM >> 4;
   4073                D = regD >> 4;
   4074                regM &= 0xF;
   4075                regD &= 0xF;
   4076                if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Scalar)
   4077                   goto bad;
   4078                switch (size) {
   4079                   case 0:
   4080                      if (i->ARMin.NUnaryS.dst->index > 7)
   4081                         goto bad;
   4082                      opc = X1000 | i->ARMin.NUnaryS.dst->index;
   4083                      break;
   4084                   case 1:
   4085                      if (i->ARMin.NUnaryS.dst->index > 3)
   4086                         goto bad;
   4087                      opc = X0001 | (i->ARMin.NUnaryS.dst->index << 1);
   4088                      break;
   4089                   case 2:
   4090                      if (i->ARMin.NUnaryS.dst->index > 1)
   4091                         goto bad;
   4092                      opc = X0000 | (i->ARMin.NUnaryS.dst->index << 2);
   4093                      break;
   4094                   default:
   4095                      goto bad;
   4096                }
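               /* opc packs the VMOV (core -> scalar) lane select.
                  Illustrative example: size 0 (bytes), index 5 gives
                  opc = 0b1101, split below into opc1 = 0b11 (insn bits
                  22:21) and opc2 = 0b01 (insn bits 6:5). */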
   4097                opc1 = (opc >> 2) & 3;
   4098                opc2 = opc & 3;
   4099                insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),0),
   4100                                regD, regM, X1011,
   4101                                BITS4(D,(opc2 >> 1),(opc2 & 1),1), X0000);
   4102                *p++ = insn;
   4103                goto done;
   4104             case ARMneon_GETELEMU:
   4105                regM = Q ? (qregEnc(i->ARMin.NUnaryS.src->reg) << 1) :
   4106                                 dregEnc(i->ARMin.NUnaryS.src->reg);
   4107                regD = iregEnc(i->ARMin.NUnaryS.dst->reg);
   4108                M = regM >> 4;
   4109                D = regD >> 4;
   4110                regM &= 0xF;
   4111                regD &= 0xF;
   4112                if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
   4113                   goto bad;
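               /* A Q register lives in two consecutive D registers and
                  the scalar-transfer encodings only name D registers,
                  so for high lanes bump regM to the odd D register and
                  fold the index down. */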
   4114                switch (size) {
   4115                   case 0:
   4116                      if (Q && i->ARMin.NUnaryS.src->index > 7) {
   4117                         regM++;
   4118                         i->ARMin.NUnaryS.src->index -= 8;
   4119                      }
   4120                      if (i->ARMin.NUnaryS.src->index > 7)
   4121                         goto bad;
   4122                      opc = X1000 | i->ARMin.NUnaryS.src->index;
   4123                      break;
   4124                   case 1:
   4125                      if (Q && i->ARMin.NUnaryS.src->index > 3) {
   4126                         regM++;
   4127                         i->ARMin.NUnaryS.src->index -= 4;
   4128                      }
   4129                      if (i->ARMin.NUnaryS.src->index > 3)
   4130                         goto bad;
   4131                      opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
   4132                      break;
   4133                   case 2:
   4134                      goto bad;
   4135                   default:
   4136                      goto bad;
   4137                }
   4138                opc1 = (opc >> 2) & 3;
   4139                opc2 = opc & 3;
   4140                insn = XXXXXXXX(0xE, X1110, BITS4(1,(opc1 >> 1),(opc1 & 1),1),
   4141                                regM, regD, X1011,
   4142                                BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
   4143                *p++ = insn;
   4144                goto done;
   4145             case ARMneon_GETELEMS:
   4146                regM = Q ? (qregEnc(i->ARMin.NUnaryS.src->reg) << 1) :
   4147                                 dregEnc(i->ARMin.NUnaryS.src->reg);
   4148                regD = iregEnc(i->ARMin.NUnaryS.dst->reg);
   4149                M = regM >> 4;
   4150                D = regD >> 4;
   4151                regM &= 0xF;
   4152                regD &= 0xF;
   4153                if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
   4154                   goto bad;
   4155                switch (size) {
   4156                   case 0:
   4157                      if (Q && i->ARMin.NUnaryS.src->index > 7) {
   4158                         regM++;
   4159                         i->ARMin.NUnaryS.src->index -= 8;
   4160                      }
   4161                      if (i->ARMin.NUnaryS.src->index > 7)
   4162                         goto bad;
   4163                      opc = X1000 | i->ARMin.NUnaryS.src->index;
   4164                      break;
   4165                   case 1:
   4166                      if (Q && i->ARMin.NUnaryS.src->index > 3) {
   4167                         regM++;
   4168                         i->ARMin.NUnaryS.src->index -= 4;
   4169                      }
   4170                      if (i->ARMin.NUnaryS.src->index > 3)
   4171                         goto bad;
   4172                      opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
   4173                      break;
   4174                   case 2:
   4175                      if (Q && i->ARMin.NUnaryS.src->index > 1) {
   4176                         regM++;
   4177                         i->ARMin.NUnaryS.src->index -= 2;
   4178                      }
   4179                      if (i->ARMin.NUnaryS.src->index > 1)
   4180                         goto bad;
   4181                      opc = X0000 | (i->ARMin.NUnaryS.src->index << 2);
   4182                      break;
   4183                   default:
   4184                      goto bad;
   4185                }
   4186                opc1 = (opc >> 2) & 3;
   4187                opc2 = opc & 3;
   4188                insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),1),
   4189                                regM, regD, X1011,
   4190                                BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
   4191                *p++ = insn;
   4192                goto done;
   4193             default:
   4194                goto bad;
   4195          }
   4196       }
   4197       case ARMin_NUnary: {
   4198          UInt Q = i->ARMin.NUnary.Q ? 1 : 0;
   4199          UInt regD = (hregClass(i->ARMin.NUnary.dst) == HRcVec128)
   4200                        ? (qregEnc(i->ARMin.NUnary.dst) << 1)
   4201                        : dregEnc(i->ARMin.NUnary.dst);
   4202          UInt regM, M;
   4203          UInt D = regD >> 4;
   4204          UInt sz1 = i->ARMin.NUnary.size >> 1;
   4205          UInt sz2 = i->ARMin.NUnary.size & 1;
   4206          UInt sz = i->ARMin.NUnary.size;
   4207          UInt insn;
   4208          UInt F = 0; /* TODO: floating point EQZ ??? */
   4209          if (i->ARMin.NUnary.op != ARMneon_DUP) {
   4210             regM = (hregClass(i->ARMin.NUnary.src) == HRcVec128)
   4211                      ? (qregEnc(i->ARMin.NUnary.src) << 1)
   4212                      : dregEnc(i->ARMin.NUnary.src);
   4213             M = regM >> 4;
   4214          } else {
   4215             regM = iregEnc(i->ARMin.NUnary.src);
   4216             M = regM >> 4;
   4217          }
   4218          regD &= 0xF;
   4219          regM &= 0xF;
   4220          switch (i->ARMin.NUnary.op) {
   4221             case ARMneon_COPY: /* VMOV reg, reg */
   4222                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regM, regD, X0001,
   4223                                BITS4(M,Q,M,1), regM);
   4224                break;
   4225             case ARMneon_COPYN: /* VMOVN regD, regQ */
   4226                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
   4227                                regD, X0010, BITS4(0,0,M,0), regM);
   4228                break;
    4229             case ARMneon_COPYQNSS: /* VQMOVN regD, regQ (signed) */
   4230                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
   4231                                regD, X0010, BITS4(1,0,M,0), regM);
   4232                break;
   4233             case ARMneon_COPYQNUS: /* VQMOVUN regD, regQ */
   4234                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
   4235                                regD, X0010, BITS4(0,1,M,0), regM);
   4236                break;
    4237             case ARMneon_COPYQNUU: /* VQMOVN regD, regQ (unsigned) */
   4238                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
   4239                                regD, X0010, BITS4(1,1,M,0), regM);
   4240                break;
    4241             case ARMneon_COPYLS: /* VMOVL.Sxx regQ, regD */
   4242                if (sz >= 3)
   4243                   goto bad;
   4244                insn = XXXXXXXX(0xF, X0010,
   4245                                BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
   4246                                BITS4((sz == 0) ? 1 : 0,0,0,0),
   4247                                regD, X1010, BITS4(0,0,M,1), regM);
   4248                break;
    4249             case ARMneon_COPYLU: /* VMOVL.Uxx regQ, regD */
   4250                if (sz >= 3)
   4251                   goto bad;
   4252                insn = XXXXXXXX(0xF, X0011,
   4253                                BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
   4254                                BITS4((sz == 0) ? 1 : 0,0,0,0),
   4255                                regD, X1010, BITS4(0,0,M,1), regM);
   4256                break;
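            /* Both VMOVL forms place imm6 = 8 << size in insn bits
               21:16 -- 0b001000, 0b010000, 0b100000 for sizes 0/1/2 --
               which is what the two BITS4() terms above build. */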
    4257             case ARMneon_NOT: /* VMVN reg, reg */
   4258                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
   4259                                BITS4(1,Q,M,0), regM);
   4260                break;
   4261             case ARMneon_EQZ:
   4262                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
   4263                                regD, BITS4(0,F,0,1), BITS4(0,Q,M,0), regM);
   4264                break;
   4265             case ARMneon_CNT:
   4266                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
   4267                                BITS4(0,Q,M,0), regM);
   4268                break;
   4269             case ARMneon_CLZ:
   4270                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
   4271                                regD, X0100, BITS4(1,Q,M,0), regM);
   4272                break;
   4273             case ARMneon_CLS:
   4274                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
   4275                                regD, X0100, BITS4(0,Q,M,0), regM);
   4276                break;
   4277             case ARMneon_ABS:
   4278                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
   4279                                regD, X0011, BITS4(0,Q,M,0), regM);
   4280                break;
   4281             case ARMneon_DUP:
   4282                sz1 = i->ARMin.NUnary.size == 0 ? 1 : 0;
   4283                sz2 = i->ARMin.NUnary.size == 1 ? 1 : 0;
   4284                vassert(sz1 + sz2 < 2);
   4285                insn = XXXXXXXX(0xE, X1110, BITS4(1, sz1, Q, 0), regD, regM,
   4286                                X1011, BITS4(D,0,sz2,1), X0000);
   4287                break;
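            /* For VDUP (core register) the element size is picked by
               the b:e bits -- 1:0 byte, 0:1 halfword, 0:0 word --
               which is what sz1/sz2 compute above. */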
   4288             case ARMneon_REV16:
   4289                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
   4290                                regD, BITS4(0,0,0,1), BITS4(0,Q,M,0), regM);
   4291                break;
   4292             case ARMneon_REV32:
   4293                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
   4294                                regD, BITS4(0,0,0,0), BITS4(1,Q,M,0), regM);
   4295                break;
   4296             case ARMneon_REV64:
   4297                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
   4298                                regD, BITS4(0,0,0,0), BITS4(0,Q,M,0), regM);
   4299                break;
   4300             case ARMneon_PADDLU:
   4301                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
   4302                                regD, X0010, BITS4(1,Q,M,0), regM);
   4303                break;
   4304             case ARMneon_PADDLS:
   4305                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
   4306                                regD, X0010, BITS4(0,Q,M,0), regM);
   4307                break;
   4308             case ARMneon_VQSHLNUU:
   4309                insn = XXXXXXXX(0xF, X0011,
   4310                                (1 << 3) | (D << 2) | ((sz >> 4) & 3),
   4311                                sz & 0xf, regD, X0111,
   4312                                BITS4(sz >> 6,Q,M,1), regM);
   4313                break;
   4314             case ARMneon_VQSHLNSS:
   4315                insn = XXXXXXXX(0xF, X0010,
   4316                                (1 << 3) | (D << 2) | ((sz >> 4) & 3),
   4317                                sz & 0xf, regD, X0111,
   4318                                BITS4(sz >> 6,Q,M,1), regM);
   4319                break;
   4320             case ARMneon_VQSHLNUS:
   4321                insn = XXXXXXXX(0xF, X0011,
   4322                                (1 << 3) | (D << 2) | ((sz >> 4) & 3),
   4323                                sz & 0xf, regD, X0110,
   4324                                BITS4(sz >> 6,Q,M,1), regM);
   4325                break;
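            /* For these saturating shifts 'size' arrives pre-packed as
               L:imm6 (the shift-amount encoding): bit 6 supplies the L
               bit and bits 5:0 the imm6 field, spread across the
               nibbles above. */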
   4326             case ARMneon_VCVTFtoS:
   4327                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
   4328                                BITS4(0,Q,M,0), regM);
   4329                break;
   4330             case ARMneon_VCVTFtoU:
   4331                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
   4332                                BITS4(1,Q,M,0), regM);
   4333                break;
   4334             case ARMneon_VCVTStoF:
   4335                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
   4336                                BITS4(0,Q,M,0), regM);
   4337                break;
   4338             case ARMneon_VCVTUtoF:
   4339                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
   4340                                BITS4(1,Q,M,0), regM);
   4341                break;
   4342             case ARMneon_VCVTFtoFixedU:
   4343                sz1 = (sz >> 5) & 1;
   4344                sz2 = (sz >> 4) & 1;
   4345                sz &= 0xf;
   4346                insn = XXXXXXXX(0xF, X0011,
   4347                                BITS4(1,D,sz1,sz2), sz, regD, X1111,
   4348                                BITS4(0,Q,M,1), regM);
   4349                break;
   4350             case ARMneon_VCVTFtoFixedS:
   4351                sz1 = (sz >> 5) & 1;
   4352                sz2 = (sz >> 4) & 1;
   4353                sz &= 0xf;
   4354                insn = XXXXXXXX(0xF, X0010,
   4355                                BITS4(1,D,sz1,sz2), sz, regD, X1111,
   4356                                BITS4(0,Q,M,1), regM);
   4357                break;
   4358             case ARMneon_VCVTFixedUtoF:
   4359                sz1 = (sz >> 5) & 1;
   4360                sz2 = (sz >> 4) & 1;
   4361                sz &= 0xf;
   4362                insn = XXXXXXXX(0xF, X0011,
   4363                                BITS4(1,D,sz1,sz2), sz, regD, X1110,
   4364                                BITS4(0,Q,M,1), regM);
   4365                break;
   4366             case ARMneon_VCVTFixedStoF:
   4367                sz1 = (sz >> 5) & 1;
   4368                sz2 = (sz >> 4) & 1;
   4369                sz &= 0xf;
   4370                insn = XXXXXXXX(0xF, X0010,
   4371                                BITS4(1,D,sz1,sz2), sz, regD, X1110,
   4372                                BITS4(0,Q,M,1), regM);
   4373                break;
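            /* Likewise the fixed-point converts carry imm6 in 'size'
               (fraction bits = 64 - imm6): bits 5:4 follow the D bit
               and bits 3:0 form their own nibble. */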
   4374             case ARMneon_VCVTF32toF16:
   4375                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0110,
   4376                                BITS4(0,0,M,0), regM);
   4377                break;
   4378             case ARMneon_VCVTF16toF32:
   4379                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0111,
   4380                                BITS4(0,0,M,0), regM);
   4381                break;
   4382             case ARMneon_VRECIP:
   4383                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
   4384                                BITS4(0,Q,M,0), regM);
   4385                break;
   4386             case ARMneon_VRECIPF:
   4387                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
   4388                                BITS4(0,Q,M,0), regM);
   4389                break;
   4390             case ARMneon_VABSFP:
   4391                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
   4392                                BITS4(0,Q,M,0), regM);
   4393                break;
   4394             case ARMneon_VRSQRTEFP:
   4395                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
   4396                                BITS4(1,Q,M,0), regM);
   4397                break;
   4398             case ARMneon_VRSQRTE:
   4399                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
   4400                                BITS4(1,Q,M,0), regM);
   4401                break;
   4402             case ARMneon_VNEGF:
   4403                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
   4404                                BITS4(1,Q,M,0), regM);
   4405                break;
   4406 
   4407             default:
   4408                goto bad;
   4409          }
   4410          *p++ = insn;
   4411          goto done;
   4412       }
   4413       case ARMin_NDual: {
   4414          UInt Q = i->ARMin.NDual.Q ? 1 : 0;
   4415          UInt regD = (hregClass(i->ARMin.NDual.arg1) == HRcVec128)
   4416                        ? (qregEnc(i->ARMin.NDual.arg1) << 1)
   4417                        : dregEnc(i->ARMin.NDual.arg1);
   4418          UInt regM = (hregClass(i->ARMin.NDual.arg2) == HRcVec128)
   4419                        ? (qregEnc(i->ARMin.NDual.arg2) << 1)
   4420                        : dregEnc(i->ARMin.NDual.arg2);
   4421          UInt D = regD >> 4;
   4422          UInt M = regM >> 4;
   4423          UInt sz1 = i->ARMin.NDual.size >> 1;
   4424          UInt sz2 = i->ARMin.NDual.size & 1;
   4425          UInt insn;
   4426          regD &= 0xF;
   4427          regM &= 0xF;
   4428          switch (i->ARMin.NDual.op) {
   4429             case ARMneon_TRN: /* VTRN reg, reg */
   4430                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
   4431                                regD, X0000, BITS4(1,Q,M,0), regM);
   4432                break;
   4433             case ARMneon_ZIP: /* VZIP reg, reg */
   4434                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
   4435                                regD, X0001, BITS4(1,Q,M,0), regM);
   4436                break;
   4437             case ARMneon_UZP: /* VUZP reg, reg */
   4438                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
   4439                                regD, X0001, BITS4(0,Q,M,0), regM);
   4440                break;
   4441             default:
   4442                goto bad;
   4443          }
   4444          *p++ = insn;
   4445          goto done;
   4446       }
   4447       case ARMin_NBinary: {
   4448          UInt Q = i->ARMin.NBinary.Q ? 1 : 0;
   4449          UInt regD = (hregClass(i->ARMin.NBinary.dst) == HRcVec128)
   4450                        ? (qregEnc(i->ARMin.NBinary.dst) << 1)
   4451                        : dregEnc(i->ARMin.NBinary.dst);
   4452          UInt regN = (hregClass(i->ARMin.NBinary.argL) == HRcVec128)
   4453                        ? (qregEnc(i->ARMin.NBinary.argL) << 1)
   4454                        : dregEnc(i->ARMin.NBinary.argL);
   4455          UInt regM = (hregClass(i->ARMin.NBinary.argR) == HRcVec128)
   4456                        ? (qregEnc(i->ARMin.NBinary.argR) << 1)
   4457                        : dregEnc(i->ARMin.NBinary.argR);
   4458          UInt sz1 = i->ARMin.NBinary.size >> 1;
   4459          UInt sz2 = i->ARMin.NBinary.size & 1;
   4460          UInt D = regD >> 4;
   4461          UInt N = regN >> 4;
   4462          UInt M = regM >> 4;
   4463          UInt insn;
   4464          regD &= 0xF;
   4465          regM &= 0xF;
   4466          regN &= 0xF;
   4467          switch (i->ARMin.NBinary.op) {
   4468             case ARMneon_VAND: /* VAND reg, reg, reg */
   4469                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X0001,
   4470                                BITS4(N,Q,M,1), regM);
   4471                break;
    4472             case ARMneon_VORR: /* VORR reg, reg, reg */
   4473                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X0001,
   4474                                BITS4(N,Q,M,1), regM);
   4475                break;
   4476             case ARMneon_VXOR: /* VEOR reg, reg, reg */
   4477                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X0001,
   4478                                BITS4(N,Q,M,1), regM);
   4479                break;
   4480             case ARMneon_VADD: /* VADD reg, reg, reg */
   4481                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   4482                                X1000, BITS4(N,Q,M,0), regM);
   4483                break;
   4484             case ARMneon_VSUB: /* VSUB reg, reg, reg */
   4485                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   4486                                X1000, BITS4(N,Q,M,0), regM);
   4487                break;
   4488             case ARMneon_VMINU: /* VMIN.Uxx reg, reg, reg */
   4489                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   4490                                X0110, BITS4(N,Q,M,1), regM);
   4491                break;
   4492             case ARMneon_VMINS: /* VMIN.Sxx reg, reg, reg */
   4493                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   4494                                X0110, BITS4(N,Q,M,1), regM);
   4495                break;
   4496             case ARMneon_VMAXU: /* VMAX.Uxx reg, reg, reg */
   4497                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   4498                                X0110, BITS4(N,Q,M,0), regM);
   4499                break;
   4500             case ARMneon_VMAXS: /* VMAX.Sxx reg, reg, reg */
   4501                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   4502                                X0110, BITS4(N,Q,M,0), regM);
   4503                break;
   4504             case ARMneon_VRHADDS: /* VRHADD.Sxx reg, reg, reg */
   4505                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   4506                                X0001, BITS4(N,Q,M,0), regM);
   4507                break;
   4508             case ARMneon_VRHADDU: /* VRHADD.Uxx reg, reg, reg */
   4509                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   4510                                X0001, BITS4(N,Q,M,0), regM);
   4511                break;
   4512             case ARMneon_VQADDU: /* VQADD unsigned reg, reg, reg */
   4513                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   4514                                X0000, BITS4(N,Q,M,1), regM);
   4515                break;
   4516             case ARMneon_VQADDS: /* VQADD signed reg, reg, reg */
   4517                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   4518                                X0000, BITS4(N,Q,M,1), regM);
   4519                break;
   4520             case ARMneon_VQSUBU: /* VQSUB unsigned reg, reg, reg */
   4521                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   4522                                X0010, BITS4(N,Q,M,1), regM);
   4523                break;
   4524             case ARMneon_VQSUBS: /* VQSUB signed reg, reg, reg */
   4525                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   4526                                X0010, BITS4(N,Q,M,1), regM);
   4527                break;
   4528             case ARMneon_VCGTU: /* VCGT unsigned reg, reg, reg */
   4529                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   4530                                X0011, BITS4(N,Q,M,0), regM);
   4531                break;
   4532             case ARMneon_VCGTS: /* VCGT signed reg, reg, reg */
   4533                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   4534                                X0011, BITS4(N,Q,M,0), regM);
   4535                break;
   4536             case ARMneon_VCGEU: /* VCGE unsigned reg, reg, reg */
   4537                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   4538                                X0011, BITS4(N,Q,M,1), regM);
   4539                break;
   4540             case ARMneon_VCGES: /* VCGE signed reg, reg, reg */
   4541                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   4542                                X0011, BITS4(N,Q,M,1), regM);
   4543                break;
   4544             case ARMneon_VCEQ: /* VCEQ reg, reg, reg */
   4545                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   4546                                X1000, BITS4(N,Q,M,1), regM);
   4547                break;
    4548             case ARMneon_VEXT: /* VEXT.8 reg, reg, #imm4 */
   4549                if (i->ARMin.NBinary.size >= 16)
   4550                   goto bad;
   4551                insn = XXXXXXXX(0xF, X0010, BITS4(1,D,1,1), regN, regD,
   4552                                i->ARMin.NBinary.size & 0xf, BITS4(N,Q,M,0),
   4553                                regM);
   4554                break;
   4555             case ARMneon_VMUL:
   4556                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   4557                                X1001, BITS4(N,Q,M,1), regM);
   4558                break;
   4559             case ARMneon_VMULLU:
   4560                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,sz1,sz2), regN, regD,
   4561                                X1100, BITS4(N,0,M,0), regM);
   4562                break;
   4563             case ARMneon_VMULLS:
   4564                insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
   4565                                X1100, BITS4(N,0,M,0), regM);
   4566                break;
   4567             case ARMneon_VMULP:
   4568                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   4569                                X1001, BITS4(N,Q,M,1), regM);
   4570                break;
   4571             case ARMneon_VMULFP:
   4572                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
   4573                                X1101, BITS4(N,Q,M,1), regM);
   4574                break;
   4575             case ARMneon_VMULLP:
   4576                insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
   4577                                X1110, BITS4(N,0,M,0), regM);
   4578                break;
   4579             case ARMneon_VQDMULH:
   4580                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   4581                                X1011, BITS4(N,Q,M,0), regM);
   4582                break;
   4583             case ARMneon_VQRDMULH:
   4584                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   4585                                X1011, BITS4(N,Q,M,0), regM);
   4586                break;
   4587             case ARMneon_VQDMULL:
   4588                insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
   4589                                X1101, BITS4(N,0,M,0), regM);
   4590                break;
   4591             case ARMneon_VTBL:
   4592                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), regN, regD,
   4593                                X1000, BITS4(N,0,M,0), regM);
   4594                break;
   4595             case ARMneon_VPADD:
   4596                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   4597                                X1011, BITS4(N,Q,M,1), regM);
   4598                break;
   4599             case ARMneon_VPADDFP:
   4600                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
   4601                                X1101, BITS4(N,Q,M,0), regM);
   4602                break;
   4603             case ARMneon_VPMINU:
   4604                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   4605                                X1010, BITS4(N,Q,M,1), regM);
   4606                break;
   4607             case ARMneon_VPMINS:
   4608                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   4609                                X1010, BITS4(N,Q,M,1), regM);
   4610                break;
   4611             case ARMneon_VPMAXU:
   4612                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   4613                                X1010, BITS4(N,Q,M,0), regM);
   4614                break;
   4615             case ARMneon_VPMAXS:
   4616                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   4617                                X1010, BITS4(N,Q,M,0), regM);
   4618                break;
    4619             case ARMneon_VADDFP: /* VADD.F32 reg, reg, reg */
   4620                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
   4621                                X1101, BITS4(N,Q,M,0), regM);
   4622                break;
    4623             case ARMneon_VSUBFP: /* VSUB.F32 reg, reg, reg */
   4624                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
   4625                                X1101, BITS4(N,Q,M,0), regM);
   4626                break;
    4627             case ARMneon_VABDFP: /* VABD.F32 reg, reg, reg */
   4628                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
   4629                                X1101, BITS4(N,Q,M,0), regM);
   4630                break;
   4631             case ARMneon_VMINF:
   4632                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
   4633                                X1111, BITS4(N,Q,M,0), regM);
   4634                break;
   4635             case ARMneon_VMAXF:
   4636                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
   4637                                X1111, BITS4(N,Q,M,0), regM);
   4638                break;
   4639             case ARMneon_VPMINF:
   4640                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
   4641                                X1111, BITS4(N,Q,M,0), regM);
   4642                break;
   4643             case ARMneon_VPMAXF:
   4644                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
   4645                                X1111, BITS4(N,Q,M,0), regM);
   4646                break;
   4647             case ARMneon_VRECPS:
   4648                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1111,
   4649                                BITS4(N,Q,M,1), regM);
   4650                break;
   4651             case ARMneon_VCGTF:
   4652                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD, X1110,
   4653                                BITS4(N,Q,M,0), regM);
   4654                break;
   4655             case ARMneon_VCGEF:
   4656                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X1110,
   4657                                BITS4(N,Q,M,0), regM);
   4658                break;
   4659             case ARMneon_VCEQF:
   4660                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1110,
   4661                                BITS4(N,Q,M,0), regM);
   4662                break;
   4663             case ARMneon_VRSQRTS:
   4664                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X1111,
   4665                                BITS4(N,Q,M,1), regM);
   4666                break;
   4667             default:
   4668                goto bad;
   4669          }
   4670          *p++ = insn;
   4671          goto done;
   4672       }
   4673       case ARMin_NShift: {
   4674          UInt Q = i->ARMin.NShift.Q ? 1 : 0;
   4675          UInt regD = (hregClass(i->ARMin.NShift.dst) == HRcVec128)
   4676                        ? (qregEnc(i->ARMin.NShift.dst) << 1)
   4677                        : dregEnc(i->ARMin.NShift.dst);
   4678          UInt regM = (hregClass(i->ARMin.NShift.argL) == HRcVec128)
   4679                        ? (qregEnc(i->ARMin.NShift.argL) << 1)
   4680                        : dregEnc(i->ARMin.NShift.argL);
   4681          UInt regN = (hregClass(i->ARMin.NShift.argR) == HRcVec128)
   4682                        ? (qregEnc(i->ARMin.NShift.argR) << 1)
   4683                        : dregEnc(i->ARMin.NShift.argR);
   4684          UInt sz1 = i->ARMin.NShift.size >> 1;
   4685          UInt sz2 = i->ARMin.NShift.size & 1;
   4686          UInt D = regD >> 4;
   4687          UInt N = regN >> 4;
   4688          UInt M = regM >> 4;
   4689          UInt insn;
   4690          regD &= 0xF;
   4691          regM &= 0xF;
   4692          regN &= 0xF;
   4693          switch (i->ARMin.NShift.op) {
   4694             case ARMneon_VSHL:
   4695                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   4696                                X0100, BITS4(N,Q,M,0), regM);
   4697                break;
   4698             case ARMneon_VSAL:
   4699                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   4700                                X0100, BITS4(N,Q,M,0), regM);
   4701                break;
   4702             case ARMneon_VQSHL:
   4703                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
   4704                                X0100, BITS4(N,Q,M,1), regM);
   4705                break;
   4706             case ARMneon_VQSAL:
   4707                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
   4708                                X0100, BITS4(N,Q,M,1), regM);
   4709                break;
   4710             default:
   4711                goto bad;
   4712          }
   4713          *p++ = insn;
   4714          goto done;
   4715       }
   4716       case ARMin_NShl64: {
   4717          HReg regDreg = i->ARMin.NShl64.dst;
   4718          HReg regMreg = i->ARMin.NShl64.src;
   4719          UInt amt     = i->ARMin.NShl64.amt;
   4720          vassert(amt >= 1 && amt <= 63);
   4721          vassert(hregClass(regDreg) == HRcFlt64);
   4722          vassert(hregClass(regMreg) == HRcFlt64);
   4723          UInt regD = dregEnc(regDreg);
   4724          UInt regM = dregEnc(regMreg);
   4725          UInt D    = (regD >> 4) & 1;
   4726          UInt Vd   = regD & 0xF;
   4727          UInt L    = 1;
   4728          UInt Q    = 0; /* always 64-bit */
   4729          UInt M    = (regM >> 4) & 1;
   4730          UInt Vm   = regM & 0xF;
   4731          UInt insn = XXXXXXXX(X1111,X0010, BITS4(1,D,(amt>>5)&1,(amt>>4)&1),
   4732                               amt & 0xF, Vd, X0101, BITS4(L,Q,M,1), Vm);
   4733          *p++ = insn;
   4734          goto done;
   4735       }
   4736       case ARMin_NeonImm: {
   4737          UInt Q = (hregClass(i->ARMin.NeonImm.dst) == HRcVec128) ? 1 : 0;
   4738          UInt regD = Q ? (qregEnc(i->ARMin.NeonImm.dst) << 1) :
   4739                           dregEnc(i->ARMin.NeonImm.dst);
   4740          UInt D = regD >> 4;
   4741          UInt imm = i->ARMin.NeonImm.imm->imm8;
   4742          UInt tp = i->ARMin.NeonImm.imm->type;
   4743          UInt j = imm >> 7;
   4744          UInt imm3 = (imm >> 4) & 0x7;
   4745          UInt imm4 = imm & 0xF;
   4746          UInt cmode, op;
   4747          UInt insn;
   4748          regD &= 0xF;
   4749          if (tp == 9)
   4750             op = 1;
   4751          else
   4752             op = 0;
   4753          switch (tp) {
   4754             case 0:
   4755             case 1:
   4756             case 2:
   4757             case 3:
   4758             case 4:
   4759             case 5:
   4760                cmode = tp << 1;
   4761                break;
   4762             case 9:
   4763             case 6:
   4764                cmode = 14;
   4765                break;
   4766             case 7:
   4767                cmode = 12;
   4768                break;
   4769             case 8:
   4770                cmode = 13;
   4771                break;
   4772             case 10:
   4773                cmode = 15;
   4774                break;
   4775             default:
   4776                vpanic("ARMin_NeonImm");
   4777 
   4778          }
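         /* cmode/op follow the AdvSIMD "modified immediate" table:
            cmode 1110 with op = 0 is the replicated-i8 form, with
            op = 1 (tp 9 only) the 64-bit byte-mask form, and cmode
            1111 the f32 form. */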
   4779          insn = XXXXXXXX(0xF, BITS4(0,0,1,j), BITS4(1,D,0,0), imm3, regD,
   4780                          cmode, BITS4(0,Q,op,1), imm4);
   4781          *p++ = insn;
   4782          goto done;
   4783       }
   4784       case ARMin_NCMovQ: {
   4785          UInt cc = (UInt)i->ARMin.NCMovQ.cond;
   4786          UInt qM = qregEnc(i->ARMin.NCMovQ.src) << 1;
   4787          UInt qD = qregEnc(i->ARMin.NCMovQ.dst) << 1;
   4788          UInt vM = qM & 0xF;
   4789          UInt vD = qD & 0xF;
   4790          UInt M  = (qM >> 4) & 1;
   4791          UInt D  = (qD >> 4) & 1;
   4792          vassert(cc < 16 && cc != ARMcc_AL && cc != ARMcc_NV);
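         /* ARM condition codes come in complementary pairs differing
            only in bit 0, so cc ^ 1 negates cc (AL/NV are excluded by
            the assertion above).  NEON moves can't be conditional, so
            branch over an unconditional vmov instead. */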
   4793          /* b!cc here+8: !cc A00 0000 */
   4794          UInt insn = XXXXXXXX(cc ^ 1, 0xA, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0);
   4795          *p++ = insn;
   4796          /* vmov qD, qM */
   4797          insn = XXXXXXXX(0xF, 0x2, BITS4(0,D,1,0),
   4798                          vM, vD, BITS4(0,0,0,1), BITS4(M,1,M,1), vM);
   4799          *p++ = insn;
   4800          goto done;
   4801       }
   4802       case ARMin_Add32: {
   4803          UInt regD = iregEnc(i->ARMin.Add32.rD);
   4804          UInt regN = iregEnc(i->ARMin.Add32.rN);
   4805          UInt imm32 = i->ARMin.Add32.imm32;
   4806          vassert(regD != regN);
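         /* regD doubles as scratch for materialising imm32, so it
            must differ from regN, as just asserted. */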
   4807          /* MOV regD, imm32 */
   4808          p = imm32_to_ireg((UInt *)p, regD, imm32);
   4809          /* ADD regD, regN, regD */
   4810          UInt insn = XXXXXXXX(0xE, 0, X1000, regN, regD, 0, 0, regD);
   4811          *p++ = insn;
   4812          goto done;
   4813       }
   4814 
   4815       case ARMin_EvCheck: {
   4816          /* We generate:
   4817                ldr  r12, [r8 + #4]   4 == offsetof(host_EvC_COUNTER)
   4818                subs r12, r12, #1  (A1)
   4819                str  r12, [r8 + #4]   4 == offsetof(host_EvC_COUNTER)
   4820                bpl  nofail
   4821                ldr  r12, [r8 + #0]   0 == offsetof(host_EvC_FAILADDR)
   4822                bx   r12
   4823               nofail:
   4824          */
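         /* Note: subs sets the flags, so bpl skips the two-insn
            failure path for as long as the counter stays
            non-negative. */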
   4825          UInt* p0 = p;
   4826          p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
   4827                                 i->ARMin.EvCheck.amCounter);
   4828          *p++ = 0xE25CC001; /* subs r12, r12, #1 */
   4829          p = do_load_or_store32(p, False/*!isLoad*/, /*r*/12,
   4830                                 i->ARMin.EvCheck.amCounter);
   4831          *p++ = 0x5A000001; /* bpl nofail */
   4832          p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
   4833                                 i->ARMin.EvCheck.amFailAddr);
   4834          *p++ = 0xE12FFF1C; /* bx r12 */
   4835          /* nofail: */
   4836 
   4837          /* Crosscheck */
   4838          vassert(evCheckSzB_ARM() == (UChar*)p - (UChar*)p0);
   4839          goto done;
   4840       }
   4841 
   4842       case ARMin_ProfInc: {
   4843          /* We generate:
   4844               (ctrP is unknown now, so use 0x65556555 in the
   4845               expectation that a later call to LibVEX_patchProfCtr
   4846               will be used to fill in the immediate fields once the
   4847               right value is known.)
   4848             movw r12, lo16(0x65556555)
    4849             movt r12, hi16(0x65556555)
   4850             ldr  r11, [r12]
   4851             adds r11, r11, #1
   4852             str  r11, [r12]
   4853             ldr  r11, [r12+4]
   4854             adc  r11, r11, #0
   4855             str  r11, [r12+4]
   4856          */
   4857          p = imm32_to_ireg_EXACTLY2(p, /*r*/12, 0x65556555);
   4858          *p++ = 0xE59CB000;
   4859          *p++ = 0xE29BB001;
   4860          *p++ = 0xE58CB000;
   4861          *p++ = 0xE59CB004;
   4862          *p++ = 0xE2ABB000;
   4863          *p++ = 0xE58CB004;
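         /* The adds/adc pair propagates the carry out of the low word,
            giving a 64-bit increment of the little-endian counter at
            [r12]. */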
   4864          /* Tell the caller .. */
   4865          vassert(!(*is_profInc));
   4866          *is_profInc = True;
   4867          goto done;
   4868       }
   4869 
   4870       /* ... */
   4871       default:
   4872          goto bad;
   4873     }
   4874 
   4875   bad:
   4876    ppARMInstr(i);
   4877    vpanic("emit_ARMInstr");
   4878    /*NOTREACHED*/
   4879 
   4880   done:
   4881    vassert(((UChar*)p) - &buf[0] <= 32);
   4882    return ((UChar*)p) - &buf[0];
   4883 }
   4884 
   4885 
   4886 /* How big is an event check?  See case for ARMin_EvCheck in
   4887    emit_ARMInstr just above.  That crosschecks what this returns, so
   4888    we can tell if we're inconsistent. */
   4889 Int evCheckSzB_ARM (void)
   4890 {
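   /* 6 ARM instructions of 4 bytes each; see ARMin_EvCheck above. */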
   4891    return 24;
   4892 }
   4893 
   4894 
   4895 /* NB: what goes on here has to be very closely coordinated with the
   4896    emitInstr case for XDirect, above. */
   4897 VexInvalRange chainXDirect_ARM ( VexEndness endness_host,
   4898                                  void* place_to_chain,
   4899                                  const void* disp_cp_chain_me_EXPECTED,
   4900                                  const void* place_to_jump_to )
   4901 {
   4902    vassert(endness_host == VexEndnessLE);
   4903 
   4904    /* What we're expecting to see is:
   4905         movw r12, lo16(disp_cp_chain_me_to_EXPECTED)
   4906         movt r12, hi16(disp_cp_chain_me_to_EXPECTED)
   4907         blx  r12
   4908       viz
   4909         <8 bytes generated by imm32_to_ireg_EXACTLY2>
   4910         E1 2F FF 3C
   4911    */
   4912    UInt* p = (UInt*)place_to_chain;
   4913    vassert(0 == (3 & (HWord)p));
   4914    vassert(is_imm32_to_ireg_EXACTLY2(
   4915               p, /*r*/12, (UInt)(Addr)disp_cp_chain_me_EXPECTED));
   4916    vassert(p[2] == 0xE12FFF3C);
   4917    /* And what we want to change it to is either:
   4918         (general case)
   4919           movw r12, lo16(place_to_jump_to)
   4920           movt r12, hi16(place_to_jump_to)
   4921           bx   r12
   4922         viz
   4923           <8 bytes generated by imm32_to_ireg_EXACTLY2>
   4924           E1 2F FF 1C
   4925       ---OR---
   4926         in the case where the displacement falls within 26 bits
   4927           b disp24; undef; undef
   4928         viz
   4929           EA <3 bytes == disp24>
   4930           FF 00 00 00
   4931           FF 00 00 00
   4932 
   4933       In both cases the replacement has the same length as the original.
   4934       To remain sane & verifiable,
   4935       (1) limit the displacement for the short form to
   4936           (say) +/- 30 million, so as to avoid wraparound
   4937           off-by-ones
   4938       (2) even if the short form is applicable, once every (say)
   4939           1024 times use the long form anyway, so as to maintain
   4940           verifiability
   4941    */
   4942 
   4943    /* This is the delta we need to put into a B insn.  It's relative
   4944       to the start of the next-but-one insn, hence the -8.  */
   4945    Long delta   = (Long)((const UChar *)place_to_jump_to - (const UChar*)p) - 8;
   4946    Bool shortOK = delta >= -30*1000*1000 && delta < 30*1000*1000;
   4947    vassert(0 == (delta & (Long)3));
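   /* B's imm24 holds delta >> 2, giving a raw range of roughly
      +/- 32MB; the +/- 30M limit just computed keeps us comfortably
      inside it. */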
   4948 
   4949    static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */
   4950    if (shortOK) {
    4951       shortCTR++; // racy, but harmless: it only biases the long/short choice
   4952       if (0 == (shortCTR & 0x3FF)) {
   4953          shortOK = False;
   4954          if (0)
   4955             vex_printf("QQQ chainXDirect_ARM: shortCTR = %u, "
   4956                        "using long form\n", shortCTR);
   4957       }
   4958    }
   4959 
   4960    /* And make the modifications. */
   4961    if (shortOK) {
   4962       UInt uimm24      = (UInt)(delta >> 2);
   4963       UInt uimm24_shl8 = uimm24 << 8;
   4964       Int  simm24      = (Int)uimm24_shl8;
   4965       simm24 >>= 8;
   4966       vassert(uimm24 == simm24);
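      /* The <<8 then >>8 round trip sign-extends from 24 bits; the
         assertion above confirms delta >> 2 really fits the field. */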
   4967       p[0] = 0xEA000000 | (simm24 & 0x00FFFFFF);
   4968       p[1] = 0xFF000000;
   4969       p[2] = 0xFF000000;
   4970    } else {
   4971       (void)imm32_to_ireg_EXACTLY2(
   4972                p, /*r*/12, (UInt)(Addr)place_to_jump_to);
   4973       p[2] = 0xE12FFF1C;
   4974    }
   4975 
   4976    VexInvalRange vir = {(HWord)p, 12};
   4977    return vir;
   4978 }
   4979 
   4980 
   4981 /* NB: what goes on here has to be very closely coordinated with the
   4982    emitInstr case for XDirect, above. */
   4983 VexInvalRange unchainXDirect_ARM ( VexEndness endness_host,
   4984                                    void* place_to_unchain,
   4985                                    const void* place_to_jump_to_EXPECTED,
   4986                                    const void* disp_cp_chain_me )
   4987 {
   4988    vassert(endness_host == VexEndnessLE);
   4989 
   4990    /* What we're expecting to see is:
   4991         (general case)
   4992           movw r12, lo16(place_to_jump_to_EXPECTED)
    4993           movt r12, hi16(place_to_jump_to_EXPECTED)
   4994           bx   r12
   4995         viz
   4996           <8 bytes generated by imm32_to_ireg_EXACTLY2>
   4997           E1 2F FF 1C
   4998       ---OR---
   4999         in the case where the displacement falls within 26 bits
   5000           b disp24; undef; undef
   5001         viz
   5002           EA <3 bytes == disp24>
   5003           FF 00 00 00
   5004           FF 00 00 00
   5005    */
   5006    UInt* p = (UInt*)place_to_unchain;
   5007    vassert(0 == (3 & (HWord)p));
   5008 
   5009    Bool valid = False;
   5010    if (is_imm32_to_ireg_EXACTLY2(
   5011           p, /*r*/12, (UInt)(Addr)place_to_jump_to_EXPECTED)
   5012        && p[2] == 0xE12FFF1C) {
   5013       valid = True; /* it's the long form */
   5014       if (0)
   5015          vex_printf("QQQ unchainXDirect_ARM: found long form\n");
   5016    } else
   5017    if ((p[0] >> 24) == 0xEA && p[1] == 0xFF000000 && p[2] == 0xFF000000) {
   5018       /* It's the short form.  Check the displacement is right. */
   5019       Int simm24 = p[0] & 0x00FFFFFF;
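      /* sign-extend the 24-bit branch offset (<<8 then >>8) */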
   5020       simm24 <<= 8; simm24 >>= 8;
   5021       if ((UChar*)p + (simm24 << 2) + 8 == place_to_jump_to_EXPECTED) {
   5022          valid = True;
   5023          if (0)
   5024             vex_printf("QQQ unchainXDirect_ARM: found short form\n");
   5025       }
   5026    }
   5027    vassert(valid);
   5028 
   5029    /* And what we want to change it to is:
   5030         movw r12, lo16(disp_cp_chain_me)
   5031         movt r12, hi16(disp_cp_chain_me)
   5032         blx  r12
   5033       viz
   5034         <8 bytes generated by imm32_to_ireg_EXACTLY2>
   5035         E1 2F FF 3C
   5036    */
   5037    (void)imm32_to_ireg_EXACTLY2(
   5038             p, /*r*/12, (UInt)(Addr)disp_cp_chain_me);
   5039    p[2] = 0xE12FFF3C;
   5040    VexInvalRange vir = {(HWord)p, 12};
   5041    return vir;
   5042 }
   5043 
   5044 
   5045 /* Patch the counter address into a profile inc point, as previously
   5046    created by the ARMin_ProfInc case for emit_ARMInstr. */
   5047 VexInvalRange patchProfInc_ARM ( VexEndness endness_host,
   5048                                  void*  place_to_patch,
   5049                                  const ULong* location_of_counter )
   5050 {
   5051    vassert(endness_host == VexEndnessLE);
   5052    vassert(sizeof(ULong*) == 4);
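   /* ARM host pointers are 32 bits wide, so the counter address fits
      the movw/movt pair patched below. */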
   5053    UInt* p = (UInt*)place_to_patch;
   5054    vassert(0 == (3 & (HWord)p));
   5055    vassert(is_imm32_to_ireg_EXACTLY2(p, /*r*/12, 0x65556555));
   5056    vassert(p[2] == 0xE59CB000);
   5057    vassert(p[3] == 0xE29BB001);
   5058    vassert(p[4] == 0xE58CB000);
   5059    vassert(p[5] == 0xE59CB004);
   5060    vassert(p[6] == 0xE2ABB000);
   5061    vassert(p[7] == 0xE58CB004);
   5062    imm32_to_ireg_EXACTLY2(p, /*r*/12, (UInt)(Addr)location_of_counter);
   5063    VexInvalRange vir = {(HWord)p, 8};
   5064    return vir;
   5065 }
   5066 
   5067 
   5068 #undef BITS4
   5069 #undef X0000
   5070 #undef X0001
   5071 #undef X0010
   5072 #undef X0011
   5073 #undef X0100
   5074 #undef X0101
   5075 #undef X0110
   5076 #undef X0111
   5077 #undef X1000
   5078 #undef X1001
   5079 #undef X1010
   5080 #undef X1011
   5081 #undef X1100
   5082 #undef X1101
   5083 #undef X1110
   5084 #undef X1111
   5085 #undef XXXXX___
   5086 #undef XXXXXX__
   5087 #undef XXX___XX
   5088 #undef XXXXX__X
   5089 #undef XXXXXXXX
   5090 #undef XX______
   5091 
   5092 /*---------------------------------------------------------------*/
   5093 /*--- end                                     host_arm_defs.c ---*/
   5094 /*---------------------------------------------------------------*/
   5095