      1 
      2 /*---------------------------------------------------------------*/
      3 /*--- begin                                 host_amd64_defs.c ---*/
      4 /*---------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2011 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     26    02110-1301, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 
     30    Neither the names of the U.S. Department of Energy nor the
     31    University of California nor the names of its contributors may be
     32    used to endorse or promote products derived from this software
     33    without prior written permission.
     34 */
     35 
     36 #include "libvex_basictypes.h"
     37 #include "libvex.h"
     38 #include "libvex_trc_values.h"
     39 
     40 #include "main_util.h"
     41 #include "host_generic_regs.h"
     42 #include "host_amd64_defs.h"
     43 
     44 
     45 /* --------- Registers. --------- */
     46 
     47 void ppHRegAMD64 ( HReg reg )
     48 {
     49    Int r;
     50    static HChar* ireg64_names[16]
     51      = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
     52          "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
     53    /* Be generic for all virtual regs. */
     54    if (hregIsVirtual(reg)) {
     55       ppHReg(reg);
     56       return;
     57    }
     58    /* But specific for real regs. */
     59    switch (hregClass(reg)) {
     60       case HRcInt64:
     61          r = hregNumber(reg);
     62          vassert(r >= 0 && r < 16);
     63          vex_printf("%s", ireg64_names[r]);
     64          return;
     65       case HRcFlt64:
     66          r = hregNumber(reg);
     67          vassert(r >= 0 && r < 6);
     68          vex_printf("%%fake%d", r);
     69          return;
     70       case HRcVec128:
     71          r = hregNumber(reg);
     72          vassert(r >= 0 && r < 16);
     73          vex_printf("%%xmm%d", r);
     74          return;
     75       default:
     76          vpanic("ppHRegAMD64");
     77    }
     78 }
     79 
     80 static void ppHRegAMD64_lo32 ( HReg reg )
     81 {
     82    Int r;
     83    static HChar* ireg32_names[16]
     84      = { "%eax",  "%ecx",  "%edx",  "%ebx",  "%esp",  "%ebp",  "%esi",  "%edi",
     85          "%r8d",  "%r9d",  "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" };
     86    /* Be generic for all virtual regs. */
     87    if (hregIsVirtual(reg)) {
     88       ppHReg(reg);
     89       vex_printf("d");
     90       return;
     91    }
     92    /* But specific for real regs. */
     93    switch (hregClass(reg)) {
     94       case HRcInt64:
     95          r = hregNumber(reg);
     96          vassert(r >= 0 && r < 16);
     97          vex_printf("%s", ireg32_names[r]);
     98          return;
     99       default:
    100          vpanic("ppHRegAMD64_lo32: invalid regclass");
    101    }
    102 }
    103 
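/* Handles for the real registers.  The first argument to mkHReg is the
   hardware register number -- for the integer registers it also serves
   as the index into ireg64_names/ireg32_names above, so those tables
   and these constructors must stay in sync -- and the final False
   presumably marks the register as real rather than virtual. */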
    104 HReg hregAMD64_RAX ( void ) { return mkHReg( 0, HRcInt64, False); }
    105 HReg hregAMD64_RCX ( void ) { return mkHReg( 1, HRcInt64, False); }
    106 HReg hregAMD64_RDX ( void ) { return mkHReg( 2, HRcInt64, False); }
    107 HReg hregAMD64_RBX ( void ) { return mkHReg( 3, HRcInt64, False); }
    108 HReg hregAMD64_RSP ( void ) { return mkHReg( 4, HRcInt64, False); }
    109 HReg hregAMD64_RBP ( void ) { return mkHReg( 5, HRcInt64, False); }
    110 HReg hregAMD64_RSI ( void ) { return mkHReg( 6, HRcInt64, False); }
    111 HReg hregAMD64_RDI ( void ) { return mkHReg( 7, HRcInt64, False); }
    112 HReg hregAMD64_R8  ( void ) { return mkHReg( 8, HRcInt64, False); }
    113 HReg hregAMD64_R9  ( void ) { return mkHReg( 9, HRcInt64, False); }
    114 HReg hregAMD64_R10 ( void ) { return mkHReg(10, HRcInt64, False); }
    115 HReg hregAMD64_R11 ( void ) { return mkHReg(11, HRcInt64, False); }
    116 HReg hregAMD64_R12 ( void ) { return mkHReg(12, HRcInt64, False); }
    117 HReg hregAMD64_R13 ( void ) { return mkHReg(13, HRcInt64, False); }
    118 HReg hregAMD64_R14 ( void ) { return mkHReg(14, HRcInt64, False); }
    119 HReg hregAMD64_R15 ( void ) { return mkHReg(15, HRcInt64, False); }
    120 
    121 //.. HReg hregAMD64_FAKE0 ( void ) { return mkHReg(0, HRcFlt64, False); }
    122 //.. HReg hregAMD64_FAKE1 ( void ) { return mkHReg(1, HRcFlt64, False); }
    123 //.. HReg hregAMD64_FAKE2 ( void ) { return mkHReg(2, HRcFlt64, False); }
    124 //.. HReg hregAMD64_FAKE3 ( void ) { return mkHReg(3, HRcFlt64, False); }
    125 //.. HReg hregAMD64_FAKE4 ( void ) { return mkHReg(4, HRcFlt64, False); }
    126 //.. HReg hregAMD64_FAKE5 ( void ) { return mkHReg(5, HRcFlt64, False); }
    127 //..
    128 HReg hregAMD64_XMM0  ( void ) { return mkHReg( 0, HRcVec128, False); }
    129 HReg hregAMD64_XMM1  ( void ) { return mkHReg( 1, HRcVec128, False); }
    130 HReg hregAMD64_XMM2  ( void ) { return mkHReg( 2, HRcVec128, False); }
    131 HReg hregAMD64_XMM3  ( void ) { return mkHReg( 3, HRcVec128, False); }
    132 HReg hregAMD64_XMM4  ( void ) { return mkHReg( 4, HRcVec128, False); }
    133 HReg hregAMD64_XMM5  ( void ) { return mkHReg( 5, HRcVec128, False); }
    134 HReg hregAMD64_XMM6  ( void ) { return mkHReg( 6, HRcVec128, False); }
    135 HReg hregAMD64_XMM7  ( void ) { return mkHReg( 7, HRcVec128, False); }
    136 HReg hregAMD64_XMM8  ( void ) { return mkHReg( 8, HRcVec128, False); }
    137 HReg hregAMD64_XMM9  ( void ) { return mkHReg( 9, HRcVec128, False); }
    138 HReg hregAMD64_XMM10 ( void ) { return mkHReg(10, HRcVec128, False); }
    139 HReg hregAMD64_XMM11 ( void ) { return mkHReg(11, HRcVec128, False); }
    140 HReg hregAMD64_XMM12 ( void ) { return mkHReg(12, HRcVec128, False); }
    141 HReg hregAMD64_XMM13 ( void ) { return mkHReg(13, HRcVec128, False); }
    142 HReg hregAMD64_XMM14 ( void ) { return mkHReg(14, HRcVec128, False); }
    143 HReg hregAMD64_XMM15 ( void ) { return mkHReg(15, HRcVec128, False); }
    144 
    145 
    146 void getAllocableRegs_AMD64 ( Int* nregs, HReg** arr )
    147 {
    148 #if 0
    149    *nregs = 6;
    150    *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
    151    (*arr)[ 0] = hregAMD64_RSI();
    152    (*arr)[ 1] = hregAMD64_RDI();
    153    (*arr)[ 2] = hregAMD64_RBX();
    154 
    155    (*arr)[ 3] = hregAMD64_XMM7();
    156    (*arr)[ 4] = hregAMD64_XMM8();
    157    (*arr)[ 5] = hregAMD64_XMM9();
    158 #endif
    159 #if 1
    160    *nregs = 20;
    161    *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
    162    (*arr)[ 0] = hregAMD64_RSI();
    163    (*arr)[ 1] = hregAMD64_RDI();
    164    (*arr)[ 2] = hregAMD64_R8();
    165    (*arr)[ 3] = hregAMD64_R9();
    166    (*arr)[ 4] = hregAMD64_R12();
    167    (*arr)[ 5] = hregAMD64_R13();
    168    (*arr)[ 6] = hregAMD64_R14();
    169    (*arr)[ 7] = hregAMD64_R15();
    170    (*arr)[ 8] = hregAMD64_RBX();
    171 
    172    (*arr)[ 9] = hregAMD64_XMM3();
    173    (*arr)[10] = hregAMD64_XMM4();
    174    (*arr)[11] = hregAMD64_XMM5();
    175    (*arr)[12] = hregAMD64_XMM6();
    176    (*arr)[13] = hregAMD64_XMM7();
    177    (*arr)[14] = hregAMD64_XMM8();
    178    (*arr)[15] = hregAMD64_XMM9();
    179    (*arr)[16] = hregAMD64_XMM10();
    180    (*arr)[17] = hregAMD64_XMM11();
    181    (*arr)[18] = hregAMD64_XMM12();
    182    (*arr)[19] = hregAMD64_R10();
    183 #endif
    184 }
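/* A sketch of the rationale, inferred from uses elsewhere in this file
   rather than stated anywhere: the allocable set above omits %rax,
   %rcx, %rdx, %rsp, %rbp and %r11, and %xmm0..%xmm2 and
   %xmm13..%xmm15.  %rsp is the host stack pointer, %rbp presumably
   holds the guest state pointer, and several of the others have fixed
   implicit uses (Ain_Sh64 needs %cl, the one-operand imulq/idivq forms
   use %rax:%rdx), so handing them to the allocator would clash with
   those uses; the rest are presumably kept free as emitter scratch. */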
    185 
    186 
    187 /* --------- Condition codes, Intel encoding. --------- */
    188 
    189 HChar* showAMD64CondCode ( AMD64CondCode cond )
    190 {
    191    switch (cond) {
    192       case Acc_O:      return "o";
    193       case Acc_NO:     return "no";
    194       case Acc_B:      return "b";
    195       case Acc_NB:     return "nb";
    196       case Acc_Z:      return "z";
    197       case Acc_NZ:     return "nz";
    198       case Acc_BE:     return "be";
    199       case Acc_NBE:    return "nbe";
    200       case Acc_S:      return "s";
    201       case Acc_NS:     return "ns";
    202       case Acc_P:      return "p";
    203       case Acc_NP:     return "np";
    204       case Acc_L:      return "l";
    205       case Acc_NL:     return "nl";
    206       case Acc_LE:     return "le";
    207       case Acc_NLE:    return "nle";
    208       case Acc_ALWAYS: return "ALWAYS";
     209       default: vpanic("showAMD64CondCode");
    210    }
    211 }
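/* Since the enum follows the Intel encoding (per the section comment
   above), the numeric value of a cond can presumably be spliced
   directly into the cc field of Jcc/SETcc/CMOVcc opcodes by the
   emitter.  Acc_ALWAYS is a software-only pseudo-condition; note the
   vasserts against it in the CMov constructors below. */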
    212 
    213 
    214 /* --------- AMD64AMode: memory address expressions. --------- */
    215 
    216 AMD64AMode* AMD64AMode_IR ( UInt imm32, HReg reg ) {
    217    AMD64AMode* am = LibVEX_Alloc(sizeof(AMD64AMode));
    218    am->tag        = Aam_IR;
    219    am->Aam.IR.imm = imm32;
    220    am->Aam.IR.reg = reg;
    221    return am;
    222 }
    223 AMD64AMode* AMD64AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
    224    AMD64AMode* am = LibVEX_Alloc(sizeof(AMD64AMode));
    225    am->tag = Aam_IRRS;
    226    am->Aam.IRRS.imm   = imm32;
    227    am->Aam.IRRS.base  = base;
    228    am->Aam.IRRS.index = indEx;
    229    am->Aam.IRRS.shift = shift;
    230    vassert(shift >= 0 && shift <= 3);
    231    return am;
    232 }
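/* Illustrative examples (not from the original source): the gas-style
   amode 0x18(%rbp) corresponds to
      AMD64AMode_IR(0x18, hregAMD64_RBP())
   and 0x8(%rax,%rcx,4) -- scale 4, i.e. shift 2 -- to
      AMD64AMode_IRRS(0x8, hregAMD64_RAX(), hregAMD64_RCX(), 2). */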
    233 
    234 //.. AMD64AMode* dopyAMD64AMode ( AMD64AMode* am ) {
    235 //..    switch (am->tag) {
    236 //..       case Xam_IR:
    237 //..          return AMD64AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg );
    238 //..       case Xam_IRRS:
    239 //..          return AMD64AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base,
    240 //..                                am->Xam.IRRS.index, am->Xam.IRRS.shift );
    241 //..       default:
    242 //..          vpanic("dopyAMD64AMode");
    243 //..    }
    244 //.. }
    245 
    246 void ppAMD64AMode ( AMD64AMode* am ) {
    247    switch (am->tag) {
    248       case Aam_IR:
    249          if (am->Aam.IR.imm == 0)
    250             vex_printf("(");
    251          else
    252             vex_printf("0x%x(", am->Aam.IR.imm);
    253          ppHRegAMD64(am->Aam.IR.reg);
    254          vex_printf(")");
    255          return;
    256       case Aam_IRRS:
    257          vex_printf("0x%x(", am->Aam.IRRS.imm);
    258          ppHRegAMD64(am->Aam.IRRS.base);
    259          vex_printf(",");
    260          ppHRegAMD64(am->Aam.IRRS.index);
    261          vex_printf(",%d)", 1 << am->Aam.IRRS.shift);
    262          return;
    263       default:
    264          vpanic("ppAMD64AMode");
    265    }
    266 }
    267 
    268 static void addRegUsage_AMD64AMode ( HRegUsage* u, AMD64AMode* am ) {
    269    switch (am->tag) {
    270       case Aam_IR:
    271          addHRegUse(u, HRmRead, am->Aam.IR.reg);
    272          return;
    273       case Aam_IRRS:
    274          addHRegUse(u, HRmRead, am->Aam.IRRS.base);
    275          addHRegUse(u, HRmRead, am->Aam.IRRS.index);
    276          return;
    277       default:
    278          vpanic("addRegUsage_AMD64AMode");
    279    }
    280 }
    281 
    282 static void mapRegs_AMD64AMode ( HRegRemap* m, AMD64AMode* am ) {
    283    switch (am->tag) {
    284       case Aam_IR:
    285          am->Aam.IR.reg = lookupHRegRemap(m, am->Aam.IR.reg);
    286          return;
    287       case Aam_IRRS:
    288          am->Aam.IRRS.base = lookupHRegRemap(m, am->Aam.IRRS.base);
    289          am->Aam.IRRS.index = lookupHRegRemap(m, am->Aam.IRRS.index);
    290          return;
    291       default:
    292          vpanic("mapRegs_AMD64AMode");
    293    }
    294 }
    295 
    296 /* --------- Operand, which can be reg, immediate or memory. --------- */
    297 
    298 AMD64RMI* AMD64RMI_Imm ( UInt imm32 ) {
    299    AMD64RMI* op       = LibVEX_Alloc(sizeof(AMD64RMI));
    300    op->tag            = Armi_Imm;
    301    op->Armi.Imm.imm32 = imm32;
    302    return op;
    303 }
    304 AMD64RMI* AMD64RMI_Reg ( HReg reg ) {
    305    AMD64RMI* op     = LibVEX_Alloc(sizeof(AMD64RMI));
    306    op->tag          = Armi_Reg;
    307    op->Armi.Reg.reg = reg;
    308    return op;
    309 }
    310 AMD64RMI* AMD64RMI_Mem ( AMD64AMode* am ) {
    311    AMD64RMI* op    = LibVEX_Alloc(sizeof(AMD64RMI));
    312    op->tag         = Armi_Mem;
    313    op->Armi.Mem.am = am;
    314    return op;
    315 }
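/* Usage sketch (illustrative only): an RMI is the "flexible" source
   operand of instructions such as Ain_Alu64R below, so
      addq $0x10, %rdi
   could be built as
      AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(0x10), hregAMD64_RDI()). */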
    316 
    317 static void ppAMD64RMI_wrk ( AMD64RMI* op, Bool lo32 ) {
    318    switch (op->tag) {
    319       case Armi_Imm:
    320          vex_printf("$0x%x", op->Armi.Imm.imm32);
    321          return;
    322       case Armi_Reg:
    323          if (lo32)
    324             ppHRegAMD64_lo32(op->Armi.Reg.reg);
    325          else
    326             ppHRegAMD64(op->Armi.Reg.reg);
    327          return;
    328       case Armi_Mem:
    329          ppAMD64AMode(op->Armi.Mem.am);
    330          return;
    331      default:
    332          vpanic("ppAMD64RMI");
    333    }
    334 }
    335 void ppAMD64RMI ( AMD64RMI* op ) {
    336    ppAMD64RMI_wrk(op, False/*!lo32*/);
    337 }
    338 void ppAMD64RMI_lo32 ( AMD64RMI* op ) {
    339    ppAMD64RMI_wrk(op, True/*lo32*/);
    340 }
    341 
    342 /* An AMD64RMI can only be used in a "read" context (what would it mean
    343    to write or modify a literal?) and so we enumerate its registers
    344    accordingly. */
    345 static void addRegUsage_AMD64RMI ( HRegUsage* u, AMD64RMI* op ) {
    346    switch (op->tag) {
    347       case Armi_Imm:
    348          return;
    349       case Armi_Reg:
    350          addHRegUse(u, HRmRead, op->Armi.Reg.reg);
    351          return;
    352       case Armi_Mem:
    353          addRegUsage_AMD64AMode(u, op->Armi.Mem.am);
    354          return;
    355       default:
    356          vpanic("addRegUsage_AMD64RMI");
    357    }
    358 }
    359 
    360 static void mapRegs_AMD64RMI ( HRegRemap* m, AMD64RMI* op ) {
    361    switch (op->tag) {
    362       case Armi_Imm:
    363          return;
    364       case Armi_Reg:
    365          op->Armi.Reg.reg = lookupHRegRemap(m, op->Armi.Reg.reg);
    366          return;
    367       case Armi_Mem:
    368          mapRegs_AMD64AMode(m, op->Armi.Mem.am);
    369          return;
    370       default:
    371          vpanic("mapRegs_AMD64RMI");
    372    }
    373 }
    374 
    375 
    376 /* --------- Operand, which can be reg or immediate only. --------- */
    377 
    378 AMD64RI* AMD64RI_Imm ( UInt imm32 ) {
    379    AMD64RI* op       = LibVEX_Alloc(sizeof(AMD64RI));
    380    op->tag           = Ari_Imm;
    381    op->Ari.Imm.imm32 = imm32;
    382    return op;
    383 }
    384 AMD64RI* AMD64RI_Reg ( HReg reg ) {
    385    AMD64RI* op     = LibVEX_Alloc(sizeof(AMD64RI));
    386    op->tag         = Ari_Reg;
    387    op->Ari.Reg.reg = reg;
    388    return op;
    389 }
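/* An RI (reg or immediate, but never memory) is what instructions with
   a memory destination take as their source -- e.g. Ain_Alu64M below
   pairs an AMD64RI source with an AMD64AMode destination, since x86-64
   has no memory-to-memory ALU forms. */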
    390 
    391 void ppAMD64RI ( AMD64RI* op ) {
    392    switch (op->tag) {
    393       case Ari_Imm:
    394          vex_printf("$0x%x", op->Ari.Imm.imm32);
    395          return;
    396       case Ari_Reg:
    397          ppHRegAMD64(op->Ari.Reg.reg);
    398          return;
    399      default:
    400          vpanic("ppAMD64RI");
    401    }
    402 }
    403 
    404 /* An AMD64RI can only be used in a "read" context (what would it mean
    405    to write or modify a literal?) and so we enumerate its registers
    406    accordingly. */
    407 static void addRegUsage_AMD64RI ( HRegUsage* u, AMD64RI* op ) {
    408    switch (op->tag) {
    409       case Ari_Imm:
    410          return;
    411       case Ari_Reg:
    412          addHRegUse(u, HRmRead, op->Ari.Reg.reg);
    413          return;
    414       default:
    415          vpanic("addRegUsage_AMD64RI");
    416    }
    417 }
    418 
    419 static void mapRegs_AMD64RI ( HRegRemap* m, AMD64RI* op ) {
    420    switch (op->tag) {
    421       case Ari_Imm:
    422          return;
    423       case Ari_Reg:
    424          op->Ari.Reg.reg = lookupHRegRemap(m, op->Ari.Reg.reg);
    425          return;
    426       default:
    427          vpanic("mapRegs_AMD64RI");
    428    }
    429 }
    430 
    431 
    432 /* --------- Operand, which can be reg or memory only. --------- */
    433 
    434 AMD64RM* AMD64RM_Reg ( HReg reg ) {
    435    AMD64RM* op       = LibVEX_Alloc(sizeof(AMD64RM));
    436    op->tag         = Arm_Reg;
    437    op->Arm.Reg.reg = reg;
    438    return op;
    439 }
    440 AMD64RM* AMD64RM_Mem ( AMD64AMode* am ) {
    441    AMD64RM* op    = LibVEX_Alloc(sizeof(AMD64RM));
    442    op->tag        = Arm_Mem;
    443    op->Arm.Mem.am = am;
    444    return op;
    445 }
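/* An RM (reg or memory, but never an immediate) suits operands for
   which a literal makes no sense, e.g. the source of Ain_MulL and
   Ain_Div below. */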
    446 
    447 void ppAMD64RM ( AMD64RM* op ) {
    448    switch (op->tag) {
    449       case Arm_Mem:
    450          ppAMD64AMode(op->Arm.Mem.am);
    451          return;
    452       case Arm_Reg:
    453          ppHRegAMD64(op->Arm.Reg.reg);
    454          return;
    455      default:
    456          vpanic("ppAMD64RM");
    457    }
    458 }
    459 
     460 /* Because an AMD64RM can be either a source or a destination
     461    operand, we have to supply a mode -- pertaining to the operand as
     462    a whole -- indicating how it's being used. */
    463 static void addRegUsage_AMD64RM ( HRegUsage* u, AMD64RM* op, HRegMode mode ) {
    464    switch (op->tag) {
    465       case Arm_Mem:
    466          /* Memory is read, written or modified.  So we just want to
    467             know the regs read by the amode. */
    468          addRegUsage_AMD64AMode(u, op->Arm.Mem.am);
    469          return;
    470       case Arm_Reg:
    471          /* reg is read, written or modified.  Add it in the
    472             appropriate way. */
    473          addHRegUse(u, mode, op->Arm.Reg.reg);
    474          return;
    475      default:
    476          vpanic("addRegUsage_AMD64RM");
    477    }
    478 }
    479 
    480 static void mapRegs_AMD64RM ( HRegRemap* m, AMD64RM* op )
    481 {
    482    switch (op->tag) {
    483       case Arm_Mem:
    484          mapRegs_AMD64AMode(m, op->Arm.Mem.am);
    485          return;
    486       case Arm_Reg:
    487          op->Arm.Reg.reg = lookupHRegRemap(m, op->Arm.Reg.reg);
    488          return;
    489      default:
    490          vpanic("mapRegs_AMD64RM");
    491    }
    492 }
    493 
    494 
    495 /* --------- Instructions. --------- */
    496 
    497 static HChar* showAMD64ScalarSz ( Int sz ) {
    498    switch (sz) {
    499       case 2: return "w";
    500       case 4: return "l";
    501       case 8: return "q";
    502       default: vpanic("showAMD64ScalarSz");
    503    }
    504 }
    505 
    506 HChar* showAMD64UnaryOp ( AMD64UnaryOp op ) {
    507    switch (op) {
    508       case Aun_NOT: return "not";
    509       case Aun_NEG: return "neg";
    510       default: vpanic("showAMD64UnaryOp");
    511    }
    512 }
    513 
    514 HChar* showAMD64AluOp ( AMD64AluOp op ) {
    515    switch (op) {
    516       case Aalu_MOV:  return "mov";
    517       case Aalu_CMP:  return "cmp";
    518       case Aalu_ADD:  return "add";
    519       case Aalu_SUB:  return "sub";
    520       case Aalu_ADC:  return "adc";
    521       case Aalu_SBB:  return "sbb";
    522       case Aalu_AND:  return "and";
    523       case Aalu_OR:   return "or";
    524       case Aalu_XOR:  return "xor";
    525       case Aalu_MUL:  return "imul";
    526       default: vpanic("showAMD64AluOp");
    527    }
    528 }
    529 
    530 HChar* showAMD64ShiftOp ( AMD64ShiftOp op ) {
    531    switch (op) {
    532       case Ash_SHL: return "shl";
    533       case Ash_SHR: return "shr";
    534       case Ash_SAR: return "sar";
    535       default: vpanic("showAMD64ShiftOp");
    536    }
    537 }
    538 
    539 HChar* showA87FpOp ( A87FpOp op ) {
    540    switch (op) {
    541 //..       case Xfp_ADD:    return "add";
    542 //..       case Xfp_SUB:    return "sub";
    543 //..       case Xfp_MUL:    return "mul";
    544 //..       case Xfp_DIV:    return "div";
    545       case Afp_SCALE:  return "scale";
    546       case Afp_ATAN:   return "atan";
    547       case Afp_YL2X:   return "yl2x";
    548       case Afp_YL2XP1: return "yl2xp1";
    549       case Afp_PREM:   return "prem";
    550       case Afp_PREM1:  return "prem1";
    551       case Afp_SQRT:   return "sqrt";
    552 //..       case Xfp_ABS:    return "abs";
    553 //..       case Xfp_NEG:    return "chs";
    554 //..       case Xfp_MOV:    return "mov";
    555       case Afp_SIN:    return "sin";
    556       case Afp_COS:    return "cos";
    557       case Afp_TAN:    return "tan";
    558       case Afp_ROUND:  return "round";
    559       case Afp_2XM1:   return "2xm1";
    560       default: vpanic("showA87FpOp");
    561    }
    562 }
    563 
    564 HChar* showAMD64SseOp ( AMD64SseOp op ) {
    565    switch (op) {
    566       case Asse_MOV:      return "movups";
    567       case Asse_ADDF:     return "add";
    568       case Asse_SUBF:     return "sub";
    569       case Asse_MULF:     return "mul";
    570       case Asse_DIVF:     return "div";
    571       case Asse_MAXF:     return "max";
    572       case Asse_MINF:     return "min";
    573       case Asse_CMPEQF:   return "cmpFeq";
    574       case Asse_CMPLTF:   return "cmpFlt";
    575       case Asse_CMPLEF:   return "cmpFle";
    576       case Asse_CMPUNF:   return "cmpFun";
    577       case Asse_RCPF:     return "rcp";
    578       case Asse_RSQRTF:   return "rsqrt";
    579       case Asse_SQRTF:    return "sqrt";
    580       case Asse_AND:      return "and";
    581       case Asse_OR:       return "or";
    582       case Asse_XOR:      return "xor";
    583       case Asse_ANDN:     return "andn";
    584       case Asse_ADD8:     return "paddb";
    585       case Asse_ADD16:    return "paddw";
    586       case Asse_ADD32:    return "paddd";
    587       case Asse_ADD64:    return "paddq";
    588       case Asse_QADD8U:   return "paddusb";
    589       case Asse_QADD16U:  return "paddusw";
    590       case Asse_QADD8S:   return "paddsb";
    591       case Asse_QADD16S:  return "paddsw";
    592       case Asse_SUB8:     return "psubb";
    593       case Asse_SUB16:    return "psubw";
    594       case Asse_SUB32:    return "psubd";
    595       case Asse_SUB64:    return "psubq";
    596       case Asse_QSUB8U:   return "psubusb";
    597       case Asse_QSUB16U:  return "psubusw";
    598       case Asse_QSUB8S:   return "psubsb";
    599       case Asse_QSUB16S:  return "psubsw";
    600       case Asse_MUL16:    return "pmullw";
    601       case Asse_MULHI16U: return "pmulhuw";
    602       case Asse_MULHI16S: return "pmulhw";
    603       case Asse_AVG8U:    return "pavgb";
    604       case Asse_AVG16U:   return "pavgw";
    605       case Asse_MAX16S:   return "pmaxw";
    606       case Asse_MAX8U:    return "pmaxub";
    607       case Asse_MIN16S:   return "pminw";
    608       case Asse_MIN8U:    return "pminub";
    609       case Asse_CMPEQ8:   return "pcmpeqb";
    610       case Asse_CMPEQ16:  return "pcmpeqw";
    611       case Asse_CMPEQ32:  return "pcmpeqd";
    612       case Asse_CMPGT8S:  return "pcmpgtb";
    613       case Asse_CMPGT16S: return "pcmpgtw";
    614       case Asse_CMPGT32S: return "pcmpgtd";
    615       case Asse_SHL16:    return "psllw";
    616       case Asse_SHL32:    return "pslld";
    617       case Asse_SHL64:    return "psllq";
    618       case Asse_SHR16:    return "psrlw";
    619       case Asse_SHR32:    return "psrld";
    620       case Asse_SHR64:    return "psrlq";
    621       case Asse_SAR16:    return "psraw";
    622       case Asse_SAR32:    return "psrad";
    623       case Asse_PACKSSD:  return "packssdw";
    624       case Asse_PACKSSW:  return "packsswb";
    625       case Asse_PACKUSW:  return "packuswb";
    626       case Asse_UNPCKHB:  return "punpckhb";
    627       case Asse_UNPCKHW:  return "punpckhw";
    628       case Asse_UNPCKHD:  return "punpckhd";
    629       case Asse_UNPCKHQ:  return "punpckhq";
    630       case Asse_UNPCKLB:  return "punpcklb";
    631       case Asse_UNPCKLW:  return "punpcklw";
    632       case Asse_UNPCKLD:  return "punpckld";
    633       case Asse_UNPCKLQ:  return "punpcklq";
    634       default: vpanic("showAMD64SseOp");
    635    }
    636 }
    637 
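/* Constructors for AMD64Instr.  Each one allocates a node, fills in
   the tag and the relevant union member, and vasserts any operand
   constraints.  Illustrative use (not from the original source):
      movabsq $0x1122334455667788, %r10
   would be built as
      AMD64Instr_Imm64(0x1122334455667788ULL, hregAMD64_R10()). */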
    638 AMD64Instr* AMD64Instr_Imm64 ( ULong imm64, HReg dst ) {
    639    AMD64Instr* i      = LibVEX_Alloc(sizeof(AMD64Instr));
    640    i->tag             = Ain_Imm64;
    641    i->Ain.Imm64.imm64 = imm64;
    642    i->Ain.Imm64.dst   = dst;
    643    return i;
    644 }
    645 AMD64Instr* AMD64Instr_Alu64R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
    646    AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
    647    i->tag            = Ain_Alu64R;
    648    i->Ain.Alu64R.op  = op;
    649    i->Ain.Alu64R.src = src;
    650    i->Ain.Alu64R.dst = dst;
    651    return i;
    652 }
    653 AMD64Instr* AMD64Instr_Alu64M ( AMD64AluOp op, AMD64RI* src, AMD64AMode* dst ) {
    654    AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
    655    i->tag            = Ain_Alu64M;
    656    i->Ain.Alu64M.op  = op;
    657    i->Ain.Alu64M.src = src;
    658    i->Ain.Alu64M.dst = dst;
    659    vassert(op != Aalu_MUL);
    660    return i;
    661 }
    662 AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp op, UInt src, HReg dst ) {
    663    AMD64Instr* i   = LibVEX_Alloc(sizeof(AMD64Instr));
    664    i->tag          = Ain_Sh64;
    665    i->Ain.Sh64.op  = op;
    666    i->Ain.Sh64.src = src;
    667    i->Ain.Sh64.dst = dst;
    668    return i;
    669 }
    670 AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst ) {
    671    AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
    672    i->tag              = Ain_Test64;
    673    i->Ain.Test64.imm32 = imm32;
    674    i->Ain.Test64.dst   = dst;
    675    return i;
    676 }
    677 AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, HReg dst ) {
    678    AMD64Instr* i      = LibVEX_Alloc(sizeof(AMD64Instr));
    679    i->tag             = Ain_Unary64;
    680    i->Ain.Unary64.op  = op;
    681    i->Ain.Unary64.dst = dst;
    682    return i;
    683 }
    684 AMD64Instr* AMD64Instr_Lea64 ( AMD64AMode* am, HReg dst ) {
    685    AMD64Instr* i      = LibVEX_Alloc(sizeof(AMD64Instr));
    686    i->tag             = Ain_Lea64;
    687    i->Ain.Lea64.am    = am;
    688    i->Ain.Lea64.dst   = dst;
    689    return i;
    690 }
    691 AMD64Instr* AMD64Instr_Alu32R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
    692    AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
    693    i->tag            = Ain_Alu32R;
    694    i->Ain.Alu32R.op  = op;
    695    i->Ain.Alu32R.src = src;
    696    i->Ain.Alu32R.dst = dst;
    697    switch (op) {
    698       case Aalu_ADD: case Aalu_SUB: case Aalu_CMP:
    699       case Aalu_AND: case Aalu_OR:  case Aalu_XOR: break;
    700       default: vassert(0);
    701    }
    702    return i;
    703 }
    704 AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* src ) {
    705    AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
    706    i->tag            = Ain_MulL;
    707    i->Ain.MulL.syned = syned;
    708    i->Ain.MulL.src   = src;
    709    return i;
    710 }
    711 AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* src ) {
    712    AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
    713    i->tag            = Ain_Div;
    714    i->Ain.Div.syned  = syned;
    715    i->Ain.Div.sz     = sz;
    716    i->Ain.Div.src    = src;
    717    vassert(sz == 4 || sz == 8);
    718    return i;
    719 }
    720 //.. AMD64Instr* AMD64Instr_Sh3232  ( AMD64ShiftOp op, UInt amt, HReg src, HReg dst ) {
    721 //..    AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
    722 //..    i->tag            = Xin_Sh3232;
    723 //..    i->Xin.Sh3232.op  = op;
    724 //..    i->Xin.Sh3232.amt = amt;
    725 //..    i->Xin.Sh3232.src = src;
    726 //..    i->Xin.Sh3232.dst = dst;
    727 //..    vassert(op == Xsh_SHL || op == Xsh_SHR);
    728 //..    return i;
    729 //.. }
    730 AMD64Instr* AMD64Instr_Push( AMD64RMI* src ) {
    731    AMD64Instr* i   = LibVEX_Alloc(sizeof(AMD64Instr));
    732    i->tag          = Ain_Push;
    733    i->Ain.Push.src = src;
    734    return i;
    735 }
    736 AMD64Instr* AMD64Instr_Call ( AMD64CondCode cond, Addr64 target, Int regparms ) {
    737    AMD64Instr* i        = LibVEX_Alloc(sizeof(AMD64Instr));
    738    i->tag               = Ain_Call;
    739    i->Ain.Call.cond     = cond;
    740    i->Ain.Call.target   = target;
    741    i->Ain.Call.regparms = regparms;
    742    vassert(regparms >= 0 && regparms <= 6);
    743    return i;
    744 }
    745 AMD64Instr* AMD64Instr_Goto ( IRJumpKind jk, AMD64CondCode cond, AMD64RI* dst ) {
    746    AMD64Instr* i    = LibVEX_Alloc(sizeof(AMD64Instr));
    747    i->tag           = Ain_Goto;
    748    i->Ain.Goto.cond = cond;
    749    i->Ain.Goto.dst  = dst;
    750    i->Ain.Goto.jk   = jk;
    751    return i;
    752 }
    753 AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode cond, AMD64RM* src, HReg dst ) {
    754    AMD64Instr* i      = LibVEX_Alloc(sizeof(AMD64Instr));
    755    i->tag             = Ain_CMov64;
    756    i->Ain.CMov64.cond = cond;
    757    i->Ain.CMov64.src  = src;
    758    i->Ain.CMov64.dst  = dst;
    759    vassert(cond != Acc_ALWAYS);
    760    return i;
    761 }
    762 AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst ) {
    763    AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
    764    i->tag              = Ain_MovxLQ;
    765    i->Ain.MovxLQ.syned = syned;
    766    i->Ain.MovxLQ.src   = src;
    767    i->Ain.MovxLQ.dst   = dst;
    768    return i;
    769 }
    770 AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
    771                                 AMD64AMode* src, HReg dst ) {
    772    AMD64Instr* i         = LibVEX_Alloc(sizeof(AMD64Instr));
    773    i->tag                = Ain_LoadEX;
    774    i->Ain.LoadEX.szSmall = szSmall;
    775    i->Ain.LoadEX.syned   = syned;
    776    i->Ain.LoadEX.src     = src;
    777    i->Ain.LoadEX.dst     = dst;
    778    vassert(szSmall == 1 || szSmall == 2 || szSmall == 4);
    779    return i;
    780 }
    781 AMD64Instr* AMD64Instr_Store ( UChar sz, HReg src, AMD64AMode* dst ) {
    782    AMD64Instr* i    = LibVEX_Alloc(sizeof(AMD64Instr));
    783    i->tag           = Ain_Store;
    784    i->Ain.Store.sz  = sz;
    785    i->Ain.Store.src = src;
    786    i->Ain.Store.dst = dst;
    787    vassert(sz == 1 || sz == 2 || sz == 4);
    788    return i;
    789 }
    790 AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst ) {
    791    AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
    792    i->tag            = Ain_Set64;
    793    i->Ain.Set64.cond = cond;
    794    i->Ain.Set64.dst  = dst;
    795    return i;
    796 }
    797 AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg dst ) {
    798    AMD64Instr* i        = LibVEX_Alloc(sizeof(AMD64Instr));
    799    i->tag               = Ain_Bsfr64;
    800    i->Ain.Bsfr64.isFwds = isFwds;
    801    i->Ain.Bsfr64.src    = src;
    802    i->Ain.Bsfr64.dst    = dst;
    803    return i;
    804 }
    805 AMD64Instr* AMD64Instr_MFence ( void ) {
    806    AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
    807    i->tag        = Ain_MFence;
    808    return i;
    809 }
    810 AMD64Instr* AMD64Instr_ACAS ( AMD64AMode* addr, UChar sz ) {
    811    AMD64Instr* i    = LibVEX_Alloc(sizeof(AMD64Instr));
    812    i->tag           = Ain_ACAS;
    813    i->Ain.ACAS.addr = addr;
    814    i->Ain.ACAS.sz   = sz;
    815    vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
    816    return i;
    817 }
    818 AMD64Instr* AMD64Instr_DACAS ( AMD64AMode* addr, UChar sz ) {
    819    AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
    820    i->tag            = Ain_DACAS;
    821    i->Ain.DACAS.addr = addr;
    822    i->Ain.DACAS.sz   = sz;
    823    vassert(sz == 8 || sz == 4);
    824    return i;
    825 }
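/* Sketch of the intended semantics, inferred from ppAMD64Instr below
   rather than stated here: Ain_ACAS models lock cmpxchg{b,w,l,q} with
   the expected old value in %rax and the new value in %rbx, and
   Ain_DACAS is the double-width form (cmpxchg8b/cmpxchg16b) using
   %rdx:%rax and %rcx:%rbx. */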
    826 
    827 AMD64Instr* AMD64Instr_A87Free ( Int nregs )
    828 {
    829    AMD64Instr* i        = LibVEX_Alloc(sizeof(AMD64Instr));
    830    i->tag               = Ain_A87Free;
    831    i->Ain.A87Free.nregs = nregs;
    832    vassert(nregs >= 1 && nregs <= 7);
    833    return i;
    834 }
    835 AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush, UChar szB )
    836 {
    837    AMD64Instr* i            = LibVEX_Alloc(sizeof(AMD64Instr));
    838    i->tag                   = Ain_A87PushPop;
    839    i->Ain.A87PushPop.addr   = addr;
    840    i->Ain.A87PushPop.isPush = isPush;
    841    i->Ain.A87PushPop.szB    = szB;
    842    vassert(szB == 8 || szB == 4);
    843    return i;
    844 }
    845 AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op )
    846 {
    847    AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
    848    i->tag            = Ain_A87FpOp;
    849    i->Ain.A87FpOp.op = op;
    850    return i;
    851 }
    852 AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr )
    853 {
    854    AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
    855    i->tag              = Ain_A87LdCW;
    856    i->Ain.A87LdCW.addr = addr;
    857    return i;
    858 }
    859 AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr )
    860 {
    861    AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
    862    i->tag              = Ain_A87StSW;
    863    i->Ain.A87StSW.addr = addr;
    864    return i;
    865 }
    866 
    867 //.. AMD64Instr* AMD64Instr_FpUnary ( AMD64FpOp op, HReg src, HReg dst ) {
    868 //..    AMD64Instr* i        = LibVEX_Alloc(sizeof(AMD64Instr));
    869 //..    i->tag             = Xin_FpUnary;
    870 //..    i->Xin.FpUnary.op  = op;
    871 //..    i->Xin.FpUnary.src = src;
    872 //..    i->Xin.FpUnary.dst = dst;
    873 //..    return i;
    874 //.. }
    875 //.. AMD64Instr* AMD64Instr_FpBinary ( AMD64FpOp op, HReg srcL, HReg srcR, HReg dst ) {
    876 //..    AMD64Instr* i          = LibVEX_Alloc(sizeof(AMD64Instr));
    877 //..    i->tag               = Xin_FpBinary;
    878 //..    i->Xin.FpBinary.op   = op;
    879 //..    i->Xin.FpBinary.srcL = srcL;
    880 //..    i->Xin.FpBinary.srcR = srcR;
    881 //..    i->Xin.FpBinary.dst  = dst;
    882 //..    return i;
    883 //.. }
    884 //.. AMD64Instr* AMD64Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, AMD64AMode* addr ) {
    885 //..    AMD64Instr* i          = LibVEX_Alloc(sizeof(AMD64Instr));
    886 //..    i->tag               = Xin_FpLdSt;
    887 //..    i->Xin.FpLdSt.isLoad = isLoad;
    888 //..    i->Xin.FpLdSt.sz     = sz;
    889 //..    i->Xin.FpLdSt.reg    = reg;
    890 //..    i->Xin.FpLdSt.addr   = addr;
    891 //..    vassert(sz == 4 || sz == 8);
    892 //..    return i;
    893 //.. }
    894 //.. AMD64Instr* AMD64Instr_FpLdStI ( Bool isLoad, UChar sz,
    895 //..                              HReg reg, AMD64AMode* addr ) {
    896 //..    AMD64Instr* i           = LibVEX_Alloc(sizeof(AMD64Instr));
    897 //..    i->tag                = Xin_FpLdStI;
    898 //..    i->Xin.FpLdStI.isLoad = isLoad;
    899 //..    i->Xin.FpLdStI.sz     = sz;
    900 //..    i->Xin.FpLdStI.reg    = reg;
    901 //..    i->Xin.FpLdStI.addr   = addr;
    902 //..    vassert(sz == 2 || sz == 4 || sz == 8);
    903 //..    return i;
    904 //.. }
    905 //.. AMD64Instr* AMD64Instr_Fp64to32 ( HReg src, HReg dst ) {
    906 //..    AMD64Instr* i         = LibVEX_Alloc(sizeof(AMD64Instr));
    907 //..    i->tag              = Xin_Fp64to32;
    908 //..    i->Xin.Fp64to32.src = src;
    909 //..    i->Xin.Fp64to32.dst = dst;
    910 //..    return i;
    911 //.. }
    912 //.. AMD64Instr* AMD64Instr_FpCMov ( AMD64CondCode cond, HReg src, HReg dst ) {
    913 //..    AMD64Instr* i        = LibVEX_Alloc(sizeof(AMD64Instr));
    914 //..    i->tag             = Xin_FpCMov;
    915 //..    i->Xin.FpCMov.cond = cond;
    916 //..    i->Xin.FpCMov.src  = src;
    917 //..    i->Xin.FpCMov.dst  = dst;
    918 //..    vassert(cond != Xcc_ALWAYS);
    919 //..    return i;
    920 //.. }
    921 AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* addr ) {
    922    AMD64Instr* i         = LibVEX_Alloc(sizeof(AMD64Instr));
    923    i->tag                = Ain_LdMXCSR;
    924    i->Ain.LdMXCSR.addr   = addr;
    925    return i;
    926 }
    927 //.. AMD64Instr* AMD64Instr_FpStSW_AX ( void ) {
    928 //..    AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
    929 //..    i->tag      = Xin_FpStSW_AX;
    930 //..    return i;
    931 //.. }
    932 AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst ) {
    933    AMD64Instr* i         = LibVEX_Alloc(sizeof(AMD64Instr));
    934    i->tag                = Ain_SseUComIS;
    935    i->Ain.SseUComIS.sz   = toUChar(sz);
    936    i->Ain.SseUComIS.srcL = srcL;
    937    i->Ain.SseUComIS.srcR = srcR;
    938    i->Ain.SseUComIS.dst  = dst;
    939    vassert(sz == 4 || sz == 8);
    940    return i;
    941 }
    942 AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst ) {
    943    AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
    944    i->tag              = Ain_SseSI2SF;
    945    i->Ain.SseSI2SF.szS = toUChar(szS);
    946    i->Ain.SseSI2SF.szD = toUChar(szD);
    947    i->Ain.SseSI2SF.src = src;
    948    i->Ain.SseSI2SF.dst = dst;
    949    vassert(szS == 4 || szS == 8);
    950    vassert(szD == 4 || szD == 8);
    951    return i;
    952 }
    953 AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst ) {
    954    AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
    955    i->tag              = Ain_SseSF2SI;
    956    i->Ain.SseSF2SI.szS = toUChar(szS);
    957    i->Ain.SseSF2SI.szD = toUChar(szD);
    958    i->Ain.SseSF2SI.src = src;
    959    i->Ain.SseSF2SI.dst = dst;
    960    vassert(szS == 4 || szS == 8);
    961    vassert(szD == 4 || szD == 8);
    962    return i;
    963 }
    964 AMD64Instr* AMD64Instr_SseSDSS   ( Bool from64, HReg src, HReg dst )
    965 {
    966    AMD64Instr* i         = LibVEX_Alloc(sizeof(AMD64Instr));
    967    i->tag                = Ain_SseSDSS;
    968    i->Ain.SseSDSS.from64 = from64;
    969    i->Ain.SseSDSS.src    = src;
    970    i->Ain.SseSDSS.dst    = dst;
    971    return i;
    972 }
    973 
    974 //.. AMD64Instr* AMD64Instr_SseConst ( UShort con, HReg dst ) {
    975 //..    AMD64Instr* i            = LibVEX_Alloc(sizeof(AMD64Instr));
    976 //..    i->tag                 = Xin_SseConst;
    977 //..    i->Xin.SseConst.con    = con;
    978 //..    i->Xin.SseConst.dst    = dst;
    979 //..    vassert(hregClass(dst) == HRcVec128);
    980 //..    return i;
    981 //.. }
    982 AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz,
    983                                  HReg reg, AMD64AMode* addr ) {
    984    AMD64Instr* i         = LibVEX_Alloc(sizeof(AMD64Instr));
    985    i->tag                = Ain_SseLdSt;
    986    i->Ain.SseLdSt.isLoad = isLoad;
    987    i->Ain.SseLdSt.sz     = toUChar(sz);
    988    i->Ain.SseLdSt.reg    = reg;
    989    i->Ain.SseLdSt.addr   = addr;
    990    vassert(sz == 4 || sz == 8 || sz == 16);
    991    return i;
    992 }
    993 AMD64Instr* AMD64Instr_SseLdzLO  ( Int sz, HReg reg, AMD64AMode* addr )
    994 {
    995    AMD64Instr* i         = LibVEX_Alloc(sizeof(AMD64Instr));
    996    i->tag                = Ain_SseLdzLO;
    997    i->Ain.SseLdzLO.sz    = sz;
    998    i->Ain.SseLdzLO.reg   = reg;
    999    i->Ain.SseLdzLO.addr  = addr;
   1000    vassert(sz == 4 || sz == 8);
   1001    return i;
   1002 }
   1003 AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp op, HReg src, HReg dst ) {
   1004    AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   1005    i->tag              = Ain_Sse32Fx4;
   1006    i->Ain.Sse32Fx4.op  = op;
   1007    i->Ain.Sse32Fx4.src = src;
   1008    i->Ain.Sse32Fx4.dst = dst;
   1009    vassert(op != Asse_MOV);
   1010    return i;
   1011 }
   1012 AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp op, HReg src, HReg dst ) {
   1013    AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   1014    i->tag              = Ain_Sse32FLo;
   1015    i->Ain.Sse32FLo.op  = op;
   1016    i->Ain.Sse32FLo.src = src;
   1017    i->Ain.Sse32FLo.dst = dst;
   1018    vassert(op != Asse_MOV);
   1019    return i;
   1020 }
   1021 AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp op, HReg src, HReg dst ) {
   1022    AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   1023    i->tag              = Ain_Sse64Fx2;
   1024    i->Ain.Sse64Fx2.op  = op;
   1025    i->Ain.Sse64Fx2.src = src;
   1026    i->Ain.Sse64Fx2.dst = dst;
   1027    vassert(op != Asse_MOV);
   1028    return i;
   1029 }
   1030 AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp op, HReg src, HReg dst ) {
   1031    AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   1032    i->tag              = Ain_Sse64FLo;
   1033    i->Ain.Sse64FLo.op  = op;
   1034    i->Ain.Sse64FLo.src = src;
   1035    i->Ain.Sse64FLo.dst = dst;
   1036    vassert(op != Asse_MOV);
   1037    return i;
   1038 }
   1039 AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp op, HReg re, HReg rg ) {
   1040    AMD64Instr* i      = LibVEX_Alloc(sizeof(AMD64Instr));
   1041    i->tag             = Ain_SseReRg;
   1042    i->Ain.SseReRg.op  = op;
   1043    i->Ain.SseReRg.src = re;
   1044    i->Ain.SseReRg.dst = rg;
   1045    return i;
   1046 }
   1047 AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode cond, HReg src, HReg dst ) {
   1048    AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   1049    i->tag              = Ain_SseCMov;
   1050    i->Ain.SseCMov.cond = cond;
   1051    i->Ain.SseCMov.src  = src;
   1052    i->Ain.SseCMov.dst  = dst;
   1053    vassert(cond != Acc_ALWAYS);
   1054    return i;
   1055 }
   1056 AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst ) {
   1057    AMD64Instr* i        = LibVEX_Alloc(sizeof(AMD64Instr));
   1058    i->tag               = Ain_SseShuf;
   1059    i->Ain.SseShuf.order = order;
   1060    i->Ain.SseShuf.src   = src;
   1061    i->Ain.SseShuf.dst   = dst;
   1062    vassert(order >= 0 && order <= 0xFF);
   1063    return i;
   1064 }
   1065 
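/* Pretty-print one instruction.  The output is AT&T/gas style (source
   operand before destination), matching the (src, dst) argument order
   used by the constructors above. */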
   1066 void ppAMD64Instr ( AMD64Instr* i, Bool mode64 )
   1067 {
   1068    vassert(mode64 == True);
   1069    switch (i->tag) {
   1070       case Ain_Imm64:
   1071          vex_printf("movabsq $0x%llx,", i->Ain.Imm64.imm64);
   1072          ppHRegAMD64(i->Ain.Imm64.dst);
   1073          return;
   1074       case Ain_Alu64R:
   1075          vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64R.op));
   1076          ppAMD64RMI(i->Ain.Alu64R.src);
   1077          vex_printf(",");
   1078          ppHRegAMD64(i->Ain.Alu64R.dst);
   1079          return;
   1080       case Ain_Alu64M:
   1081          vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64M.op));
   1082          ppAMD64RI(i->Ain.Alu64M.src);
   1083          vex_printf(",");
   1084          ppAMD64AMode(i->Ain.Alu64M.dst);
   1085          return;
   1086       case Ain_Sh64:
   1087          vex_printf("%sq ", showAMD64ShiftOp(i->Ain.Sh64.op));
   1088          if (i->Ain.Sh64.src == 0)
   1089             vex_printf("%%cl,");
   1090          else
   1091             vex_printf("$%d,", (Int)i->Ain.Sh64.src);
   1092          ppHRegAMD64(i->Ain.Sh64.dst);
   1093          return;
   1094       case Ain_Test64:
   1095          vex_printf("testq $%d,", (Int)i->Ain.Test64.imm32);
   1096          ppHRegAMD64(i->Ain.Test64.dst);
   1097          return;
   1098       case Ain_Unary64:
   1099          vex_printf("%sq ", showAMD64UnaryOp(i->Ain.Unary64.op));
   1100          ppHRegAMD64(i->Ain.Unary64.dst);
   1101          return;
   1102       case Ain_Lea64:
   1103          vex_printf("leaq ");
   1104          ppAMD64AMode(i->Ain.Lea64.am);
   1105          vex_printf(",");
   1106          ppHRegAMD64(i->Ain.Lea64.dst);
   1107          return;
   1108       case Ain_Alu32R:
   1109          vex_printf("%sl ", showAMD64AluOp(i->Ain.Alu32R.op));
   1110          ppAMD64RMI_lo32(i->Ain.Alu32R.src);
   1111          vex_printf(",");
   1112          ppHRegAMD64_lo32(i->Ain.Alu32R.dst);
   1113          return;
   1114       case Ain_MulL:
   1115          vex_printf("%cmulq ", i->Ain.MulL.syned ? 's' : 'u');
   1116          ppAMD64RM(i->Ain.MulL.src);
   1117          return;
   1118       case Ain_Div:
   1119          vex_printf("%cdiv%s ",
   1120                     i->Ain.Div.syned ? 's' : 'u',
   1121                     showAMD64ScalarSz(i->Ain.Div.sz));
   1122          ppAMD64RM(i->Ain.Div.src);
   1123          return;
   1124 //..       case Xin_Sh3232:
   1125 //..          vex_printf("%sdl ", showAMD64ShiftOp(i->Xin.Sh3232.op));
   1126 //..          if (i->Xin.Sh3232.amt == 0)
   1127 //..            vex_printf(" %%cl,");
   1128 //..          else
   1129 //..             vex_printf(" $%d,", i->Xin.Sh3232.amt);
   1130 //..          ppHRegAMD64(i->Xin.Sh3232.src);
   1131 //..          vex_printf(",");
   1132 //..          ppHRegAMD64(i->Xin.Sh3232.dst);
   1133 //..          return;
   1134       case Ain_Push:
   1135          vex_printf("pushq ");
   1136          ppAMD64RMI(i->Ain.Push.src);
   1137          return;
   1138       case Ain_Call:
   1139          vex_printf("call%s[%d] ",
   1140                     i->Ain.Call.cond==Acc_ALWAYS
   1141                        ? "" : showAMD64CondCode(i->Ain.Call.cond),
   1142                     i->Ain.Call.regparms );
   1143          vex_printf("0x%llx", i->Ain.Call.target);
   1144          break;
   1145       case Ain_Goto:
   1146          if (i->Ain.Goto.cond != Acc_ALWAYS) {
   1147             vex_printf("if (%%rflags.%s) { ",
   1148                        showAMD64CondCode(i->Ain.Goto.cond));
   1149          }
   1150          if (i->Ain.Goto.jk != Ijk_Boring
   1151              && i->Ain.Goto.jk != Ijk_Call
   1152              && i->Ain.Goto.jk != Ijk_Ret) {
   1153             vex_printf("movl $");
   1154             ppIRJumpKind(i->Ain.Goto.jk);
   1155             vex_printf(",%%ebp ; ");
   1156          }
   1157          vex_printf("movq ");
   1158          ppAMD64RI(i->Ain.Goto.dst);
   1159          vex_printf(",%%rax ; movabsq $dispatcher_addr,%%rdx ; jmp *%%rdx");
   1160          if (i->Ain.Goto.cond != Acc_ALWAYS) {
   1161             vex_printf(" }");
   1162          }
   1163          return;
   1164       case Ain_CMov64:
   1165          vex_printf("cmov%s ", showAMD64CondCode(i->Ain.CMov64.cond));
   1166          ppAMD64RM(i->Ain.CMov64.src);
   1167          vex_printf(",");
   1168          ppHRegAMD64(i->Ain.CMov64.dst);
   1169          return;
   1170       case Ain_MovxLQ:
   1171          vex_printf("mov%clq ", i->Ain.MovxLQ.syned ? 's' : 'z');
   1172          ppHRegAMD64_lo32(i->Ain.MovxLQ.src);
   1173          vex_printf(",");
   1174          ppHRegAMD64(i->Ain.MovxLQ.dst);
   1175          return;
   1176       case Ain_LoadEX:
   1177          if (i->Ain.LoadEX.szSmall==4 && !i->Ain.LoadEX.syned) {
   1178             vex_printf("movl ");
   1179             ppAMD64AMode(i->Ain.LoadEX.src);
   1180             vex_printf(",");
   1181             ppHRegAMD64_lo32(i->Ain.LoadEX.dst);
   1182          } else {
   1183             vex_printf("mov%c%cq ",
   1184                        i->Ain.LoadEX.syned ? 's' : 'z',
   1185                        i->Ain.LoadEX.szSmall==1
   1186                           ? 'b'
   1187                           : (i->Ain.LoadEX.szSmall==2 ? 'w' : 'l'));
   1188             ppAMD64AMode(i->Ain.LoadEX.src);
   1189             vex_printf(",");
   1190             ppHRegAMD64(i->Ain.LoadEX.dst);
   1191          }
   1192          return;
   1193       case Ain_Store:
   1194          vex_printf("mov%c ", i->Ain.Store.sz==1 ? 'b'
   1195                               : (i->Ain.Store.sz==2 ? 'w' : 'l'));
   1196          ppHRegAMD64(i->Ain.Store.src);
   1197          vex_printf(",");
   1198          ppAMD64AMode(i->Ain.Store.dst);
   1199          return;
   1200       case Ain_Set64:
   1201          vex_printf("setq%s ", showAMD64CondCode(i->Ain.Set64.cond));
   1202          ppHRegAMD64(i->Ain.Set64.dst);
   1203          return;
   1204       case Ain_Bsfr64:
   1205          vex_printf("bs%cq ", i->Ain.Bsfr64.isFwds ? 'f' : 'r');
   1206          ppHRegAMD64(i->Ain.Bsfr64.src);
   1207          vex_printf(",");
   1208          ppHRegAMD64(i->Ain.Bsfr64.dst);
   1209          return;
   1210       case Ain_MFence:
   1211          vex_printf("mfence" );
   1212          return;
   1213       case Ain_ACAS:
   1214          vex_printf("lock cmpxchg%c ",
   1215                      i->Ain.ACAS.sz==1 ? 'b' : i->Ain.ACAS.sz==2 ? 'w'
   1216                      : i->Ain.ACAS.sz==4 ? 'l' : 'q' );
   1217          vex_printf("{%%rax->%%rbx},");
   1218          ppAMD64AMode(i->Ain.ACAS.addr);
   1219          return;
   1220       case Ain_DACAS:
   1221          vex_printf("lock cmpxchg%db {%%rdx:%%rax->%%rcx:%%rbx},",
   1222                     (Int)(2 * i->Ain.DACAS.sz));
   1223          ppAMD64AMode(i->Ain.DACAS.addr);
   1224          return;
   1225       case Ain_A87Free:
   1226          vex_printf("ffree %%st(7..%d)", 8 - i->Ain.A87Free.nregs );
   1227          break;
   1228       case Ain_A87PushPop:
   1229          vex_printf(i->Ain.A87PushPop.isPush ? "fld%c " : "fstp%c ",
   1230                     i->Ain.A87PushPop.szB == 4 ? 's' : 'l');
   1231          ppAMD64AMode(i->Ain.A87PushPop.addr);
   1232          break;
   1233       case Ain_A87FpOp:
   1234          vex_printf("f%s", showA87FpOp(i->Ain.A87FpOp.op));
   1235          break;
   1236       case Ain_A87LdCW:
   1237          vex_printf("fldcw ");
   1238          ppAMD64AMode(i->Ain.A87LdCW.addr);
   1239          break;
   1240       case Ain_A87StSW:
   1241          vex_printf("fstsw ");
   1242          ppAMD64AMode(i->Ain.A87StSW.addr);
   1243          break;
   1244 //..       case Xin_FpUnary:
   1245 //..          vex_printf("g%sD ", showAMD64FpOp(i->Xin.FpUnary.op));
   1246 //..          ppHRegAMD64(i->Xin.FpUnary.src);
   1247 //..          vex_printf(",");
   1248 //..          ppHRegAMD64(i->Xin.FpUnary.dst);
   1249 //..          break;
   1250 //..       case Xin_FpBinary:
   1251 //..          vex_printf("g%sD ", showAMD64FpOp(i->Xin.FpBinary.op));
   1252 //..          ppHRegAMD64(i->Xin.FpBinary.srcL);
   1253 //..          vex_printf(",");
   1254 //..          ppHRegAMD64(i->Xin.FpBinary.srcR);
   1255 //..          vex_printf(",");
   1256 //..          ppHRegAMD64(i->Xin.FpBinary.dst);
   1257 //..          break;
   1258 //..       case Xin_FpLdSt:
   1259 //..          if (i->Xin.FpLdSt.isLoad) {
   1260 //..             vex_printf("gld%c " , i->Xin.FpLdSt.sz==8 ? 'D' : 'F');
   1261 //..             ppAMD64AMode(i->Xin.FpLdSt.addr);
   1262 //..             vex_printf(", ");
   1263 //..             ppHRegAMD64(i->Xin.FpLdSt.reg);
   1264 //..          } else {
   1265 //..             vex_printf("gst%c " , i->Xin.FpLdSt.sz==8 ? 'D' : 'F');
   1266 //..             ppHRegAMD64(i->Xin.FpLdSt.reg);
   1267 //..             vex_printf(", ");
   1268 //..             ppAMD64AMode(i->Xin.FpLdSt.addr);
   1269 //..          }
   1270 //..          return;
   1271 //..       case Xin_FpLdStI:
   1272 //..          if (i->Xin.FpLdStI.isLoad) {
   1273 //..             vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
   1274 //..                                   i->Xin.FpLdStI.sz==4 ? "l" : "w");
   1275 //..             ppAMD64AMode(i->Xin.FpLdStI.addr);
   1276 //..             vex_printf(", ");
   1277 //..             ppHRegAMD64(i->Xin.FpLdStI.reg);
   1278 //..          } else {
   1279 //..             vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
   1280 //..                                   i->Xin.FpLdStI.sz==4 ? "l" : "w");
   1281 //..             ppHRegAMD64(i->Xin.FpLdStI.reg);
   1282 //..             vex_printf(", ");
   1283 //..             ppAMD64AMode(i->Xin.FpLdStI.addr);
   1284 //..          }
   1285 //..          return;
   1286 //..       case Xin_Fp64to32:
   1287 //..          vex_printf("gdtof ");
   1288 //..          ppHRegAMD64(i->Xin.Fp64to32.src);
   1289 //..          vex_printf(",");
   1290 //..          ppHRegAMD64(i->Xin.Fp64to32.dst);
   1291 //..          return;
   1292 //..       case Xin_FpCMov:
   1293 //..          vex_printf("gcmov%s ", showAMD64CondCode(i->Xin.FpCMov.cond));
   1294 //..          ppHRegAMD64(i->Xin.FpCMov.src);
   1295 //..          vex_printf(",");
   1296 //..          ppHRegAMD64(i->Xin.FpCMov.dst);
   1297 //..          return;
   1298 //..       case Xin_FpLdStCW:
   1299 //..          vex_printf(i->Xin.FpLdStCW.isLoad ? "fldcw " : "fstcw ");
   1300 //..          ppAMD64AMode(i->Xin.FpLdStCW.addr);
   1301 //..          return;
   1302 //..       case Xin_FpStSW_AX:
   1303 //..          vex_printf("fstsw %%ax");
   1304 //..          return;
   1305       case Ain_LdMXCSR:
   1306          vex_printf("ldmxcsr ");
   1307          ppAMD64AMode(i->Ain.LdMXCSR.addr);
   1308          break;
   1309       case Ain_SseUComIS:
   1310          vex_printf("ucomis%s ", i->Ain.SseUComIS.sz==4 ? "s" : "d");
   1311          ppHRegAMD64(i->Ain.SseUComIS.srcL);
   1312          vex_printf(",");
   1313          ppHRegAMD64(i->Ain.SseUComIS.srcR);
   1314          vex_printf(" ; pushfq ; popq ");
   1315          ppHRegAMD64(i->Ain.SseUComIS.dst);
   1316          break;
   1317       case Ain_SseSI2SF:
   1318          vex_printf("cvtsi2s%s ", i->Ain.SseSI2SF.szD==4 ? "s" : "d");
   1319          (i->Ain.SseSI2SF.szS==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
   1320             (i->Ain.SseSI2SF.src);
   1321          vex_printf(",");
   1322          ppHRegAMD64(i->Ain.SseSI2SF.dst);
   1323          break;
   1324       case Ain_SseSF2SI:
   1325          vex_printf("cvts%s2si ", i->Ain.SseSF2SI.szS==4 ? "s" : "d");
   1326          ppHRegAMD64(i->Ain.SseSF2SI.src);
   1327          vex_printf(",");
   1328          (i->Ain.SseSF2SI.szD==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
   1329             (i->Ain.SseSF2SI.dst);
   1330          break;
   1331       case Ain_SseSDSS:
   1332          vex_printf(i->Ain.SseSDSS.from64 ? "cvtsd2ss " : "cvtss2sd ");
   1333          ppHRegAMD64(i->Ain.SseSDSS.src);
   1334          vex_printf(",");
   1335          ppHRegAMD64(i->Ain.SseSDSS.dst);
   1336          break;
   1337 //..       case Xin_SseConst:
   1338 //..          vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con);
   1339 //..          ppHRegAMD64(i->Xin.SseConst.dst);
   1340 //..          break;
   1341       case Ain_SseLdSt:
   1342          switch (i->Ain.SseLdSt.sz) {
   1343             case 4:  vex_printf("movss "); break;
   1344             case 8:  vex_printf("movsd "); break;
   1345             case 16: vex_printf("movups "); break;
   1346             default: vassert(0);
   1347          }
   1348          if (i->Ain.SseLdSt.isLoad) {
   1349             ppAMD64AMode(i->Ain.SseLdSt.addr);
   1350             vex_printf(",");
   1351             ppHRegAMD64(i->Ain.SseLdSt.reg);
   1352          } else {
   1353             ppHRegAMD64(i->Ain.SseLdSt.reg);
   1354             vex_printf(",");
   1355             ppAMD64AMode(i->Ain.SseLdSt.addr);
   1356          }
   1357          return;
   1358       case Ain_SseLdzLO:
   1359          vex_printf("movs%s ", i->Ain.SseLdzLO.sz==4 ? "s" : "d");
   1360          ppAMD64AMode(i->Ain.SseLdzLO.addr);
   1361          vex_printf(",");
   1362          ppHRegAMD64(i->Ain.SseLdzLO.reg);
   1363          return;
   1364       case Ain_Sse32Fx4:
   1365          vex_printf("%sps ", showAMD64SseOp(i->Ain.Sse32Fx4.op));
   1366          ppHRegAMD64(i->Ain.Sse32Fx4.src);
   1367          vex_printf(",");
   1368          ppHRegAMD64(i->Ain.Sse32Fx4.dst);
   1369          return;
   1370       case Ain_Sse32FLo:
   1371          vex_printf("%sss ", showAMD64SseOp(i->Ain.Sse32FLo.op));
   1372          ppHRegAMD64(i->Ain.Sse32FLo.src);
   1373          vex_printf(",");
   1374          ppHRegAMD64(i->Ain.Sse32FLo.dst);
   1375          return;
   1376       case Ain_Sse64Fx2:
   1377          vex_printf("%spd ", showAMD64SseOp(i->Ain.Sse64Fx2.op));
   1378          ppHRegAMD64(i->Ain.Sse64Fx2.src);
   1379          vex_printf(",");
   1380          ppHRegAMD64(i->Ain.Sse64Fx2.dst);
   1381          return;
   1382       case Ain_Sse64FLo:
   1383          vex_printf("%ssd ", showAMD64SseOp(i->Ain.Sse64FLo.op));
   1384          ppHRegAMD64(i->Ain.Sse64FLo.src);
   1385          vex_printf(",");
   1386          ppHRegAMD64(i->Ain.Sse64FLo.dst);
   1387          return;
   1388       case Ain_SseReRg:
   1389          vex_printf("%s ", showAMD64SseOp(i->Ain.SseReRg.op));
   1390          ppHRegAMD64(i->Ain.SseReRg.src);
   1391          vex_printf(",");
   1392          ppHRegAMD64(i->Ain.SseReRg.dst);
   1393          return;
   1394       case Ain_SseCMov:
   1395          vex_printf("cmov%s ", showAMD64CondCode(i->Ain.SseCMov.cond));
   1396          ppHRegAMD64(i->Ain.SseCMov.src);
   1397          vex_printf(",");
   1398          ppHRegAMD64(i->Ain.SseCMov.dst);
   1399          return;
   1400       case Ain_SseShuf:
   1401          vex_printf("pshufd $0x%x,", i->Ain.SseShuf.order);
   1402          ppHRegAMD64(i->Ain.SseShuf.src);
   1403          vex_printf(",");
   1404          ppHRegAMD64(i->Ain.SseShuf.dst);
   1405          return;
   1406 
   1407       default:
   1408          vpanic("ppAMD64Instr");
   1409    }
   1410 }
   1411 
   1412 /* --------- Helpers for register allocation. --------- */
   1413 
   1414 void getRegUsage_AMD64Instr ( HRegUsage* u, AMD64Instr* i, Bool mode64 )
   1415 {
   1416    Bool unary;
   1417    vassert(mode64 == True);
   1418    initHRegUsage(u);
   1419    switch (i->tag) {
   1420       case Ain_Imm64:
   1421          addHRegUse(u, HRmWrite, i->Ain.Imm64.dst);
   1422          return;
   1423       case Ain_Alu64R:
   1424          addRegUsage_AMD64RMI(u, i->Ain.Alu64R.src);
   1425          if (i->Ain.Alu64R.op == Aalu_MOV) {
   1426             addHRegUse(u, HRmWrite, i->Ain.Alu64R.dst);
   1427             return;
   1428          }
   1429          if (i->Ain.Alu64R.op == Aalu_CMP) {
   1430             addHRegUse(u, HRmRead, i->Ain.Alu64R.dst);
   1431             return;
   1432          }
   1433          addHRegUse(u, HRmModify, i->Ain.Alu64R.dst);
   1434          return;
   1435       case Ain_Alu64M:
   1436          addRegUsage_AMD64RI(u, i->Ain.Alu64M.src);
   1437          addRegUsage_AMD64AMode(u, i->Ain.Alu64M.dst);
   1438          return;
   1439       case Ain_Sh64:
   1440          addHRegUse(u, HRmModify, i->Ain.Sh64.dst);
   1441          if (i->Ain.Sh64.src == 0)
   1442             addHRegUse(u, HRmRead, hregAMD64_RCX());
   1443          return;
   1444       case Ain_Test64:
   1445          addHRegUse(u, HRmRead, i->Ain.Test64.dst);
   1446          return;
   1447       case Ain_Unary64:
   1448          addHRegUse(u, HRmModify, i->Ain.Unary64.dst);
   1449          return;
   1450       case Ain_Lea64:
   1451          addRegUsage_AMD64AMode(u, i->Ain.Lea64.am);
   1452          addHRegUse(u, HRmWrite, i->Ain.Lea64.dst);
   1453          return;
   1454       case Ain_Alu32R:
   1455          vassert(i->Ain.Alu32R.op != Aalu_MOV);
   1456          addRegUsage_AMD64RMI(u, i->Ain.Alu32R.src);
   1457          if (i->Ain.Alu32R.op == Aalu_CMP) {
   1458             addHRegUse(u, HRmRead, i->Ain.Alu32R.dst);
   1459             return;
   1460          }
   1461          addHRegUse(u, HRmModify, i->Ain.Alu32R.dst);
   1462          return;
   1463       case Ain_MulL:
   1464          addRegUsage_AMD64RM(u, i->Ain.MulL.src, HRmRead);
   1465          addHRegUse(u, HRmModify, hregAMD64_RAX());
   1466          addHRegUse(u, HRmWrite, hregAMD64_RDX());
   1467          return;
   1468       case Ain_Div:
   1469          addRegUsage_AMD64RM(u, i->Ain.Div.src, HRmRead);
   1470          addHRegUse(u, HRmModify, hregAMD64_RAX());
   1471          addHRegUse(u, HRmModify, hregAMD64_RDX());
   1472          return;
   1473 //..       case Xin_Sh3232:
   1474 //..          addHRegUse(u, HRmRead, i->Xin.Sh3232.src);
   1475 //..          addHRegUse(u, HRmModify, i->Xin.Sh3232.dst);
   1476 //..          if (i->Xin.Sh3232.amt == 0)
   1477 //..             addHRegUse(u, HRmRead, hregAMD64_ECX());
   1478 //..          return;
   1479       case Ain_Push:
   1480          addRegUsage_AMD64RMI(u, i->Ain.Push.src);
   1481          addHRegUse(u, HRmModify, hregAMD64_RSP());
   1482          return;
   1483       case Ain_Call:
   1484          /* This is a bit subtle. */
   1485          /* First off, claim it trashes all the caller-saved regs
   1486             which fall within the register allocator's jurisdiction.
   1487             These I believe to be: rax rcx rdx rsi rdi r8 r9 r10 r11
   1488             and all the xmm registers.
   1489          */
   1490          addHRegUse(u, HRmWrite, hregAMD64_RAX());
   1491          addHRegUse(u, HRmWrite, hregAMD64_RCX());
   1492          addHRegUse(u, HRmWrite, hregAMD64_RDX());
   1493          addHRegUse(u, HRmWrite, hregAMD64_RSI());
   1494          addHRegUse(u, HRmWrite, hregAMD64_RDI());
   1495          addHRegUse(u, HRmWrite, hregAMD64_R8());
   1496          addHRegUse(u, HRmWrite, hregAMD64_R9());
   1497          addHRegUse(u, HRmWrite, hregAMD64_R10());
   1498          addHRegUse(u, HRmWrite, hregAMD64_R11());
   1499          addHRegUse(u, HRmWrite, hregAMD64_XMM0());
   1500          addHRegUse(u, HRmWrite, hregAMD64_XMM1());
   1501          addHRegUse(u, HRmWrite, hregAMD64_XMM2());
   1502          addHRegUse(u, HRmWrite, hregAMD64_XMM3());
   1503          addHRegUse(u, HRmWrite, hregAMD64_XMM4());
   1504          addHRegUse(u, HRmWrite, hregAMD64_XMM5());
   1505          addHRegUse(u, HRmWrite, hregAMD64_XMM6());
   1506          addHRegUse(u, HRmWrite, hregAMD64_XMM7());
   1507          addHRegUse(u, HRmWrite, hregAMD64_XMM8());
   1508          addHRegUse(u, HRmWrite, hregAMD64_XMM9());
   1509          addHRegUse(u, HRmWrite, hregAMD64_XMM10());
   1510          addHRegUse(u, HRmWrite, hregAMD64_XMM11());
   1511          addHRegUse(u, HRmWrite, hregAMD64_XMM12());
   1512          addHRegUse(u, HRmWrite, hregAMD64_XMM13());
   1513          addHRegUse(u, HRmWrite, hregAMD64_XMM14());
   1514          addHRegUse(u, HRmWrite, hregAMD64_XMM15());
   1515 
   1516          /* Now we have to state any parameter-carrying registers
   1517             which might be read.  This depends on the regparmness. */
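                  /* For instance (an illustrative note, not in the
                     original sources): a regparms==3 helper reads %rdi,
                     %rsi and %rdx -- the first three SysV AMD64 argument
                     registers -- which the fall-through chain below
                     states in reverse order. */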
   1518          switch (i->Ain.Call.regparms) {
   1519             case 6: addHRegUse(u, HRmRead, hregAMD64_R9());  /*fallthru*/
   1520             case 5: addHRegUse(u, HRmRead, hregAMD64_R8());  /*fallthru*/
   1521             case 4: addHRegUse(u, HRmRead, hregAMD64_RCX()); /*fallthru*/
   1522             case 3: addHRegUse(u, HRmRead, hregAMD64_RDX()); /*fallthru*/
   1523             case 2: addHRegUse(u, HRmRead, hregAMD64_RSI()); /*fallthru*/
   1524             case 1: addHRegUse(u, HRmRead, hregAMD64_RDI()); break;
   1525             case 0: break;
   1526             default: vpanic("getRegUsage_AMD64Instr:Call:regparms");
   1527          }
   1528          /* Finally, there is the issue that the insn trashes a
   1529             register because the literal target address has to be
   1530             loaded into a register.  Fortunately, r11 is stated in the
   1531             ABI as a scratch register, and so seems a suitable victim.  */
   1532          addHRegUse(u, HRmWrite, hregAMD64_R11());
   1533          /* Upshot of this is that the assembler really must use r11,
   1534             and no other, as a destination temporary. */
   1535          return;
   1536       case Ain_Goto:
   1537          addRegUsage_AMD64RI(u, i->Ain.Goto.dst);
   1538          addHRegUse(u, HRmWrite, hregAMD64_RAX()); /* used for next guest addr */
   1539          addHRegUse(u, HRmWrite, hregAMD64_RDX()); /* used for dispatcher addr */
   1540          if (i->Ain.Goto.jk != Ijk_Boring
   1541              && i->Ain.Goto.jk != Ijk_Call
   1542              && i->Ain.Goto.jk != Ijk_Ret)
   1543             /* note, this is irrelevant since rbp is not actually
   1544                available to the allocator.  But still .. */
   1545             addHRegUse(u, HRmWrite, hregAMD64_RBP());
   1546          return;
   1547       case Ain_CMov64:
   1548          addRegUsage_AMD64RM(u, i->Ain.CMov64.src, HRmRead);
   1549          addHRegUse(u, HRmModify, i->Ain.CMov64.dst);
   1550          return;
   1551       case Ain_MovxLQ:
   1552          addHRegUse(u, HRmRead,  i->Ain.MovxLQ.src);
   1553          addHRegUse(u, HRmWrite, i->Ain.MovxLQ.dst);
   1554          return;
   1555       case Ain_LoadEX:
   1556          addRegUsage_AMD64AMode(u, i->Ain.LoadEX.src);
   1557          addHRegUse(u, HRmWrite, i->Ain.LoadEX.dst);
   1558          return;
   1559       case Ain_Store:
   1560          addHRegUse(u, HRmRead, i->Ain.Store.src);
   1561          addRegUsage_AMD64AMode(u, i->Ain.Store.dst);
   1562          return;
   1563       case Ain_Set64:
   1564          addHRegUse(u, HRmWrite, i->Ain.Set64.dst);
   1565          return;
   1566       case Ain_Bsfr64:
   1567          addHRegUse(u, HRmRead, i->Ain.Bsfr64.src);
   1568          addHRegUse(u, HRmWrite, i->Ain.Bsfr64.dst);
   1569          return;
   1570       case Ain_MFence:
   1571          return;
   1572       case Ain_ACAS:
   1573          addRegUsage_AMD64AMode(u, i->Ain.ACAS.addr);
   1574          addHRegUse(u, HRmRead, hregAMD64_RBX());
   1575          addHRegUse(u, HRmModify, hregAMD64_RAX());
   1576          return;
   1577       case Ain_DACAS:
   1578          addRegUsage_AMD64AMode(u, i->Ain.DACAS.addr);
   1579          addHRegUse(u, HRmRead, hregAMD64_RCX());
   1580          addHRegUse(u, HRmRead, hregAMD64_RBX());
   1581          addHRegUse(u, HRmModify, hregAMD64_RDX());
   1582          addHRegUse(u, HRmModify, hregAMD64_RAX());
   1583          return;
   1584       case Ain_A87Free:
   1585          return;
   1586       case Ain_A87PushPop:
   1587          addRegUsage_AMD64AMode(u, i->Ain.A87PushPop.addr);
   1588          return;
   1589       case Ain_A87FpOp:
   1590          return;
   1591       case Ain_A87LdCW:
   1592          addRegUsage_AMD64AMode(u, i->Ain.A87LdCW.addr);
   1593          return;
   1594       case Ain_A87StSW:
   1595          addRegUsage_AMD64AMode(u, i->Ain.A87StSW.addr);
   1596          return;
   1597 //..       case Xin_FpUnary:
   1598 //..          addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
   1599 //..          addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
   1600 //..          return;
   1601 //..       case Xin_FpBinary:
   1602 //..          addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL);
   1603 //..          addHRegUse(u, HRmRead, i->Xin.FpBinary.srcR);
   1604 //..          addHRegUse(u, HRmWrite, i->Xin.FpBinary.dst);
   1605 //..          return;
   1606 //..       case Xin_FpLdSt:
   1607 //..          addRegUsage_AMD64AMode(u, i->Xin.FpLdSt.addr);
   1608 //..          addHRegUse(u, i->Xin.FpLdSt.isLoad ? HRmWrite : HRmRead,
   1609 //..                        i->Xin.FpLdSt.reg);
   1610 //..          return;
   1611 //..       case Xin_FpLdStI:
   1612 //..          addRegUsage_AMD64AMode(u, i->Xin.FpLdStI.addr);
   1613 //..          addHRegUse(u, i->Xin.FpLdStI.isLoad ? HRmWrite : HRmRead,
   1614 //..                        i->Xin.FpLdStI.reg);
   1615 //..          return;
   1616 //..       case Xin_Fp64to32:
   1617 //..          addHRegUse(u, HRmRead,  i->Xin.Fp64to32.src);
   1618 //..          addHRegUse(u, HRmWrite, i->Xin.Fp64to32.dst);
   1619 //..          return;
   1620 //..       case Xin_FpCMov:
   1621 //..          addHRegUse(u, HRmRead,   i->Xin.FpCMov.src);
   1622 //..          addHRegUse(u, HRmModify, i->Xin.FpCMov.dst);
   1623 //..          return;
   1624       case Ain_LdMXCSR:
   1625          addRegUsage_AMD64AMode(u, i->Ain.LdMXCSR.addr);
   1626          return;
   1627 //..       case Xin_FpStSW_AX:
   1628 //..          addHRegUse(u, HRmWrite, hregAMD64_EAX());
   1629 //..          return;
   1630       case Ain_SseUComIS:
   1631          addHRegUse(u, HRmRead,  i->Ain.SseUComIS.srcL);
   1632          addHRegUse(u, HRmRead,  i->Ain.SseUComIS.srcR);
   1633          addHRegUse(u, HRmWrite, i->Ain.SseUComIS.dst);
   1634          return;
   1635       case Ain_SseSI2SF:
   1636          addHRegUse(u, HRmRead,  i->Ain.SseSI2SF.src);
   1637          addHRegUse(u, HRmWrite, i->Ain.SseSI2SF.dst);
   1638          return;
   1639       case Ain_SseSF2SI:
   1640          addHRegUse(u, HRmRead,  i->Ain.SseSF2SI.src);
   1641          addHRegUse(u, HRmWrite, i->Ain.SseSF2SI.dst);
   1642          return;
   1643       case Ain_SseSDSS:
   1644          addHRegUse(u, HRmRead,  i->Ain.SseSDSS.src);
   1645          addHRegUse(u, HRmWrite, i->Ain.SseSDSS.dst);
   1646          return;
   1647       case Ain_SseLdSt:
   1648          addRegUsage_AMD64AMode(u, i->Ain.SseLdSt.addr);
   1649          addHRegUse(u, i->Ain.SseLdSt.isLoad ? HRmWrite : HRmRead,
   1650                        i->Ain.SseLdSt.reg);
   1651          return;
   1652       case Ain_SseLdzLO:
   1653          addRegUsage_AMD64AMode(u, i->Ain.SseLdzLO.addr);
   1654          addHRegUse(u, HRmWrite, i->Ain.SseLdzLO.reg);
   1655          return;
   1656 //..       case Xin_SseConst:
   1657 //..          addHRegUse(u, HRmWrite, i->Xin.SseConst.dst);
   1658 //..          return;
   1659       case Ain_Sse32Fx4:
   1660          vassert(i->Ain.Sse32Fx4.op != Asse_MOV);
   1661          unary = toBool( i->Ain.Sse32Fx4.op == Asse_RCPF
   1662                          || i->Ain.Sse32Fx4.op == Asse_RSQRTF
   1663                          || i->Ain.Sse32Fx4.op == Asse_SQRTF );
   1664          addHRegUse(u, HRmRead, i->Ain.Sse32Fx4.src);
   1665          addHRegUse(u, unary ? HRmWrite : HRmModify,
   1666                        i->Ain.Sse32Fx4.dst);
   1667          return;
   1668       case Ain_Sse32FLo:
   1669          vassert(i->Ain.Sse32FLo.op != Asse_MOV);
   1670          unary = toBool( i->Ain.Sse32FLo.op == Asse_RCPF
   1671                          || i->Ain.Sse32FLo.op == Asse_RSQRTF
   1672                          || i->Ain.Sse32FLo.op == Asse_SQRTF );
   1673          addHRegUse(u, HRmRead, i->Ain.Sse32FLo.src);
   1674          addHRegUse(u, unary ? HRmWrite : HRmModify,
   1675                        i->Ain.Sse32FLo.dst);
   1676          return;
   1677       case Ain_Sse64Fx2:
   1678          vassert(i->Ain.Sse64Fx2.op != Asse_MOV);
   1679          unary = toBool( i->Ain.Sse64Fx2.op == Asse_RCPF
   1680                          || i->Ain.Sse64Fx2.op == Asse_RSQRTF
   1681                          || i->Ain.Sse64Fx2.op == Asse_SQRTF );
   1682          addHRegUse(u, HRmRead, i->Ain.Sse64Fx2.src);
   1683          addHRegUse(u, unary ? HRmWrite : HRmModify,
   1684                        i->Ain.Sse64Fx2.dst);
   1685          return;
   1686       case Ain_Sse64FLo:
   1687          vassert(i->Ain.Sse64FLo.op != Asse_MOV);
   1688          unary = toBool( i->Ain.Sse64FLo.op == Asse_RCPF
   1689                          || i->Ain.Sse64FLo.op == Asse_RSQRTF
   1690                          || i->Ain.Sse64FLo.op == Asse_SQRTF );
   1691          addHRegUse(u, HRmRead, i->Ain.Sse64FLo.src);
   1692          addHRegUse(u, unary ? HRmWrite : HRmModify,
   1693                        i->Ain.Sse64FLo.dst);
   1694          return;
   1695       case Ain_SseReRg:
   1696          if ( (i->Ain.SseReRg.op == Asse_XOR
   1697                || i->Ain.SseReRg.op == Asse_CMPEQ32)
   1698               && i->Ain.SseReRg.src == i->Ain.SseReRg.dst) {
   1699             /* reg-alloc needs to understand 'xor r,r' and 'cmpeqd
    1700                r,r' as a write of a value to r, independent of any
   1701                previous value in r */
   1702             /* (as opposed to a rite of passage :-) */
   1703             addHRegUse(u, HRmWrite, i->Ain.SseReRg.dst);
   1704          } else {
   1705             addHRegUse(u, HRmRead, i->Ain.SseReRg.src);
   1706             addHRegUse(u, i->Ain.SseReRg.op == Asse_MOV
   1707                              ? HRmWrite : HRmModify,
   1708                           i->Ain.SseReRg.dst);
   1709          }
   1710          return;
   1711       case Ain_SseCMov:
   1712          addHRegUse(u, HRmRead,   i->Ain.SseCMov.src);
   1713          addHRegUse(u, HRmModify, i->Ain.SseCMov.dst);
   1714          return;
   1715       case Ain_SseShuf:
   1716          addHRegUse(u, HRmRead,  i->Ain.SseShuf.src);
   1717          addHRegUse(u, HRmWrite, i->Ain.SseShuf.dst);
   1718          return;
   1719       default:
   1720          ppAMD64Instr(i, mode64);
   1721          vpanic("getRegUsage_AMD64Instr");
   1722    }
   1723 }
   1724 
   1725 /* local helper */
   1726 static inline void mapReg(HRegRemap* m, HReg* r)
   1727 {
   1728    *r = lookupHRegRemap(m, *r);
   1729 }
   1730 
   1731 void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 )
   1732 {
   1733    vassert(mode64 == True);
   1734    switch (i->tag) {
   1735       case Ain_Imm64:
   1736          mapReg(m, &i->Ain.Imm64.dst);
   1737          return;
   1738       case Ain_Alu64R:
   1739          mapRegs_AMD64RMI(m, i->Ain.Alu64R.src);
   1740          mapReg(m, &i->Ain.Alu64R.dst);
   1741          return;
   1742       case Ain_Alu64M:
   1743          mapRegs_AMD64RI(m, i->Ain.Alu64M.src);
   1744          mapRegs_AMD64AMode(m, i->Ain.Alu64M.dst);
   1745          return;
   1746       case Ain_Sh64:
   1747          mapReg(m, &i->Ain.Sh64.dst);
   1748          return;
   1749       case Ain_Test64:
   1750          mapReg(m, &i->Ain.Test64.dst);
   1751          return;
   1752       case Ain_Unary64:
   1753          mapReg(m, &i->Ain.Unary64.dst);
   1754          return;
   1755       case Ain_Lea64:
   1756          mapRegs_AMD64AMode(m, i->Ain.Lea64.am);
   1757          mapReg(m, &i->Ain.Lea64.dst);
   1758          return;
   1759       case Ain_Alu32R:
   1760          mapRegs_AMD64RMI(m, i->Ain.Alu32R.src);
   1761          mapReg(m, &i->Ain.Alu32R.dst);
   1762          return;
   1763       case Ain_MulL:
   1764          mapRegs_AMD64RM(m, i->Ain.MulL.src);
   1765          return;
   1766       case Ain_Div:
   1767          mapRegs_AMD64RM(m, i->Ain.Div.src);
   1768          return;
   1769 //..       case Xin_Sh3232:
   1770 //..          mapReg(m, &i->Xin.Sh3232.src);
   1771 //..          mapReg(m, &i->Xin.Sh3232.dst);
   1772 //..          return;
   1773       case Ain_Push:
   1774          mapRegs_AMD64RMI(m, i->Ain.Push.src);
   1775          return;
   1776       case Ain_Call:
   1777          return;
   1778       case Ain_Goto:
   1779          mapRegs_AMD64RI(m, i->Ain.Goto.dst);
   1780          return;
   1781       case Ain_CMov64:
   1782          mapRegs_AMD64RM(m, i->Ain.CMov64.src);
   1783          mapReg(m, &i->Ain.CMov64.dst);
   1784          return;
   1785       case Ain_MovxLQ:
   1786          mapReg(m, &i->Ain.MovxLQ.src);
   1787          mapReg(m, &i->Ain.MovxLQ.dst);
   1788          return;
   1789       case Ain_LoadEX:
   1790          mapRegs_AMD64AMode(m, i->Ain.LoadEX.src);
   1791          mapReg(m, &i->Ain.LoadEX.dst);
   1792          return;
   1793       case Ain_Store:
   1794          mapReg(m, &i->Ain.Store.src);
   1795          mapRegs_AMD64AMode(m, i->Ain.Store.dst);
   1796          return;
   1797       case Ain_Set64:
   1798          mapReg(m, &i->Ain.Set64.dst);
   1799          return;
   1800       case Ain_Bsfr64:
   1801          mapReg(m, &i->Ain.Bsfr64.src);
   1802          mapReg(m, &i->Ain.Bsfr64.dst);
   1803          return;
   1804       case Ain_MFence:
   1805          return;
   1806       case Ain_ACAS:
   1807          mapRegs_AMD64AMode(m, i->Ain.ACAS.addr);
   1808          return;
   1809       case Ain_DACAS:
   1810          mapRegs_AMD64AMode(m, i->Ain.DACAS.addr);
   1811          return;
   1812       case Ain_A87Free:
   1813          return;
   1814       case Ain_A87PushPop:
   1815          mapRegs_AMD64AMode(m, i->Ain.A87PushPop.addr);
   1816          return;
   1817       case Ain_A87FpOp:
   1818          return;
   1819       case Ain_A87LdCW:
   1820          mapRegs_AMD64AMode(m, i->Ain.A87LdCW.addr);
   1821          return;
   1822       case Ain_A87StSW:
   1823          mapRegs_AMD64AMode(m, i->Ain.A87StSW.addr);
   1824          return;
   1825 //..       case Xin_FpUnary:
   1826 //..          mapReg(m, &i->Xin.FpUnary.src);
   1827 //..          mapReg(m, &i->Xin.FpUnary.dst);
   1828 //..          return;
   1829 //..       case Xin_FpBinary:
   1830 //..          mapReg(m, &i->Xin.FpBinary.srcL);
   1831 //..          mapReg(m, &i->Xin.FpBinary.srcR);
   1832 //..          mapReg(m, &i->Xin.FpBinary.dst);
   1833 //..          return;
   1834 //..       case Xin_FpLdSt:
   1835 //..          mapRegs_AMD64AMode(m, i->Xin.FpLdSt.addr);
   1836 //..          mapReg(m, &i->Xin.FpLdSt.reg);
   1837 //..          return;
   1838 //..       case Xin_FpLdStI:
   1839 //..          mapRegs_AMD64AMode(m, i->Xin.FpLdStI.addr);
   1840 //..          mapReg(m, &i->Xin.FpLdStI.reg);
   1841 //..          return;
   1842 //..       case Xin_Fp64to32:
   1843 //..          mapReg(m, &i->Xin.Fp64to32.src);
   1844 //..          mapReg(m, &i->Xin.Fp64to32.dst);
   1845 //..          return;
   1846 //..       case Xin_FpCMov:
   1847 //..          mapReg(m, &i->Xin.FpCMov.src);
   1848 //..          mapReg(m, &i->Xin.FpCMov.dst);
   1849 //..          return;
   1850       case Ain_LdMXCSR:
   1851          mapRegs_AMD64AMode(m, i->Ain.LdMXCSR.addr);
   1852          return;
   1853 //..       case Xin_FpStSW_AX:
   1854 //..          return;
   1855       case Ain_SseUComIS:
   1856          mapReg(m, &i->Ain.SseUComIS.srcL);
   1857          mapReg(m, &i->Ain.SseUComIS.srcR);
   1858          mapReg(m, &i->Ain.SseUComIS.dst);
   1859          return;
   1860       case Ain_SseSI2SF:
   1861          mapReg(m, &i->Ain.SseSI2SF.src);
   1862          mapReg(m, &i->Ain.SseSI2SF.dst);
   1863          return;
   1864       case Ain_SseSF2SI:
   1865          mapReg(m, &i->Ain.SseSF2SI.src);
   1866          mapReg(m, &i->Ain.SseSF2SI.dst);
   1867          return;
   1868       case Ain_SseSDSS:
   1869          mapReg(m, &i->Ain.SseSDSS.src);
   1870          mapReg(m, &i->Ain.SseSDSS.dst);
   1871          return;
   1872 //..       case Xin_SseConst:
   1873 //..          mapReg(m, &i->Xin.SseConst.dst);
   1874 //..          return;
   1875       case Ain_SseLdSt:
   1876          mapReg(m, &i->Ain.SseLdSt.reg);
   1877          mapRegs_AMD64AMode(m, i->Ain.SseLdSt.addr);
   1878          break;
   1879       case Ain_SseLdzLO:
   1880          mapReg(m, &i->Ain.SseLdzLO.reg);
   1881          mapRegs_AMD64AMode(m, i->Ain.SseLdzLO.addr);
   1882          break;
   1883       case Ain_Sse32Fx4:
   1884          mapReg(m, &i->Ain.Sse32Fx4.src);
   1885          mapReg(m, &i->Ain.Sse32Fx4.dst);
   1886          return;
   1887       case Ain_Sse32FLo:
   1888          mapReg(m, &i->Ain.Sse32FLo.src);
   1889          mapReg(m, &i->Ain.Sse32FLo.dst);
   1890          return;
   1891       case Ain_Sse64Fx2:
   1892          mapReg(m, &i->Ain.Sse64Fx2.src);
   1893          mapReg(m, &i->Ain.Sse64Fx2.dst);
   1894          return;
   1895       case Ain_Sse64FLo:
   1896          mapReg(m, &i->Ain.Sse64FLo.src);
   1897          mapReg(m, &i->Ain.Sse64FLo.dst);
   1898          return;
   1899       case Ain_SseReRg:
   1900          mapReg(m, &i->Ain.SseReRg.src);
   1901          mapReg(m, &i->Ain.SseReRg.dst);
   1902          return;
   1903       case Ain_SseCMov:
   1904          mapReg(m, &i->Ain.SseCMov.src);
   1905          mapReg(m, &i->Ain.SseCMov.dst);
   1906          return;
   1907       case Ain_SseShuf:
   1908          mapReg(m, &i->Ain.SseShuf.src);
   1909          mapReg(m, &i->Ain.SseShuf.dst);
   1910          return;
   1911       default:
   1912          ppAMD64Instr(i, mode64);
   1913          vpanic("mapRegs_AMD64Instr");
   1914    }
   1915 }
   1916 
   1917 /* Figure out if i represents a reg-reg move, and if so assign the
   1918    source and destination to *src and *dst.  If in doubt say No.  Used
   1919    by the register allocator to do move coalescing.
   1920 */
   1921 Bool isMove_AMD64Instr ( AMD64Instr* i, HReg* src, HReg* dst )
   1922 {
   1923    /* Moves between integer regs */
   1924    if (i->tag == Ain_Alu64R) {
   1925       if (i->Ain.Alu64R.op != Aalu_MOV)
   1926          return False;
   1927       if (i->Ain.Alu64R.src->tag != Armi_Reg)
   1928          return False;
   1929       *src = i->Ain.Alu64R.src->Armi.Reg.reg;
   1930       *dst = i->Ain.Alu64R.dst;
   1931       return True;
   1932    }
   1933    /* Moves between vector regs */
   1934    if (i->tag == Ain_SseReRg) {
   1935       if (i->Ain.SseReRg.op != Asse_MOV)
   1936          return False;
   1937       *src = i->Ain.SseReRg.src;
   1938       *dst = i->Ain.SseReRg.dst;
   1939       return True;
   1940    }
   1941    return False;
   1942 }
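
         /* Illustrative sketch (not part of the original file): a register
            allocator can use isMove_AMD64Instr for move coalescing along
            the lines of

               HReg src, dst;
               if (isMove_AMD64Instr(instr, &src, &dst))
                  ... drop 'instr' if src and dst later land in the
                      same real register ...

            where 'instr' stands for whichever instruction the (generic,
            separately implemented) allocator is currently examining. */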
   1943 
   1944 
   1945 /* Generate amd64 spill/reload instructions under the direction of the
   1946    register allocator.  Note it's critical these don't write the
   1947    condition codes. */
   1948 
   1949 void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
   1950                       HReg rreg, Int offsetB, Bool mode64 )
   1951 {
   1952    AMD64AMode* am;
   1953    vassert(offsetB >= 0);
   1954    vassert(!hregIsVirtual(rreg));
   1955    vassert(mode64 == True);
   1956    *i1 = *i2 = NULL;
   1957    am = AMD64AMode_IR(offsetB, hregAMD64_RBP());
   1958    switch (hregClass(rreg)) {
   1959       case HRcInt64:
   1960          *i1 = AMD64Instr_Alu64M ( Aalu_MOV, AMD64RI_Reg(rreg), am );
   1961          return;
   1962       case HRcVec128:
   1963          *i1 = AMD64Instr_SseLdSt ( False/*store*/, 16, rreg, am );
   1964          return;
   1965       default:
   1966          ppHRegClass(hregClass(rreg));
   1967          vpanic("genSpill_AMD64: unimplemented regclass");
   1968    }
   1969 }
   1970 
   1971 void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
   1972                        HReg rreg, Int offsetB, Bool mode64 )
   1973 {
   1974    AMD64AMode* am;
   1975    vassert(offsetB >= 0);
   1976    vassert(!hregIsVirtual(rreg));
   1977    vassert(mode64 == True);
   1978    *i1 = *i2 = NULL;
   1979    am = AMD64AMode_IR(offsetB, hregAMD64_RBP());
   1980    switch (hregClass(rreg)) {
   1981       case HRcInt64:
   1982          *i1 = AMD64Instr_Alu64R ( Aalu_MOV, AMD64RMI_Mem(am), rreg );
   1983          return;
   1984       case HRcVec128:
   1985          *i1 = AMD64Instr_SseLdSt ( True/*load*/, 16, rreg, am );
   1986          return;
   1987       default:
   1988          ppHRegClass(hregClass(rreg));
   1989          vpanic("genReload_AMD64: unimplemented regclass");
   1990    }
   1991 }
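
         /* Illustrative example (not a comment from the original): for an
            HRcInt64 real register and offsetB == 16, genSpill_AMD64 emits a
            single Ain_Alu64M, i.e. "movq %reg, 16(%rbp)", and genReload_AMD64
            the matching Ain_Alu64R, i.e. "movq 16(%rbp), %reg"; neither form
            writes the condition codes, as required above. */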
   1992 
   1993 
   1994 /* --------- The amd64 assembler (bleh.) --------- */
   1995 
   1996 /* Produce the low three bits of an integer register number. */
   1997 static UChar iregBits210 ( HReg r )
   1998 {
   1999    UInt n;
   2000    vassert(hregClass(r) == HRcInt64);
   2001    vassert(!hregIsVirtual(r));
   2002    n = hregNumber(r);
   2003    vassert(n <= 15);
   2004    return toUChar(n & 7);
   2005 }
   2006 
   2007 /* Produce bit 3 of an integer register number. */
   2008 static UChar iregBit3 ( HReg r )
   2009 {
   2010    UInt n;
   2011    vassert(hregClass(r) == HRcInt64);
   2012    vassert(!hregIsVirtual(r));
   2013    n = hregNumber(r);
   2014    vassert(n <= 15);
   2015    return toUChar((n >> 3) & 1);
   2016 }
   2017 
   2018 /* Produce a complete 4-bit integer register number. */
   2019 static UChar iregBits3210 ( HReg r )
   2020 {
   2021    UInt n;
   2022    vassert(hregClass(r) == HRcInt64);
   2023    vassert(!hregIsVirtual(r));
   2024    n = hregNumber(r);
   2025    vassert(n <= 15);
   2026    return toUChar(n);
   2027 }
   2028 
   2029 /* Given an xmm (128bit V-class) register number, produce the
   2030    equivalent numbered register in 64-bit I-class.  This is a bit of
    2031    fakery which facilitates using functions that work on
    2032    integer register numbers when assembling SSE instructions
    2033    too. */
    2034 static HReg vreg2ireg ( HReg r )
   2035 {
   2036    UInt n;
   2037    vassert(hregClass(r) == HRcVec128);
   2038    vassert(!hregIsVirtual(r));
   2039    n = hregNumber(r);
   2040    vassert(n <= 15);
   2041    return mkHReg(n, HRcInt64, False);
   2042 }
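
         /* For example (illustrative): %xmm13 maps to the I-class register
            numbered 13, for which iregBits210 and iregBit3 then yield 5 and 1
            respectively -- exactly the fields needed to build ModRM and REX
            bytes for SSE instructions. */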
   2043 
   2044 static UChar mkModRegRM ( UChar mod, UChar reg, UChar regmem )
   2045 {
   2046    return toUChar( ((mod & 3) << 6)
   2047                    | ((reg & 7) << 3)
   2048                    | (regmem & 7) );
   2049 }
   2050 
   2051 static UChar mkSIB ( Int shift, Int regindex, Int regbase )
   2052 {
   2053    return toUChar( ((shift & 3) << 6)
   2054                    | ((regindex & 7) << 3)
   2055                    | (regbase & 7) );
   2056 }
   2057 
   2058 static UChar* emit32 ( UChar* p, UInt w32 )
   2059 {
   2060    *p++ = toUChar((w32)       & 0x000000FF);
   2061    *p++ = toUChar((w32 >>  8) & 0x000000FF);
   2062    *p++ = toUChar((w32 >> 16) & 0x000000FF);
   2063    *p++ = toUChar((w32 >> 24) & 0x000000FF);
   2064    return p;
   2065 }
   2066 
   2067 static UChar* emit64 ( UChar* p, ULong w64 )
   2068 {
   2069    p = emit32(p, toUInt(w64         & 0xFFFFFFFF));
   2070    p = emit32(p, toUInt((w64 >> 32) & 0xFFFFFFFF));
   2071    return p;
   2072 }
   2073 
   2074 /* Does a sign-extend of the lowest 8 bits give
   2075    the original number? */
   2076 static Bool fits8bits ( UInt w32 )
   2077 {
   2078    Int i32 = (Int)w32;
   2079    return toBool(i32 == ((i32 << 24) >> 24));
   2080 }
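         /* E.g. (illustrative): fits8bits(0x7F) and fits8bits(0xFFFFFF80)
            hold, since 127 and -128 survive the truncate-then-sign-extend
            round trip, whereas fits8bits(0x80) does not. */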
   2081 /* Can the lower 32 bits be signedly widened to produce the whole
   2082    64-bit value?  In other words, are the top 33 bits either all 0 or
   2083    all 1 ? */
   2084 static Bool fitsIn32Bits ( ULong x )
   2085 {
   2086    Long y0 = (Long)x;
   2087    Long y1 = y0;
   2088    y1 <<= 32;
   2089    y1 >>=/*s*/ 32;
   2090    return toBool(x == y1);
   2091 }
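         /* E.g. (illustrative): fitsIn32Bits(0xFFFFFFFF80000000ULL) holds,
            since sign-extending its low 32 bits recreates the value, whereas
            fitsIn32Bits(0x80000000ULL) does not. */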
   2092 
   2093 
   2094 /* Forming mod-reg-rm bytes and scale-index-base bytes.
   2095 
   2096      greg,  0(ereg)    |  ereg is not any of: RSP RBP R12 R13
   2097                        =  00 greg ereg
   2098 
   2099      greg,  d8(ereg)   |  ereg is neither of: RSP R12
   2100                        =  01 greg ereg, d8
   2101 
   2102      greg,  d32(ereg)  |  ereg is neither of: RSP R12
   2103                        =  10 greg ereg, d32
   2104 
   2105      greg,  d8(ereg)   |  ereg is either: RSP R12
   2106                        =  01 greg 100, 0x24, d8
   2107                        (lowest bit of rex distinguishes R12/RSP)
   2108 
   2109      greg,  d32(ereg)  |  ereg is either: RSP R12
   2110                        =  10 greg 100, 0x24, d32
   2111                        (lowest bit of rex distinguishes R12/RSP)
   2112 
   2113      -----------------------------------------------
   2114 
   2115      greg,  d8(base,index,scale)
   2116                |  index != RSP
   2117                =  01 greg 100, scale index base, d8
   2118 
   2119      greg,  d32(base,index,scale)
   2120                |  index != RSP
   2121                =  10 greg 100, scale index base, d32
   2122 */
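
         /* Worked example (illustrative, not part of the original comment):
            for greg = %rcx and the amode 8(%rdx) -- ereg = %rdx, which is
            neither RSP nor R12, and a displacement that fits in 8 bits --
            the d8(ereg) rule applies, giving mod=01, reg=001, rm=010, i.e.
            the two amode bytes 0x4A 0x08 (any REX prefix is produced
            separately by rexAMode_M below). */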
   2123 static UChar* doAMode_M ( UChar* p, HReg greg, AMD64AMode* am )
   2124 {
   2125    if (am->tag == Aam_IR) {
   2126       if (am->Aam.IR.imm == 0
   2127           && am->Aam.IR.reg != hregAMD64_RSP()
   2128           && am->Aam.IR.reg != hregAMD64_RBP()
   2129           && am->Aam.IR.reg != hregAMD64_R12()
   2130           && am->Aam.IR.reg != hregAMD64_R13()
   2131          ) {
   2132          *p++ = mkModRegRM(0, iregBits210(greg),
   2133                               iregBits210(am->Aam.IR.reg));
   2134          return p;
   2135       }
   2136       if (fits8bits(am->Aam.IR.imm)
   2137           && am->Aam.IR.reg != hregAMD64_RSP()
   2138           && am->Aam.IR.reg != hregAMD64_R12()
   2139          ) {
   2140          *p++ = mkModRegRM(1, iregBits210(greg),
   2141                               iregBits210(am->Aam.IR.reg));
   2142          *p++ = toUChar(am->Aam.IR.imm & 0xFF);
   2143          return p;
   2144       }
   2145       if (am->Aam.IR.reg != hregAMD64_RSP()
   2146           && am->Aam.IR.reg != hregAMD64_R12()
   2147          ) {
   2148          *p++ = mkModRegRM(2, iregBits210(greg),
   2149                               iregBits210(am->Aam.IR.reg));
   2150          p = emit32(p, am->Aam.IR.imm);
   2151          return p;
   2152       }
   2153       if ((am->Aam.IR.reg == hregAMD64_RSP()
   2154            || am->Aam.IR.reg == hregAMD64_R12())
   2155           && fits8bits(am->Aam.IR.imm)) {
    2156          *p++ = mkModRegRM(1, iregBits210(greg), 4);
   2157          *p++ = 0x24;
   2158          *p++ = toUChar(am->Aam.IR.imm & 0xFF);
   2159          return p;
   2160       }
   2161       if (/* (am->Aam.IR.reg == hregAMD64_RSP()
   2162 	     || wait for test case for RSP case */
   2163           am->Aam.IR.reg == hregAMD64_R12()) {
    2164          *p++ = mkModRegRM(2, iregBits210(greg), 4);
   2165          *p++ = 0x24;
   2166          p = emit32(p, am->Aam.IR.imm);
   2167          return p;
   2168       }
   2169       ppAMD64AMode(am);
   2170       vpanic("doAMode_M: can't emit amode IR");
   2171       /*NOTREACHED*/
   2172    }
   2173    if (am->tag == Aam_IRRS) {
   2174       if (fits8bits(am->Aam.IRRS.imm)
   2175           && am->Aam.IRRS.index != hregAMD64_RSP()) {
   2176          *p++ = mkModRegRM(1, iregBits210(greg), 4);
   2177          *p++ = mkSIB(am->Aam.IRRS.shift, am->Aam.IRRS.index,
   2178                                           am->Aam.IRRS.base);
   2179          *p++ = toUChar(am->Aam.IRRS.imm & 0xFF);
   2180          return p;
   2181       }
   2182       if (am->Aam.IRRS.index != hregAMD64_RSP()) {
   2183          *p++ = mkModRegRM(2, iregBits210(greg), 4);
   2184          *p++ = mkSIB(am->Aam.IRRS.shift, am->Aam.IRRS.index,
   2185                                           am->Aam.IRRS.base);
   2186          p = emit32(p, am->Aam.IRRS.imm);
   2187          return p;
   2188       }
   2189       ppAMD64AMode(am);
   2190       vpanic("doAMode_M: can't emit amode IRRS");
   2191       /*NOTREACHED*/
   2192    }
   2193    vpanic("doAMode_M: unknown amode");
   2194    /*NOTREACHED*/
   2195 }
   2196 
   2197 
   2198 /* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
   2199 static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
   2200 {
   2201    *p++ = mkModRegRM(3, iregBits210(greg), iregBits210(ereg));
   2202    return p;
   2203 }
   2204 
   2205 
   2206 /* Clear the W bit on a REX byte, thereby changing the operand size
   2207    back to whatever that instruction's default operand size is. */
   2208 static inline UChar clearWBit ( UChar rex )
   2209 {
   2210    return toUChar(rex & ~(1<<3));
   2211 }
   2212 
   2213 
   2214 /* Make up a REX byte, with W=1 (size=64), for a (greg,amode) pair. */
   2215 static UChar rexAMode_M ( HReg greg, AMD64AMode* am )
   2216 {
   2217    if (am->tag == Aam_IR) {
   2218       UChar W = 1;  /* we want 64-bit mode */
   2219       UChar R = iregBit3(greg);
   2220       UChar X = 0; /* not relevant */
   2221       UChar B = iregBit3(am->Aam.IR.reg);
   2222       return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
   2223    }
   2224    if (am->tag == Aam_IRRS) {
   2225       UChar W = 1;  /* we want 64-bit mode */
   2226       UChar R = iregBit3(greg);
   2227       UChar X = iregBit3(am->Aam.IRRS.index);
   2228       UChar B = iregBit3(am->Aam.IRRS.base);
   2229       return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
   2230    }
   2231    vassert(0);
   2232    return 0; /*NOTREACHED*/
   2233 }
   2234 
   2235 /* Make up a REX byte, with W=1 (size=64), for a (greg,ereg) pair. */
   2236 static UChar rexAMode_R ( HReg greg, HReg ereg )
   2237 {
   2238    UChar W = 1;  /* we want 64-bit mode */
   2239    UChar R = iregBit3(greg);
   2240    UChar X = 0; /* not relevant */
   2241    UChar B = iregBit3(ereg);
   2242    return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
   2243 }
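
         /* Worked example (illustrative): for greg = %r8 and ereg = %rax,
            rexAMode_R gives W=1 R=1 X=0 B=0, i.e. 0x4C (REX.W + REX.R).
            Passing that through clearWBit yields 0x44, which is how the
            32-bit-operand cases below fall back to the default operand
            size. */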
   2244 
   2245 
   2246 /* Emit ffree %st(N) */
   2247 static UChar* do_ffree_st ( UChar* p, Int n )
   2248 {
   2249    vassert(n >= 0 && n <= 7);
   2250    *p++ = 0xDD;
   2251    *p++ = toUChar(0xC0 + n);
   2252    return p;
   2253 }
   2254 
   2255 //.. /* Emit fstp %st(i), 1 <= i <= 7 */
   2256 //.. static UChar* do_fstp_st ( UChar* p, Int i )
   2257 //.. {
   2258 //..    vassert(1 <= i && i <= 7);
   2259 //..    *p++ = 0xDD;
   2260 //..    *p++ = 0xD8+i;
   2261 //..    return p;
   2262 //.. }
   2263 //..
   2264 //.. /* Emit fld %st(i), 0 <= i <= 6 */
   2265 //.. static UChar* do_fld_st ( UChar* p, Int i )
   2266 //.. {
   2267 //..    vassert(0 <= i && i <= 6);
   2268 //..    *p++ = 0xD9;
   2269 //..    *p++ = 0xC0+i;
   2270 //..    return p;
   2271 //.. }
   2272 //..
   2273 //.. /* Emit f<op> %st(0) */
   2274 //.. static UChar* do_fop1_st ( UChar* p, AMD64FpOp op )
   2275 //.. {
   2276 //..    switch (op) {
   2277 //..       case Xfp_NEG:    *p++ = 0xD9; *p++ = 0xE0; break;
   2278 //..       case Xfp_ABS:    *p++ = 0xD9; *p++ = 0xE1; break;
   2279 //..       case Xfp_SQRT:   *p++ = 0xD9; *p++ = 0xFA; break;
   2280 //..       case Xfp_ROUND:  *p++ = 0xD9; *p++ = 0xFC; break;
   2281 //..       case Xfp_SIN:    *p++ = 0xD9; *p++ = 0xFE; break;
   2282 //..       case Xfp_COS:    *p++ = 0xD9; *p++ = 0xFF; break;
   2283 //..       case Xfp_2XM1:   *p++ = 0xD9; *p++ = 0xF0; break;
   2284 //..       case Xfp_MOV:    break;
   2285 //..       case Xfp_TAN:    p = do_ffree_st7(p); /* since fptan pushes 1.0 */
   2286 //..                        *p++ = 0xD9; *p++ = 0xF2; /* fptan */
   2287 //..                        *p++ = 0xD9; *p++ = 0xF7; /* fincstp */
   2288 //..                        break;
   2289 //..       default: vpanic("do_fop1_st: unknown op");
   2290 //..    }
   2291 //..    return p;
   2292 //.. }
   2293 //..
   2294 //.. /* Emit f<op> %st(i), 1 <= i <= 5 */
   2295 //.. static UChar* do_fop2_st ( UChar* p, AMD64FpOp op, Int i )
   2296 //.. {
   2297 //.. #  define fake(_n) mkHReg((_n), HRcInt32, False)
   2298 //..    Int subopc;
   2299 //..    switch (op) {
   2300 //..       case Xfp_ADD: subopc = 0; break;
   2301 //..       case Xfp_SUB: subopc = 4; break;
   2302 //..       case Xfp_MUL: subopc = 1; break;
   2303 //..       case Xfp_DIV: subopc = 6; break;
   2304 //..       default: vpanic("do_fop2_st: unknown op");
   2305 //..    }
   2306 //..    *p++ = 0xD8;
   2307 //..    p    = doAMode_R(p, fake(subopc), fake(i));
   2308 //..    return p;
   2309 //.. #  undef fake
   2310 //.. }
   2311 //..
   2312 //.. /* Push a 32-bit word on the stack.  The word depends on tags[3:0];
   2313 //.. each byte is either 0x00 or 0xFF depending on the corresponding bit in tags[].
   2314 //.. */
   2315 //.. static UChar* push_word_from_tags ( UChar* p, UShort tags )
   2316 //.. {
   2317 //..    UInt w;
   2318 //..    vassert(0 == (tags & ~0xF));
   2319 //..    if (tags == 0) {
   2320 //..       /* pushl $0x00000000 */
   2321 //..       *p++ = 0x6A;
   2322 //..       *p++ = 0x00;
   2323 //..    }
   2324 //..    else
   2325 //..    /* pushl $0xFFFFFFFF */
   2326 //..    if (tags == 0xF) {
   2327 //..       *p++ = 0x6A;
   2328 //..       *p++ = 0xFF;
   2329 //..    } else {
   2330 //..       vassert(0); /* awaiting test case */
   2331 //..       w = 0;
   2332 //..       if (tags & 1) w |= 0x000000FF;
   2333 //..       if (tags & 2) w |= 0x0000FF00;
   2334 //..       if (tags & 4) w |= 0x00FF0000;
   2335 //..       if (tags & 8) w |= 0xFF000000;
   2336 //..       *p++ = 0x68;
   2337 //..       p = emit32(p, w);
   2338 //..    }
   2339 //..    return p;
   2340 //.. }
   2341 
   2342 /* Emit an instruction into buf and return the number of bytes used.
   2343    Note that buf is not the insn's final place, and therefore it is
   2344    imperative to emit position-independent code. */
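
         /* Illustrative call pattern (a sketch only, not from the original
            sources; 'instr' and the two dispatcher pointers stand for
            whatever the caller has to hand):

               UChar buf[64];
               Int   n = emit_AMD64Instr(buf, sizeof buf, instr, True,
                                         dispatch_unassisted, dispatch_assisted);

            The function insists on a buffer of at least 32 bytes, and the n
            bytes at buf[0..n-1] are afterwards copied to their final home --
            which is why they must be position-independent. */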
   2345 
   2346 Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i,
   2347                       Bool mode64,
   2348                       void* dispatch_unassisted,
   2349                       void* dispatch_assisted )
   2350 {
   2351    UInt /*irno,*/ opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
   2352    UInt   xtra;
   2353    UInt   reg;
   2354    UChar  rex;
   2355    UChar* p = &buf[0];
   2356    UChar* ptmp;
   2357    Int    j;
   2358    vassert(nbuf >= 32);
   2359    vassert(mode64 == True);
   2360 
    2361    /* Wrap an integer as an int register, for use assembling
   2362       GrpN insns, in which the greg field is used as a sub-opcode
   2363       and does not really contain a register. */
   2364 #  define fake(_n) mkHReg((_n), HRcInt64, False)
   2365 
   2366    /* vex_printf("asm  "); ppAMD64Instr(i, mode64); vex_printf("\n"); */
   2367 
   2368    switch (i->tag) {
   2369 
   2370    case Ain_Imm64:
   2371       if (i->Ain.Imm64.imm64 <= 0xFFFFFULL) {
   2372          /* Use the short form (load into 32 bit reg, + default
   2373             widening rule) for constants under 1 million.  We could
   2374             use this form for the range 0 to 0x7FFFFFFF inclusive, but
   2375             limit it to a smaller range for verifiability purposes. */
   2376          if (1 & iregBit3(i->Ain.Imm64.dst))
   2377             *p++ = 0x41;
   2378          *p++ = 0xB8 + iregBits210(i->Ain.Imm64.dst);
   2379          p = emit32(p, (UInt)i->Ain.Imm64.imm64);
   2380       } else {
   2381          *p++ = toUChar(0x48 + (1 & iregBit3(i->Ain.Imm64.dst)));
   2382          *p++ = toUChar(0xB8 + iregBits210(i->Ain.Imm64.dst));
   2383          p = emit64(p, i->Ain.Imm64.imm64);
   2384       }
   2385       goto done;
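               /* Note on the case above (illustrative, not a comment from
                  the original): an Imm64 of 0x1234 destined for %rcx takes
                  the short form and emits B9 34 12 00 00, i.e.
                  "movl $0x1234, %ecx", relying on the implicit
                  zero-extension to clear the upper 32 bits of %rcx. */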
   2386 
   2387    case Ain_Alu64R:
   2388       /* Deal specially with MOV */
   2389       if (i->Ain.Alu64R.op == Aalu_MOV) {
   2390          switch (i->Ain.Alu64R.src->tag) {
   2391             case Armi_Imm:
   2392                if (0 == (i->Ain.Alu64R.src->Armi.Imm.imm32 & ~0xFFFFF)) {
   2393                   /* Actually we could use this form for constants in
   2394                      the range 0 through 0x7FFFFFFF inclusive, but
   2395                      limit it to a small range for verifiability
   2396                      purposes. */
   2397                   /* Generate "movl $imm32, 32-bit-register" and let
   2398                      the default zero-extend rule cause the upper half
   2399                      of the dst to be zeroed out too.  This saves 1
   2400                      and sometimes 2 bytes compared to the more
   2401                      obvious encoding in the 'else' branch. */
   2402                   if (1 & iregBit3(i->Ain.Alu64R.dst))
   2403                      *p++ = 0x41;
   2404                   *p++ = 0xB8 + iregBits210(i->Ain.Alu64R.dst);
   2405                   p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
   2406                } else {
   2407                   *p++ = toUChar(0x48 + (1 & iregBit3(i->Ain.Alu64R.dst)));
   2408                   *p++ = 0xC7;
   2409                   *p++ = toUChar(0xC0 + iregBits210(i->Ain.Alu64R.dst));
   2410                   p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
   2411                }
   2412                goto done;
   2413             case Armi_Reg:
   2414                *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
   2415                                   i->Ain.Alu64R.dst );
   2416                *p++ = 0x89;
   2417                p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
   2418                                 i->Ain.Alu64R.dst);
   2419                goto done;
   2420             case Armi_Mem:
   2421                *p++ = rexAMode_M(i->Ain.Alu64R.dst,
   2422                                  i->Ain.Alu64R.src->Armi.Mem.am);
   2423                *p++ = 0x8B;
   2424                p = doAMode_M(p, i->Ain.Alu64R.dst,
   2425                                 i->Ain.Alu64R.src->Armi.Mem.am);
   2426                goto done;
   2427             default:
   2428                goto bad;
   2429          }
   2430       }
   2431       /* MUL */
   2432       if (i->Ain.Alu64R.op == Aalu_MUL) {
   2433          switch (i->Ain.Alu64R.src->tag) {
   2434             case Armi_Reg:
   2435                *p++ = rexAMode_R( i->Ain.Alu64R.dst,
   2436                                   i->Ain.Alu64R.src->Armi.Reg.reg);
   2437                *p++ = 0x0F;
   2438                *p++ = 0xAF;
   2439                p = doAMode_R(p, i->Ain.Alu64R.dst,
   2440                                 i->Ain.Alu64R.src->Armi.Reg.reg);
   2441                goto done;
   2442             case Armi_Mem:
   2443                *p++ = rexAMode_M(i->Ain.Alu64R.dst,
   2444                                  i->Ain.Alu64R.src->Armi.Mem.am);
   2445                *p++ = 0x0F;
   2446                *p++ = 0xAF;
   2447                p = doAMode_M(p, i->Ain.Alu64R.dst,
   2448                                 i->Ain.Alu64R.src->Armi.Mem.am);
   2449                goto done;
   2450             case Armi_Imm:
   2451                if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
   2452                   *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
   2453                   *p++ = 0x6B;
   2454                   p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
   2455                   *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
   2456                } else {
   2457                   *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
   2458                   *p++ = 0x69;
   2459                   p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
   2460                   p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
   2461                }
   2462                goto done;
   2463             default:
   2464                goto bad;
   2465          }
   2466       }
   2467       /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
   2468       opc = opc_rr = subopc_imm = opc_imma = 0;
   2469       switch (i->Ain.Alu64R.op) {
   2470          case Aalu_ADC: opc = 0x13; opc_rr = 0x11;
   2471                         subopc_imm = 2; opc_imma = 0x15; break;
   2472          case Aalu_ADD: opc = 0x03; opc_rr = 0x01;
   2473                         subopc_imm = 0; opc_imma = 0x05; break;
   2474          case Aalu_SUB: opc = 0x2B; opc_rr = 0x29;
   2475                         subopc_imm = 5; opc_imma = 0x2D; break;
   2476          case Aalu_SBB: opc = 0x1B; opc_rr = 0x19;
   2477                         subopc_imm = 3; opc_imma = 0x1D; break;
   2478          case Aalu_AND: opc = 0x23; opc_rr = 0x21;
   2479                         subopc_imm = 4; opc_imma = 0x25; break;
   2480          case Aalu_XOR: opc = 0x33; opc_rr = 0x31;
   2481                         subopc_imm = 6; opc_imma = 0x35; break;
   2482          case Aalu_OR:  opc = 0x0B; opc_rr = 0x09;
   2483                         subopc_imm = 1; opc_imma = 0x0D; break;
   2484          case Aalu_CMP: opc = 0x3B; opc_rr = 0x39;
   2485                         subopc_imm = 7; opc_imma = 0x3D; break;
   2486          default: goto bad;
   2487       }
   2488       switch (i->Ain.Alu64R.src->tag) {
   2489          case Armi_Imm:
   2490             if (i->Ain.Alu64R.dst == hregAMD64_RAX()
   2491                 && !fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
   2492                goto bad; /* FIXME: awaiting test case */
   2493                *p++ = toUChar(opc_imma);
   2494                p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
   2495             } else
   2496             if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
   2497                *p++ = rexAMode_R( fake(0), i->Ain.Alu64R.dst );
   2498                *p++ = 0x83;
   2499                p    = doAMode_R(p, fake(subopc_imm), i->Ain.Alu64R.dst);
   2500                *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
   2501             } else {
   2502                *p++ = rexAMode_R( fake(0), i->Ain.Alu64R.dst);
   2503                *p++ = 0x81;
   2504                p    = doAMode_R(p, fake(subopc_imm), i->Ain.Alu64R.dst);
   2505                p    = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
   2506             }
   2507             goto done;
   2508          case Armi_Reg:
   2509             *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
   2510                                i->Ain.Alu64R.dst);
   2511             *p++ = toUChar(opc_rr);
   2512             p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
   2513                              i->Ain.Alu64R.dst);
   2514             goto done;
   2515          case Armi_Mem:
   2516             *p++ = rexAMode_M( i->Ain.Alu64R.dst,
   2517                                i->Ain.Alu64R.src->Armi.Mem.am);
   2518             *p++ = toUChar(opc);
   2519             p = doAMode_M(p, i->Ain.Alu64R.dst,
   2520                              i->Ain.Alu64R.src->Armi.Mem.am);
   2521             goto done;
   2522          default:
   2523             goto bad;
   2524       }
   2525       break;
   2526 
   2527    case Ain_Alu64M:
   2528       /* Deal specially with MOV */
   2529       if (i->Ain.Alu64M.op == Aalu_MOV) {
   2530          switch (i->Ain.Alu64M.src->tag) {
   2531             case Ari_Reg:
   2532                *p++ = rexAMode_M(i->Ain.Alu64M.src->Ari.Reg.reg,
   2533                                  i->Ain.Alu64M.dst);
   2534                *p++ = 0x89;
   2535                p = doAMode_M(p, i->Ain.Alu64M.src->Ari.Reg.reg,
   2536                                 i->Ain.Alu64M.dst);
   2537                goto done;
   2538             case Ari_Imm:
   2539                *p++ = rexAMode_M(fake(0), i->Ain.Alu64M.dst);
   2540                *p++ = 0xC7;
   2541                p = doAMode_M(p, fake(0), i->Ain.Alu64M.dst);
   2542                p = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32);
   2543                goto done;
   2544             default:
   2545                goto bad;
   2546          }
   2547       }
   2548 //..       /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP.  MUL is not
   2549 //..          allowed here. */
   2550 //..       opc = subopc_imm = opc_imma = 0;
   2551 //..       switch (i->Xin.Alu32M.op) {
   2552 //..          case Xalu_ADD: opc = 0x01; subopc_imm = 0; break;
   2553 //..          case Xalu_SUB: opc = 0x29; subopc_imm = 5; break;
   2554 //..          default: goto bad;
   2555 //..       }
   2556 //..       switch (i->Xin.Alu32M.src->tag) {
   2557 //..          case Xri_Reg:
   2558 //..             *p++ = opc;
   2559 //..             p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
   2560 //..                              i->Xin.Alu32M.dst);
   2561 //..             goto done;
   2562 //..          case Xri_Imm:
   2563 //..             if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) {
   2564 //..                *p++ = 0x83;
   2565 //..                p    = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
   2566 //..                *p++ = 0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32;
   2567 //..                goto done;
   2568 //..             } else {
   2569 //..                *p++ = 0x81;
   2570 //..                p    = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
   2571 //..                p    = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
   2572 //..                goto done;
   2573 //..             }
   2574 //..          default:
   2575 //..             goto bad;
   2576 //..       }
   2577       break;
   2578 
   2579    case Ain_Sh64:
   2580       opc_cl = opc_imm = subopc = 0;
   2581       switch (i->Ain.Sh64.op) {
   2582          case Ash_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
   2583          case Ash_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
   2584          case Ash_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
   2585          default: goto bad;
   2586       }
   2587       if (i->Ain.Sh64.src == 0) {
   2588          *p++ = rexAMode_R(fake(0), i->Ain.Sh64.dst);
   2589          *p++ = toUChar(opc_cl);
   2590          p = doAMode_R(p, fake(subopc), i->Ain.Sh64.dst);
   2591          goto done;
   2592       } else {
   2593          *p++ = rexAMode_R(fake(0), i->Ain.Sh64.dst);
   2594          *p++ = toUChar(opc_imm);
   2595          p = doAMode_R(p, fake(subopc), i->Ain.Sh64.dst);
   2596          *p++ = (UChar)(i->Ain.Sh64.src);
   2597          goto done;
   2598       }
   2599       break;
   2600 
   2601    case Ain_Test64:
   2602       /* testq sign-extend($imm32), %reg */
   2603       *p++ = rexAMode_R(fake(0), i->Ain.Test64.dst);
   2604       *p++ = 0xF7;
   2605       p = doAMode_R(p, fake(0), i->Ain.Test64.dst);
   2606       p = emit32(p, i->Ain.Test64.imm32);
   2607       goto done;
   2608 
   2609    case Ain_Unary64:
   2610       if (i->Ain.Unary64.op == Aun_NOT) {
   2611          *p++ = rexAMode_R(fake(0), i->Ain.Unary64.dst);
   2612          *p++ = 0xF7;
   2613          p = doAMode_R(p, fake(2), i->Ain.Unary64.dst);
   2614          goto done;
   2615       }
   2616       if (i->Ain.Unary64.op == Aun_NEG) {
   2617          *p++ = rexAMode_R(fake(0), i->Ain.Unary64.dst);
   2618          *p++ = 0xF7;
   2619          p = doAMode_R(p, fake(3), i->Ain.Unary64.dst);
   2620          goto done;
   2621       }
   2622       break;
   2623 
   2624    case Ain_Lea64:
   2625       *p++ = rexAMode_M(i->Ain.Lea64.dst, i->Ain.Lea64.am);
   2626       *p++ = 0x8D;
   2627       p = doAMode_M(p, i->Ain.Lea64.dst, i->Ain.Lea64.am);
   2628       goto done;
   2629 
   2630    case Ain_Alu32R:
   2631       /* ADD/SUB/AND/OR/XOR/CMP */
   2632       opc = opc_rr = subopc_imm = opc_imma = 0;
   2633       switch (i->Ain.Alu32R.op) {
   2634          case Aalu_ADD: opc = 0x03; opc_rr = 0x01;
   2635                         subopc_imm = 0; opc_imma = 0x05; break;
   2636          case Aalu_SUB: opc = 0x2B; opc_rr = 0x29;
   2637                         subopc_imm = 5; opc_imma = 0x2D; break;
   2638          case Aalu_AND: opc = 0x23; opc_rr = 0x21;
   2639                         subopc_imm = 4; opc_imma = 0x25; break;
   2640          case Aalu_XOR: opc = 0x33; opc_rr = 0x31;
   2641                         subopc_imm = 6; opc_imma = 0x35; break;
   2642          case Aalu_OR:  opc = 0x0B; opc_rr = 0x09;
   2643                         subopc_imm = 1; opc_imma = 0x0D; break;
   2644          case Aalu_CMP: opc = 0x3B; opc_rr = 0x39;
   2645                         subopc_imm = 7; opc_imma = 0x3D; break;
   2646          default: goto bad;
   2647       }
   2648       switch (i->Ain.Alu32R.src->tag) {
   2649          case Armi_Imm:
   2650             if (i->Ain.Alu32R.dst == hregAMD64_RAX()
   2651                 && !fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) {
   2652                goto bad; /* FIXME: awaiting test case */
   2653                *p++ = toUChar(opc_imma);
   2654                p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32);
   2655             } else
   2656             if (fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) {
   2657                rex  = clearWBit( rexAMode_R( fake(0), i->Ain.Alu32R.dst ) );
   2658                if (rex != 0x40) *p++ = rex;
   2659                *p++ = 0x83;
   2660                p    = doAMode_R(p, fake(subopc_imm), i->Ain.Alu32R.dst);
   2661                *p++ = toUChar(0xFF & i->Ain.Alu32R.src->Armi.Imm.imm32);
   2662             } else {
   2663                rex  = clearWBit( rexAMode_R( fake(0), i->Ain.Alu32R.dst) );
   2664                if (rex != 0x40) *p++ = rex;
   2665                *p++ = 0x81;
   2666                p    = doAMode_R(p, fake(subopc_imm), i->Ain.Alu32R.dst);
   2667                p    = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32);
   2668             }
   2669             goto done;
   2670          case Armi_Reg:
   2671             rex  = clearWBit(
   2672                    rexAMode_R( i->Ain.Alu32R.src->Armi.Reg.reg,
   2673                                i->Ain.Alu32R.dst) );
   2674             if (rex != 0x40) *p++ = rex;
   2675             *p++ = toUChar(opc_rr);
   2676             p = doAMode_R(p, i->Ain.Alu32R.src->Armi.Reg.reg,
   2677                              i->Ain.Alu32R.dst);
   2678             goto done;
   2679          case Armi_Mem:
   2680             rex  = clearWBit(
   2681                    rexAMode_M( i->Ain.Alu32R.dst,
   2682                                i->Ain.Alu32R.src->Armi.Mem.am) );
   2683             if (rex != 0x40) *p++ = rex;
   2684             *p++ = toUChar(opc);
   2685             p = doAMode_M(p, i->Ain.Alu32R.dst,
   2686                              i->Ain.Alu32R.src->Armi.Mem.am);
   2687             goto done;
   2688          default:
   2689             goto bad;
   2690       }
   2691       break;
   2692 
   2693    case Ain_MulL:
   2694       subopc = i->Ain.MulL.syned ? 5 : 4;
   2695       switch (i->Ain.MulL.src->tag)  {
   2696          case Arm_Mem:
   2697             *p++ = rexAMode_M( fake(0),
   2698                                i->Ain.MulL.src->Arm.Mem.am);
   2699             *p++ = 0xF7;
   2700             p = doAMode_M(p, fake(subopc),
   2701                              i->Ain.MulL.src->Arm.Mem.am);
   2702             goto done;
   2703          case Arm_Reg:
   2704             *p++ = rexAMode_R(fake(0),
   2705                               i->Ain.MulL.src->Arm.Reg.reg);
   2706             *p++ = 0xF7;
   2707             p = doAMode_R(p, fake(subopc),
   2708                              i->Ain.MulL.src->Arm.Reg.reg);
   2709             goto done;
   2710          default:
   2711             goto bad;
   2712       }
   2713       break;
   2714 
   2715    case Ain_Div:
   2716       subopc = i->Ain.Div.syned ? 7 : 6;
   2717       if (i->Ain.Div.sz == 4) {
   2718          switch (i->Ain.Div.src->tag)  {
   2719             case Arm_Mem:
   2720                goto bad;
   2721                /*FIXME*/
   2722                *p++ = 0xF7;
   2723                p = doAMode_M(p, fake(subopc),
   2724                                 i->Ain.Div.src->Arm.Mem.am);
   2725                goto done;
   2726             case Arm_Reg:
   2727                *p++ = clearWBit(
   2728                       rexAMode_R( fake(0), i->Ain.Div.src->Arm.Reg.reg));
   2729                *p++ = 0xF7;
   2730                p = doAMode_R(p, fake(subopc),
   2731                                 i->Ain.Div.src->Arm.Reg.reg);
   2732                goto done;
   2733             default:
   2734                goto bad;
   2735          }
   2736       }
   2737       if (i->Ain.Div.sz == 8) {
   2738          switch (i->Ain.Div.src->tag)  {
   2739             case Arm_Mem:
   2740                *p++ = rexAMode_M( fake(0),
   2741                                   i->Ain.Div.src->Arm.Mem.am);
   2742                *p++ = 0xF7;
   2743                p = doAMode_M(p, fake(subopc),
   2744                                 i->Ain.Div.src->Arm.Mem.am);
   2745                goto done;
   2746             case Arm_Reg:
   2747                *p++ = rexAMode_R( fake(0),
   2748                                   i->Ain.Div.src->Arm.Reg.reg);
   2749                *p++ = 0xF7;
   2750                p = doAMode_R(p, fake(subopc),
   2751                                 i->Ain.Div.src->Arm.Reg.reg);
   2752                goto done;
   2753             default:
   2754                goto bad;
   2755          }
   2756       }
   2757       break;
   2758 
   2759 //..    case Xin_Sh3232:
   2760 //..       vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR);
   2761 //..       if (i->Xin.Sh3232.amt == 0) {
   2762 //..          /* shldl/shrdl by %cl */
   2763 //..          *p++ = 0x0F;
   2764 //..          if (i->Xin.Sh3232.op == Xsh_SHL) {
   2765 //..             *p++ = 0xA5;
   2766 //..          } else {
   2767 //..             *p++ = 0xAD;
   2768 //..          }
   2769 //..          p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst);
   2770 //..          goto done;
   2771 //..       }
   2772 //..       break;
   2773 
   2774    case Ain_Push:
   2775       switch (i->Ain.Push.src->tag) {
   2776          case Armi_Mem:
   2777             *p++ = clearWBit(
   2778                    rexAMode_M(fake(0), i->Ain.Push.src->Armi.Mem.am));
   2779             *p++ = 0xFF;
   2780             p = doAMode_M(p, fake(6), i->Ain.Push.src->Armi.Mem.am);
   2781             goto done;
   2782          case Armi_Imm:
   2783             *p++ = 0x68;
   2784             p = emit32(p, i->Ain.Push.src->Armi.Imm.imm32);
   2785             goto done;
   2786          case Armi_Reg:
   2787             *p++ = toUChar(0x40 + (1 & iregBit3(i->Ain.Push.src->Armi.Reg.reg)));
   2788             *p++ = toUChar(0x50 + iregBits210(i->Ain.Push.src->Armi.Reg.reg));
   2789             goto done;
   2790         default:
   2791             goto bad;
   2792       }
   2793 
   2794    case Ain_Call: {
   2795       /* As per detailed comment for Ain_Call in
   2796          getRegUsage_AMD64Instr above, %r11 is used as an address
   2797          temporary. */
   2798       /* jump over the following two insns if the condition does not
   2799          hold */
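              /* Worked example, illustrative only (the target address and
                 condition are hypothetical; this assumes Acc_NZ encodes
                 condition code 5): a conditional call to 0x501234 under
                 Acc_NZ would emit
                    74 0A                  jz  +10   (skip the call when the
                                                      condition fails)
                    49 C7 C3 34 12 50 00   movq $0x501234, %r11
                    41 FF D3               call *%r11
              */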
   2800       Bool shortImm = fitsIn32Bits(i->Ain.Call.target);
   2801       if (i->Ain.Call.cond != Acc_ALWAYS) {
   2802          *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
   2803          *p++ = shortImm ? 10 : 13;
   2804          /* 10 or 13 bytes in the next two insns */
   2805       }
   2806       if (shortImm) {
   2807          /* 7 bytes: movl sign-extend(imm32), %r11 */
   2808          *p++ = 0x49;
   2809          *p++ = 0xC7;
   2810          *p++ = 0xC3;
   2811          p = emit32(p, (UInt)i->Ain.Call.target);
   2812       } else {
   2813          /* 10 bytes: movabsq $target, %r11 */
   2814          *p++ = 0x49;
   2815          *p++ = 0xBB;
   2816          p = emit64(p, i->Ain.Call.target);
   2817       }
   2818       /* 3 bytes: call *%r11 */
   2819       *p++ = 0x41;
   2820       *p++ = 0xFF;
   2821       *p++ = 0xD3;
   2822       goto done;
   2823    }
   2824 
   2825    case Ain_Goto: {
   2826       void* dispatch_to_use = NULL;
   2827       vassert(dispatch_unassisted != NULL);
   2828       vassert(dispatch_assisted != NULL);
   2829 
   2830       /* Use ptmp for backpatching conditional jumps. */
   2831       ptmp = NULL;
   2832 
   2833       /* First off, if this is conditional, create a conditional
   2834          jump over the rest of it. */
   2835       if (i->Ain.Goto.cond != Acc_ALWAYS) {
   2836          /* jmp fwds if !condition */
   2837          *p++ = toUChar(0x70 + (i->Ain.Goto.cond ^ 1));
   2838          ptmp = p; /* fill in this bit later */
   2839          *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
   2840       }
   2841 
   2842       /* If a non-boring jump kind, set %rbp (the guest state pointer)
   2843          appropriately.  Since these numbers are all small positive
   2844          integers, we can get away with "movl $N, %ebp" rather than
   2845          the longer "movq $N, %rbp".  Also, decide which dispatcher we
   2846          need to use. */
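              /* Encoding note, illustrative only: "movl $N, %ebp" below is
                 BD imm32 (5 bytes), whereas "movq $N, %rbp" would need
                 48 C7 C5 imm32 (7 bytes).  The short form is safe because the
                 trap-code constants are small positive numbers, so the
                 implicit zero-extension of the 32-bit write leaves the right
                 value in %rbp. */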
   2847       dispatch_to_use = dispatch_assisted;
   2848 
   2849       /* movl $magic_number, %ebp */
   2850       switch (i->Ain.Goto.jk) {
   2851          case Ijk_ClientReq:
   2852             *p++ = 0xBD;
   2853             p = emit32(p, VEX_TRC_JMP_CLIENTREQ); break;
   2854          case Ijk_Sys_syscall:
   2855             *p++ = 0xBD;
   2856             p = emit32(p, VEX_TRC_JMP_SYS_SYSCALL); break;
   2857          case Ijk_Sys_int32:
   2858             *p++ = 0xBD;
   2859             p = emit32(p, VEX_TRC_JMP_SYS_INT32); break;
   2860          case Ijk_Yield:
   2861             *p++ = 0xBD;
   2862             p = emit32(p, VEX_TRC_JMP_YIELD); break;
   2863          case Ijk_YieldNoRedir:
   2864             *p++ = 0xBD;
   2865             p = emit32(p, VEX_TRC_JMP_YIELD_NOREDIR); break;
   2866          case Ijk_EmWarn:
   2867             *p++ = 0xBD;
   2868             p = emit32(p, VEX_TRC_JMP_EMWARN); break;
   2869          case Ijk_MapFail:
   2870             *p++ = 0xBD;
   2871             p = emit32(p, VEX_TRC_JMP_MAPFAIL); break;
   2872          case Ijk_NoDecode:
   2873             *p++ = 0xBD;
   2874             p = emit32(p, VEX_TRC_JMP_NODECODE); break;
   2875          case Ijk_TInval:
   2876             *p++ = 0xBD;
   2877             p = emit32(p, VEX_TRC_JMP_TINVAL); break;
   2878          case Ijk_NoRedir:
   2879             *p++ = 0xBD;
   2880             p = emit32(p, VEX_TRC_JMP_NOREDIR); break;
   2881          case Ijk_SigTRAP:
   2882             *p++ = 0xBD;
   2883             p = emit32(p, VEX_TRC_JMP_SIGTRAP); break;
   2884          case Ijk_SigSEGV:
   2885             *p++ = 0xBD;
   2886             p = emit32(p, VEX_TRC_JMP_SIGSEGV); break;
   2887          case Ijk_Ret:
   2888          case Ijk_Call:
   2889          case Ijk_Boring:
   2890             dispatch_to_use = dispatch_unassisted;
   2891             break;
   2892          default:
   2893             ppIRJumpKind(i->Ain.Goto.jk);
   2894             vpanic("emit_AMD64Instr.Ain_Goto: unknown jump kind");
   2895       }
   2896 
   2897       /* Get the destination address into %rax */
   2898       if (i->Ain.Goto.dst->tag == Ari_Imm) {
   2899          /* movl sign-ext($immediate), %rax */
   2900          *p++ = 0x48;
   2901          *p++ = 0xC7;
   2902          *p++ = 0xC0;
   2903          p = emit32(p, i->Ain.Goto.dst->Ari.Imm.imm32);
   2904       } else {
   2905          vassert(i->Ain.Goto.dst->tag == Ari_Reg);
   2906          /* movq %reg, %rax */
   2907          if (i->Ain.Goto.dst->Ari.Reg.reg != hregAMD64_RAX()) {
   2908             *p++ = rexAMode_R(i->Ain.Goto.dst->Ari.Reg.reg, hregAMD64_RAX());
   2909             *p++ = 0x89;
   2910             p = doAMode_R(p, i->Ain.Goto.dst->Ari.Reg.reg, hregAMD64_RAX());
   2911          }
   2912       }
   2913 
   2914       /* Get the dispatcher address into %rdx.  This has to happen
   2915          after the load of %rax since %rdx might be carrying the value
   2916          destined for %rax immediately prior to this Ain_Goto. */
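              /* Size note, illustrative only: the short form below,
                 48 C7 C2 imm32 (7 bytes), sign-extends a 32-bit immediate
                 into %rdx; the movabsq form, 48 BA imm64 (10 bytes), carries
                 the full 64-bit dispatcher address. */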
   2917       vassert(sizeof(ULong) == sizeof(void*));
   2918 
   2919       if (fitsIn32Bits(Ptr_to_ULong(dispatch_to_use))) {
   2920          /* movl sign-extend(imm32), %rdx */
   2921          *p++ = 0x48;
   2922          *p++ = 0xC7;
   2923          *p++ = 0xC2;
   2924          p = emit32(p, (UInt)Ptr_to_ULong(dispatch_to_use));
   2925       } else {
   2926          /* movabsq $imm64, %rdx */
   2927          *p++ = 0x48;
   2928          *p++ = 0xBA;
   2929          p = emit64(p, Ptr_to_ULong(dispatch_to_use));
   2930       }
   2931       /* jmp *%rdx */
   2932       *p++ = 0xFF;
   2933       *p++ = 0xE2;
   2934 
   2935       /* Fix up the conditional jump, if there was one. */
   2936       if (i->Ain.Goto.cond != Acc_ALWAYS) {
   2937          Int delta = p - ptmp;
   2938          vassert(delta > 0 && delta < 30);
   2939          *ptmp = toUChar(delta-1);
   2940       }
   2941       goto done;
   2942    }
   2943 
   2944    case Ain_CMov64:
   2945       vassert(i->Ain.CMov64.cond != Acc_ALWAYS);
   2946       if (i->Ain.CMov64.src->tag == Arm_Reg) {
   2947          *p++ = rexAMode_R(i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Reg.reg);
   2948          *p++ = 0x0F;
   2949          *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond));
   2950          p = doAMode_R(p, i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Reg.reg);
   2951          goto done;
   2952       }
   2953       if (i->Ain.CMov64.src->tag == Arm_Mem) {
   2954          *p++ = rexAMode_M(i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Mem.am);
   2955          *p++ = 0x0F;
   2956          *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond));
   2957          p = doAMode_M(p, i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Mem.am);
   2958          goto done;
   2959       }
   2960       break;
   2961 
   2962    case Ain_MovxLQ:
   2963       /* No, _don't_ ask me why the sense of the args has to be
   2964          different in the S vs Z case.  I don't know. */
   2965       if (i->Ain.MovxLQ.syned) {
   2966          /* Need REX.W = 1 here, but rexAMode_R does that for us. */
   2967          *p++ = rexAMode_R(i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
   2968          *p++ = 0x63;
   2969          p = doAMode_R(p, i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
   2970       } else {
   2971          /* Produce a 32-bit reg-reg move, since the implicit
   2972             zero-extend does what we want. */
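                 /* Illustrative example (hypothetical registers): with
                    src=%rsi and dst=%rdi this emits 40 89 F7, i.e.
                    "movl %esi, %edi", and the 32-bit write clears bits
                    63:32 of %rdi. */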
   2973          *p++ = clearWBit (
   2974                    rexAMode_R(i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst));
   2975          *p++ = 0x89;
   2976          p = doAMode_R(p, i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst);
   2977       }
   2978       goto done;
   2979 
   2980    case Ain_LoadEX:
   2981       if (i->Ain.LoadEX.szSmall == 1 && !i->Ain.LoadEX.syned) {
   2982          /* movzbq */
   2983          *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
   2984          *p++ = 0x0F;
   2985          *p++ = 0xB6;
   2986          p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
   2987          goto done;
   2988       }
   2989       if (i->Ain.LoadEX.szSmall == 2 && !i->Ain.LoadEX.syned) {
   2990          /* movzwq */
   2991          *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
   2992          *p++ = 0x0F;
   2993          *p++ = 0xB7;
   2994          p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
   2995          goto done;
   2996       }
   2997       if (i->Ain.LoadEX.szSmall == 4 && !i->Ain.LoadEX.syned) {
   2998          /* movzlq */
   2999          /* This isn't really an existing AMD64 instruction per se.
   3000             Rather, we have to do a 32-bit load.  Because a 32-bit
   3001             write implicitly clears the upper 32 bits of the target
   3002             register, we get what we want. */
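                 /* Illustrative example (hypothetical operands): with
                    dst=%rax and a (%rdi) amode this path emits 40 8B 07,
                    i.e. "movl (%rdi), %eax" with a redundant REX prefix;
                    the 32-bit destination write clears bits 63:32. */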
   3003          *p++ = clearWBit(
   3004                 rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src));
   3005          *p++ = 0x8B;
   3006          p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
   3007          goto done;
   3008       }
   3009       break;
   3010 
   3011    case Ain_Set64:
   3012       /* Make the destination register be 1 or 0, depending on whether
   3013          the relevant condition holds.  Complication: the top 56 bits
   3014          of the destination should be forced to zero, but doing 'xorq
   3015          %r,%r' kills the flag(s) we are about to read.  Sigh.  So
   3016          start off by moving $0 into the dest. */
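              /* Worked example, illustrative only (register and condition are
                 hypothetical; this assumes Acc_Z encodes condition code 4):
                 for dst=%r9 and cond=Acc_Z this emits
                    49 C7 C1 00 00 00 00   movq $0, %r9
                    41 0F 94 C1            sete %r9b
              */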
   3017       reg = iregBits3210(i->Ain.Set64.dst);
   3018       vassert(reg < 16);
   3019 
   3020       /* movq $0, %dst */
   3021       *p++ = toUChar(reg >= 8 ? 0x49 : 0x48);
   3022       *p++ = 0xC7;
   3023       *p++ = toUChar(0xC0 + (reg & 7));
   3024       p = emit32(p, 0);
   3025 
   3026       /* setb lo8(%dst) */
   3027       /* note, 8-bit register rex trickyness.  Be careful here. */
   3028       /* note, 8-bit register rex trickiness.  Be careful here. */
   3029       *p++ = 0x0F;
   3030       *p++ = toUChar(0x90 + (0x0F & i->Ain.Set64.cond));
   3031       *p++ = toUChar(0xC0 + (reg & 7));
   3032       goto done;
   3033 
   3034    case Ain_Bsfr64:
   3035       *p++ = rexAMode_R(i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
   3036       *p++ = 0x0F;
   3037       if (i->Ain.Bsfr64.isFwds) {
   3038          *p++ = 0xBC;
   3039       } else {
   3040          *p++ = 0xBD;
   3041       }
   3042       p = doAMode_R(p, i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
   3043       goto done;
   3044 
   3045    case Ain_MFence:
   3046       /* mfence */
   3047       *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
   3048       goto done;
   3049 
   3050    case Ain_ACAS:
   3051       /* lock */
   3052       *p++ = 0xF0;
   3053       if (i->Ain.ACAS.sz == 2) *p++ = 0x66;
   3054       /* cmpxchg{b,w,l,q} %rbx,mem.  Expected-value in %rax, new value
   3055          in %rbx.  The new-value register is hardwired to be %rbx
   3056          since dealing with byte integer registers is too much hassle,
   3057          so we force the register operand to %rbx (could equally be
   3058          %rcx or %rdx). */
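              /* Illustrative example (hypothetical amode): for sz==4 with a
                 (%rdi) amode this emits F0 40 0F B1 1F, i.e.
                 "lock cmpxchgl %ebx, (%rdi)" (the 0x40 REX is redundant, as
                 noted below). */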
   3059       rex = rexAMode_M( hregAMD64_RBX(), i->Ain.ACAS.addr );
   3060       if (i->Ain.ACAS.sz != 8)
   3061          rex = clearWBit(rex);
   3062 
   3063       *p++ = rex; /* this can emit 0x40, which is pointless. oh well. */
   3064       *p++ = 0x0F;
   3065       if (i->Ain.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
   3066       p = doAMode_M(p, hregAMD64_RBX(), i->Ain.ACAS.addr);
   3067       goto done;
   3068 
   3069    case Ain_DACAS:
   3070       /* lock */
   3071       *p++ = 0xF0;
   3072       /* cmpxchg{8,16}b m{64,128}.  Expected-value in %rdx:%rax, new
   3073          value in %rcx:%rbx.  All 4 regs are hardwired in the ISA, so
   3074          aren't encoded in the insn. */
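              /* Illustrative example (hypothetical amode): for sz==8 with a
                 (%rdi) amode this emits F0 48 0F C7 0F, i.e.
                 "lock cmpxchg16b (%rdi)"; for sz==4 the W bit is cleared,
                 giving F0 40 0F C7 0F, "lock cmpxchg8b (%rdi)". */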
   3075       rex = rexAMode_M( fake(1), i->Ain.DACAS.addr );
   3076       if (i->Ain.DACAS.sz != 8)
   3077          rex = clearWBit(rex);
   3078       *p++ = rex;
   3079       *p++ = 0x0F;
   3080       *p++ = 0xC7;
   3081       p = doAMode_M(p, fake(1), i->Ain.DACAS.addr);
   3082       goto done;
   3083 
   3084    case Ain_A87Free:
   3085       vassert(i->Ain.A87Free.nregs > 0 && i->Ain.A87Free.nregs <= 7);
   3086       for (j = 0; j < i->Ain.A87Free.nregs; j++) {
   3087          p = do_ffree_st(p, 7-j);
   3088       }
   3089       goto done;
   3090 
   3091    case Ain_A87PushPop:
   3092       vassert(i->Ain.A87PushPop.szB == 8 || i->Ain.A87PushPop.szB == 4);
   3093       if (i->Ain.A87PushPop.isPush) {
   3094          /* Load from memory into %st(0): flds/fldl amode */
   3095          *p++ = clearWBit(
   3096                    rexAMode_M(fake(0), i->Ain.A87PushPop.addr) );
   3097          *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
   3098          p = doAMode_M(p, fake(0)/*subopcode*/, i->Ain.A87PushPop.addr);
   3099       } else {
   3100          /* Dump %st(0) to memory: fstps/fstpl amode */
   3101          *p++ = clearWBit(
   3102                    rexAMode_M(fake(3), i->Ain.A87PushPop.addr) );
   3103          *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
   3104          p = doAMode_M(p, fake(3)/*subopcode*/, i->Ain.A87PushPop.addr);
   3105          goto done;
   3106       }
   3107       goto done;
   3108 
   3109    case Ain_A87FpOp:
   3110       switch (i->Ain.A87FpOp.op) {
   3111          case Afp_SQRT:   *p++ = 0xD9; *p++ = 0xFA; break;
   3112          case Afp_SIN:    *p++ = 0xD9; *p++ = 0xFE; break;
   3113          case Afp_COS:    *p++ = 0xD9; *p++ = 0xFF; break;
   3114          case Afp_TAN:    *p++ = 0xD9; *p++ = 0xF2; break;
   3115          case Afp_ROUND:  *p++ = 0xD9; *p++ = 0xFC; break;
   3116          case Afp_2XM1:   *p++ = 0xD9; *p++ = 0xF0; break;
   3117          case Afp_SCALE:  *p++ = 0xD9; *p++ = 0xFD; break;
   3118          case Afp_ATAN:   *p++ = 0xD9; *p++ = 0xF3; break;
   3119          case Afp_YL2X:   *p++ = 0xD9; *p++ = 0xF1; break;
   3120          case Afp_YL2XP1: *p++ = 0xD9; *p++ = 0xF9; break;
   3121          case Afp_PREM:   *p++ = 0xD9; *p++ = 0xF8; break;
   3122          case Afp_PREM1:  *p++ = 0xD9; *p++ = 0xF5; break;
   3123          default: goto bad;
   3124       }
   3125       goto done;
   3126 
   3127    case Ain_A87LdCW:
   3128       *p++ = clearWBit(
   3129                 rexAMode_M(fake(5), i->Ain.A87LdCW.addr) );
   3130       *p++ = 0xD9;
   3131       p = doAMode_M(p, fake(5)/*subopcode*/, i->Ain.A87LdCW.addr);
   3132       goto done;
   3133 
   3134    case Ain_A87StSW:
   3135       *p++ = clearWBit(
   3136                 rexAMode_M(fake(7), i->Ain.A87StSW.addr) );
   3137       *p++ = 0xDD;
   3138       p = doAMode_M(p, fake(7)/*subopcode*/, i->Ain.A87StSW.addr);
   3139       goto done;
   3140 
   3141    case Ain_Store:
   3142       if (i->Ain.Store.sz == 2) {
   3143          /* This just goes to show the craziness of the instruction
   3144             set encoding.  We have to insert two prefix bytes, but be
   3145             careful to avoid a conflict in what the size should be, by
   3146             ensuring that REX.W = 0. */
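                 /* Illustrative example (hypothetical operands): storing %ax
                    to a (%rdi) amode emits 66 40 89 07, i.e.
                    "movw %ax, (%rdi)", where the 0x40 REX is redundant but
                    harmless. */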
   3147          *p++ = 0x66; /* override to 16-bits */
   3148          *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
   3149          *p++ = 0x89;
   3150          p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
   3151          goto done;
   3152       }
   3153       if (i->Ain.Store.sz == 4) {
   3154          *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
   3155          *p++ = 0x89;
   3156          p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
   3157          goto done;
   3158       }
   3159       if (i->Ain.Store.sz == 1) {
   3160          /* This is one place where it would be wrong to skip emitting
   3161             a rex byte of 0x40, since the mere presence of rex changes
   3162             the meaning of the byte register access.  Be careful. */
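                 /* Illustrative example (hypothetical operands): storing the
                    low byte of %rsi to a (%rdi) amode emits 40 88 37, i.e.
                    "movb %sil, (%rdi)"; without the 0x40 REX, 88 37 would
                    instead mean "movb %dh, (%rdi)". */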
   3163          *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
   3164          *p++ = 0x88;
   3165          p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
   3166          goto done;
   3167       }
   3168       break;
   3169 
   3170 //..    case Xin_FpUnary:
   3171 //..       /* gop %src, %dst
   3172 //..          --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst)
   3173 //..       */
   3174 //..       p = do_ffree_st7(p);
   3175 //..       p = do_fld_st(p, 0+hregNumber(i->Xin.FpUnary.src));
   3176 //..       p = do_fop1_st(p, i->Xin.FpUnary.op);
   3177 //..       p = do_fstp_st(p, 1+hregNumber(i->Xin.FpUnary.dst));
   3178 //..       goto done;
   3179 //..
   3180 //..    case Xin_FpBinary:
   3181 //..       if (i->Xin.FpBinary.op == Xfp_YL2X
   3182 //..           || i->Xin.FpBinary.op == Xfp_YL2XP1) {
   3183 //..          /* Have to do this specially. */
   3184 //..          /* ffree %st7 ; fld %st(srcL) ;
   3185 //..             ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */
   3186 //..          p = do_ffree_st7(p);
   3187 //..          p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
   3188 //..          p = do_ffree_st7(p);
   3189 //..          p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
   3190 //..          *p++ = 0xD9;
   3191 //..          *p++ = i->Xin.FpBinary.op==Xfp_YL2X ? 0xF1 : 0xF9;
   3192 //..          p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
   3193 //..          goto done;
   3194 //..       }
   3195 //..       if (i->Xin.FpBinary.op == Xfp_ATAN) {
   3196 //..          /* Have to do this specially. */
   3197 //..          /* ffree %st7 ; fld %st(srcL) ;
   3198 //..             ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */
   3199 //..          p = do_ffree_st7(p);
   3200 //..          p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
   3201 //..          p = do_ffree_st7(p);
   3202 //..          p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
   3203 //..          *p++ = 0xD9; *p++ = 0xF3;
   3204 //..          p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
   3205 //..          goto done;
   3206 //..       }
   3207 //..       if (i->Xin.FpBinary.op == Xfp_PREM
   3208 //..           || i->Xin.FpBinary.op == Xfp_PREM1
   3209 //..           || i->Xin.FpBinary.op == Xfp_SCALE) {
   3210 //..          /* Have to do this specially. */
   3211 //..          /* ffree %st7 ; fld %st(srcR) ;
   3212 //..             ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ;
   3213 //..             fincstp ; ffree %st7 */
   3214 //..          p = do_ffree_st7(p);
   3215 //..          p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcR));
   3216 //..          p = do_ffree_st7(p);
   3217 //..          p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcL));
   3218 //..          *p++ = 0xD9;
   3219 //..          switch (i->Xin.FpBinary.op) {
   3220 //..             case Xfp_PREM: *p++ = 0xF8; break;
   3221 //..             case Xfp_PREM1: *p++ = 0xF5; break;
   3222 //..             case Xfp_SCALE: *p++ =  0xFD; break;
   3223 //..             default: vpanic("emitAMD64Instr(FpBinary,PREM/PREM1/SCALE)");
   3224 //..          }
   3225 //..          p = do_fstp_st(p, 2+hregNumber(i->Xin.FpBinary.dst));
   3226 //..          *p++ = 0xD9; *p++ = 0xF7;
   3227 //..          p = do_ffree_st7(p);
   3228 //..          goto done;
   3229 //..       }
   3230 //..       /* General case */
   3231 //..       /* gop %srcL, %srcR, %dst
   3232 //..          --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst)
   3233 //..       */
   3234 //..       p = do_ffree_st7(p);
   3235 //..       p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
   3236 //..       p = do_fop2_st(p, i->Xin.FpBinary.op,
   3237 //..                         1+hregNumber(i->Xin.FpBinary.srcR));
   3238 //..       p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
   3239 //..       goto done;
   3240 //..
   3241 //..    case Xin_FpLdSt:
   3242 //..       vassert(i->Xin.FpLdSt.sz == 4 || i->Xin.FpLdSt.sz == 8);
   3243 //..       if (i->Xin.FpLdSt.isLoad) {
   3244 //..          /* Load from memory into %fakeN.
   3245 //..             --> ffree %st(7) ; fld{s/l} amode ; fstp st(N+1)
   3246 //..          */
   3247 //..          p = do_ffree_st7(p);
   3248 //..          *p++ = i->Xin.FpLdSt.sz==4 ? 0xD9 : 0xDD;
   3249 //.. 	 p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
   3250 //..          p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdSt.reg));
   3251 //..          goto done;
   3252 //..       } else {
   3253 //..          /* Store from %fakeN into memory.
   3254 //..             --> ffree %st(7) ; fld st(N) ; fstp{l|s} amode
   3255 //.. 	 */
   3256 //..          p = do_ffree_st7(p);
   3257 //..          p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdSt.reg));
   3258 //..          *p++ = i->Xin.FpLdSt.sz==4 ? 0xD9 : 0xDD;
   3259 //..          p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
   3260 //..          goto done;
   3261 //..       }
   3262 //..       break;
   3263 //..
   3264 //..    case Xin_FpLdStI:
   3265 //..       if (i->Xin.FpLdStI.isLoad) {
   3266 //..          /* Load from memory into %fakeN, converting from an int.
   3267 //..             --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1)
   3268 //..          */
   3269 //..          switch (i->Xin.FpLdStI.sz) {
   3270 //..             case 8:  opc = 0xDF; subopc_imm = 5; break;
   3271 //..             case 4:  opc = 0xDB; subopc_imm = 0; break;
   3272 //..             case 2:  vassert(0); opc = 0xDF; subopc_imm = 0; break;
   3273 //..             default: vpanic("emitAMD64Instr(Xin_FpLdStI-load)");
   3274 //..          }
   3275 //..          p = do_ffree_st7(p);
   3276 //..          *p++ = opc;
   3277 //..          p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
   3278 //..          p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdStI.reg));
   3279 //..          goto done;
   3280 //..       } else {
   3281 //..          /* Store from %fakeN into memory, converting to an int.
   3282 //..             --> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode
   3283 //.. 	 */
   3284 //..          switch (i->Xin.FpLdStI.sz) {
   3285 //..             case 8:  opc = 0xDF; subopc_imm = 7; break;
   3286 //..             case 4:  opc = 0xDB; subopc_imm = 3; break;
   3287 //..             case 2:  opc = 0xDF; subopc_imm = 3; break;
   3288 //..             default: vpanic("emitAMD64Instr(Xin_FpLdStI-store)");
   3289 //..          }
   3290 //..          p = do_ffree_st7(p);
   3291 //..          p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdStI.reg));
   3292 //..          *p++ = opc;
   3293 //..          p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
   3294 //..          goto done;
   3295 //..       }
   3296 //..       break;
   3297 //..
   3298 //..    case Xin_Fp64to32:
   3299 //..       /* ffree %st7 ; fld %st(src) */
   3300 //..       p = do_ffree_st7(p);
   3301 //..       p = do_fld_st(p, 0+fregNo(i->Xin.Fp64to32.src));
   3302 //..       /* subl $4, %esp */
   3303 //..       *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04;
   3304 //..       /* fstps (%esp) */
   3305 //..       *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24;
   3306 //..       /* flds (%esp) */
   3307 //..       *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24;
   3308 //..       /* addl $4, %esp */
   3309 //..       *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04;
   3310 //..       /* fstp %st(1+dst) */
   3311 //..       p = do_fstp_st(p, 1+fregNo(i->Xin.Fp64to32.dst));
   3312 //..       goto done;
   3313 //..
   3314 //..    case Xin_FpCMov:
   3315 //..       /* jmp fwds if !condition */
   3316 //..       *p++ = 0x70 + (i->Xin.FpCMov.cond ^ 1);
   3317 //..       *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
   3318 //..       ptmp = p;
   3319 //..
   3320 //..       /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */
   3321 //..       p = do_ffree_st7(p);
   3322 //..       p = do_fld_st(p, 0+fregNo(i->Xin.FpCMov.src));
   3323 //..       p = do_fstp_st(p, 1+fregNo(i->Xin.FpCMov.dst));
   3324 //..
   3325 //..       /* Fill in the jump offset. */
   3326 //..       *(ptmp-1) = p - ptmp;
   3327 //..       goto done;
   3328 
   3329    case Ain_LdMXCSR:
   3330       *p++ = clearWBit(rexAMode_M( fake(0), i->Ain.LdMXCSR.addr));
   3331       *p++ = 0x0F;
   3332       *p++ = 0xAE;
   3333       p = doAMode_M(p, fake(2)/*subopcode*/, i->Ain.LdMXCSR.addr);
   3334       goto done;
   3335 
   3336 //..    case Xin_FpStSW_AX:
   3337 //..       /* note, this emits fnstsw %ax, not fstsw %ax */
   3338 //..       *p++ = 0xDF;
   3339 //..       *p++ = 0xE0;
   3340 //..       goto done;
   3341 
   3342    case Ain_SseUComIS:
   3343       /* ucomi[sd] %srcL, %srcR ;  pushfq ; popq %dst */
   3344       /* ucomi[sd] %srcL, %srcR */
   3345       if (i->Ain.SseUComIS.sz == 8) {
   3346          *p++ = 0x66;
   3347       } else {
   3348          goto bad;
   3349          vassert(i->Ain.SseUComIS.sz == 4);
   3350       }
   3351       *p++ = clearWBit (
   3352              rexAMode_R( vreg2ireg(i->Ain.SseUComIS.srcL),
   3353                          vreg2ireg(i->Ain.SseUComIS.srcR) ));
   3354       *p++ = 0x0F;
   3355       *p++ = 0x2E;
   3356       p = doAMode_R(p, vreg2ireg(i->Ain.SseUComIS.srcL),
   3357                        vreg2ireg(i->Ain.SseUComIS.srcR) );
   3358       /* pushfq */
   3359       *p++ = 0x9C;
   3360       /* popq %dst */
   3361       *p++ = toUChar(0x40 + (1 & iregBit3(i->Ain.SseUComIS.dst)));
   3362       *p++ = toUChar(0x58 + iregBits210(i->Ain.SseUComIS.dst));
   3363       goto done;
   3364 
   3365    case Ain_SseSI2SF:
   3366       /* cvtsi2s[sd] %src, %dst */
   3367       rex = rexAMode_R( vreg2ireg(i->Ain.SseSI2SF.dst),
   3368                         i->Ain.SseSI2SF.src );
   3369       *p++ = toUChar(i->Ain.SseSI2SF.szD==4 ? 0xF3 : 0xF2);
   3370       *p++ = toUChar(i->Ain.SseSI2SF.szS==4 ? clearWBit(rex) : rex);
   3371       *p++ = 0x0F;
   3372       *p++ = 0x2A;
   3373       p = doAMode_R( p, vreg2ireg(i->Ain.SseSI2SF.dst),
   3374                         i->Ain.SseSI2SF.src );
   3375       goto done;
   3376 
   3377    case Ain_SseSF2SI:
   3378       /* cvts[sd]2si %src, %dst */
   3379       rex = rexAMode_R( i->Ain.SseSF2SI.dst,
   3380                         vreg2ireg(i->Ain.SseSF2SI.src) );
   3381       *p++ = toUChar(i->Ain.SseSF2SI.szS==4 ? 0xF3 : 0xF2);
   3382       *p++ = toUChar(i->Ain.SseSF2SI.szD==4 ? clearWBit(rex) : rex);
   3383       *p++ = 0x0F;
   3384       *p++ = 0x2D;
   3385       p = doAMode_R( p, i->Ain.SseSF2SI.dst,
   3386                         vreg2ireg(i->Ain.SseSF2SI.src) );
   3387       goto done;
   3388 
   3389    case Ain_SseSDSS:
   3390       /* cvtsd2ss/cvtss2sd %src, %dst */
   3391       *p++ = toUChar(i->Ain.SseSDSS.from64 ? 0xF2 : 0xF3);
   3392       *p++ = clearWBit(
   3393               rexAMode_R( vreg2ireg(i->Ain.SseSDSS.dst),
   3394                           vreg2ireg(i->Ain.SseSDSS.src) ));
   3395       *p++ = 0x0F;
   3396       *p++ = 0x5A;
   3397       p = doAMode_R( p, vreg2ireg(i->Ain.SseSDSS.dst),
   3398                         vreg2ireg(i->Ain.SseSDSS.src) );
   3399       goto done;
   3400 
   3401 //..
   3402 //..    case Xin_FpCmp:
   3403 //..       /* gcmp %fL, %fR, %dst
   3404 //..          -> ffree %st7; fpush %fL ; fucomp %(fR+1) ;
   3405 //..             fnstsw %ax ; movl %eax, %dst
   3406 //..       */
   3407 //..       /* ffree %st7 */
   3408 //..       p = do_ffree_st7(p);
   3409 //..       /* fpush %fL */
   3410 //..       p = do_fld_st(p, 0+fregNo(i->Xin.FpCmp.srcL));
   3411 //..       /* fucomp %(fR+1) */
   3412 //..       *p++ = 0xDD;
   3413 //..       *p++ = 0xE8 + (7 & (1+fregNo(i->Xin.FpCmp.srcR)));
   3414 //..       /* fnstsw %ax */
   3415 //..       *p++ = 0xDF;
   3416 //..       *p++ = 0xE0;
   3417 //..       /*  movl %eax, %dst */
   3418 //..       *p++ = 0x89;
   3419 //..       p = doAMode_R(p, hregAMD64_EAX(), i->Xin.FpCmp.dst);
   3420 //..       goto done;
   3421 //..
   3422 //..    case Xin_SseConst: {
   3423 //..       UShort con = i->Xin.SseConst.con;
   3424 //..       p = push_word_from_tags(p, (con >> 12) & 0xF);
   3425 //..       p = push_word_from_tags(p, (con >> 8) & 0xF);
   3426 //..       p = push_word_from_tags(p, (con >> 4) & 0xF);
   3427 //..       p = push_word_from_tags(p, con & 0xF);
   3428 //..       /* movl (%esp), %xmm-dst */
   3429 //..       *p++ = 0x0F;
   3430 //..       *p++ = 0x10;
   3431 //..       *p++ = 0x04 + 8 * (7 & vregNo(i->Xin.SseConst.dst));
   3432 //..       *p++ = 0x24;
   3433 //..       /* addl $16, %esp */
   3434 //..       *p++ = 0x83;
   3435 //..       *p++ = 0xC4;
   3436 //..       *p++ = 0x10;
   3437 //..       goto done;
   3438 //..    }
   3439 
   3440    case Ain_SseLdSt:
   3441       if (i->Ain.SseLdSt.sz == 8) {
   3442          *p++ = 0xF2;
   3443       } else
   3444       if (i->Ain.SseLdSt.sz == 4) {
   3445          *p++ = 0xF3;
   3446       } else
   3447       if (i->Ain.SseLdSt.sz != 16) {
   3448          vassert(0);
   3449       }
   3450       *p++ = clearWBit(
   3451              rexAMode_M( vreg2ireg(i->Ain.SseLdSt.reg), i->Ain.SseLdSt.addr));
   3452       *p++ = 0x0F;
   3453       *p++ = toUChar(i->Ain.SseLdSt.isLoad ? 0x10 : 0x11);
   3454       p = doAMode_M(p, vreg2ireg(i->Ain.SseLdSt.reg), i->Ain.SseLdSt.addr);
   3455       goto done;
   3456 
   3457    case Ain_SseLdzLO:
   3458       vassert(i->Ain.SseLdzLO.sz == 4 || i->Ain.SseLdzLO.sz == 8);
   3459       /* movs[sd] amode, %xmm-dst */
   3460       *p++ = toUChar(i->Ain.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
   3461       *p++ = clearWBit(
   3462              rexAMode_M(vreg2ireg(i->Ain.SseLdzLO.reg),
   3463                         i->Ain.SseLdzLO.addr));
   3464       *p++ = 0x0F;
   3465       *p++ = 0x10;
   3466       p = doAMode_M(p, vreg2ireg(i->Ain.SseLdzLO.reg),
   3467                        i->Ain.SseLdzLO.addr);
   3468       goto done;
   3469 
   3470    case Ain_Sse32Fx4:
   3471       xtra = 0;
   3472       *p++ = clearWBit(
   3473              rexAMode_R( vreg2ireg(i->Ain.Sse32Fx4.dst),
   3474                          vreg2ireg(i->Ain.Sse32Fx4.src) ));
   3475       *p++ = 0x0F;
   3476       switch (i->Ain.Sse32Fx4.op) {
   3477          case Asse_ADDF:   *p++ = 0x58; break;
   3478          case Asse_DIVF:   *p++ = 0x5E; break;
   3479          case Asse_MAXF:   *p++ = 0x5F; break;
   3480          case Asse_MINF:   *p++ = 0x5D; break;
   3481          case Asse_MULF:   *p++ = 0x59; break;
   3482          case Asse_RCPF:   *p++ = 0x53; break;
   3483          case Asse_RSQRTF: *p++ = 0x52; break;
   3484          case Asse_SQRTF:  *p++ = 0x51; break;
   3485          case Asse_SUBF:   *p++ = 0x5C; break;
   3486          case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
   3487          case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
   3488          case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
   3489          case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
   3490          default: goto bad;
   3491       }
   3492       p = doAMode_R(p, vreg2ireg(i->Ain.Sse32Fx4.dst),
   3493                        vreg2ireg(i->Ain.Sse32Fx4.src) );
   3494       if (xtra & 0x100)
   3495          *p++ = toUChar(xtra & 0xFF);
   3496       goto done;
   3497 
   3498    case Ain_Sse64Fx2:
   3499       xtra = 0;
   3500       *p++ = 0x66;
   3501       *p++ = clearWBit(
   3502              rexAMode_R( vreg2ireg(i->Ain.Sse64Fx2.dst),
   3503                          vreg2ireg(i->Ain.Sse64Fx2.src) ));
   3504       *p++ = 0x0F;
   3505       switch (i->Ain.Sse64Fx2.op) {
   3506          case Asse_ADDF:   *p++ = 0x58; break;
   3507          case Asse_DIVF:   *p++ = 0x5E; break;
   3508          case Asse_MAXF:   *p++ = 0x5F; break;
   3509          case Asse_MINF:   *p++ = 0x5D; break;
   3510          case Asse_MULF:   *p++ = 0x59; break;
   3511 //..          case Xsse_RCPF:   *p++ = 0x53; break;
   3512 //..          case Xsse_RSQRTF: *p++ = 0x52; break;
   3513          case Asse_SQRTF:  *p++ = 0x51; break;
   3514          case Asse_SUBF:   *p++ = 0x5C; break;
   3515          case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
   3516          case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
   3517          case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
   3518          case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
   3519          default: goto bad;
   3520       }
   3521       p = doAMode_R(p, vreg2ireg(i->Ain.Sse64Fx2.dst),
   3522                        vreg2ireg(i->Ain.Sse64Fx2.src) );
   3523       if (xtra & 0x100)
   3524          *p++ = toUChar(xtra & 0xFF);
   3525       goto done;
   3526 
   3527    case Ain_Sse32FLo:
   3528       xtra = 0;
   3529       *p++ = 0xF3;
   3530       *p++ = clearWBit(
   3531              rexAMode_R( vreg2ireg(i->Ain.Sse32FLo.dst),
   3532                          vreg2ireg(i->Ain.Sse32FLo.src) ));
   3533       *p++ = 0x0F;
   3534       switch (i->Ain.Sse32FLo.op) {
   3535          case Asse_ADDF:   *p++ = 0x58; break;
   3536          case Asse_DIVF:   *p++ = 0x5E; break;
   3537          case Asse_MAXF:   *p++ = 0x5F; break;
   3538          case Asse_MINF:   *p++ = 0x5D; break;
   3539          case Asse_MULF:   *p++ = 0x59; break;
   3540          case Asse_RCPF:   *p++ = 0x53; break;
   3541          case Asse_RSQRTF: *p++ = 0x52; break;
   3542          case Asse_SQRTF:  *p++ = 0x51; break;
   3543          case Asse_SUBF:   *p++ = 0x5C; break;
   3544          case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
   3545          case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
   3546          case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
   3547          case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
   3548          default: goto bad;
   3549       }
   3550       p = doAMode_R(p, vreg2ireg(i->Ain.Sse32FLo.dst),
   3551                        vreg2ireg(i->Ain.Sse32FLo.src) );
   3552       if (xtra & 0x100)
   3553          *p++ = toUChar(xtra & 0xFF);
   3554       goto done;
   3555 
   3556    case Ain_Sse64FLo:
   3557       xtra = 0;
   3558       *p++ = 0xF2;
   3559       *p++ = clearWBit(
   3560              rexAMode_R( vreg2ireg(i->Ain.Sse64FLo.dst),
   3561                          vreg2ireg(i->Ain.Sse64FLo.src) ));
   3562       *p++ = 0x0F;
   3563       switch (i->Ain.Sse64FLo.op) {
   3564          case Asse_ADDF:   *p++ = 0x58; break;
   3565          case Asse_DIVF:   *p++ = 0x5E; break;
   3566          case Asse_MAXF:   *p++ = 0x5F; break;
   3567          case Asse_MINF:   *p++ = 0x5D; break;
   3568          case Asse_MULF:   *p++ = 0x59; break;
   3569 //..          case Xsse_RCPF:   *p++ = 0x53; break;
   3570 //..          case Xsse_RSQRTF: *p++ = 0x52; break;
   3571          case Asse_SQRTF:  *p++ = 0x51; break;
   3572          case Asse_SUBF:   *p++ = 0x5C; break;
   3573          case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
   3574          case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
   3575          case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
   3576          case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
   3577          default: goto bad;
   3578       }
   3579       p = doAMode_R(p, vreg2ireg(i->Ain.Sse64FLo.dst),
   3580                        vreg2ireg(i->Ain.Sse64FLo.src) );
   3581       if (xtra & 0x100)
   3582          *p++ = toUChar(xtra & 0xFF);
   3583       goto done;
   3584 
   3585    case Ain_SseReRg:
   3586 #     define XX(_n) *p++ = (_n)
   3587 
   3588       rex = clearWBit(
   3589             rexAMode_R( vreg2ireg(i->Ain.SseReRg.dst),
   3590                         vreg2ireg(i->Ain.SseReRg.src) ));
   3591 
   3592       switch (i->Ain.SseReRg.op) {
   3593          case Asse_MOV:     /*movups*/ XX(rex); XX(0x0F); XX(0x10); break;
   3594          case Asse_OR:                 XX(rex); XX(0x0F); XX(0x56); break;
   3595          case Asse_XOR:                XX(rex); XX(0x0F); XX(0x57); break;
   3596          case Asse_AND:                XX(rex); XX(0x0F); XX(0x54); break;
   3597          case Asse_ANDN:               XX(rex); XX(0x0F); XX(0x55); break;
   3598          case Asse_PACKSSD:  XX(0x66); XX(rex); XX(0x0F); XX(0x6B); break;
   3599          case Asse_PACKSSW:  XX(0x66); XX(rex); XX(0x0F); XX(0x63); break;
   3600          case Asse_PACKUSW:  XX(0x66); XX(rex); XX(0x0F); XX(0x67); break;
   3601          case Asse_ADD8:     XX(0x66); XX(rex); XX(0x0F); XX(0xFC); break;
   3602          case Asse_ADD16:    XX(0x66); XX(rex); XX(0x0F); XX(0xFD); break;
   3603          case Asse_ADD32:    XX(0x66); XX(rex); XX(0x0F); XX(0xFE); break;
   3604          case Asse_ADD64:    XX(0x66); XX(rex); XX(0x0F); XX(0xD4); break;
   3605          case Asse_QADD8S:   XX(0x66); XX(rex); XX(0x0F); XX(0xEC); break;
   3606          case Asse_QADD16S:  XX(0x66); XX(rex); XX(0x0F); XX(0xED); break;
   3607          case Asse_QADD8U:   XX(0x66); XX(rex); XX(0x0F); XX(0xDC); break;
   3608          case Asse_QADD16U:  XX(0x66); XX(rex); XX(0x0F); XX(0xDD); break;
   3609          case Asse_AVG8U:    XX(0x66); XX(rex); XX(0x0F); XX(0xE0); break;
   3610          case Asse_AVG16U:   XX(0x66); XX(rex); XX(0x0F); XX(0xE3); break;
   3611          case Asse_CMPEQ8:   XX(0x66); XX(rex); XX(0x0F); XX(0x74); break;
   3612          case Asse_CMPEQ16:  XX(0x66); XX(rex); XX(0x0F); XX(0x75); break;
   3613          case Asse_CMPEQ32:  XX(0x66); XX(rex); XX(0x0F); XX(0x76); break;
   3614          case Asse_CMPGT8S:  XX(0x66); XX(rex); XX(0x0F); XX(0x64); break;
   3615          case Asse_CMPGT16S: XX(0x66); XX(rex); XX(0x0F); XX(0x65); break;
   3616          case Asse_CMPGT32S: XX(0x66); XX(rex); XX(0x0F); XX(0x66); break;
   3617          case Asse_MAX16S:   XX(0x66); XX(rex); XX(0x0F); XX(0xEE); break;
   3618          case Asse_MAX8U:    XX(0x66); XX(rex); XX(0x0F); XX(0xDE); break;
   3619          case Asse_MIN16S:   XX(0x66); XX(rex); XX(0x0F); XX(0xEA); break;
   3620          case Asse_MIN8U:    XX(0x66); XX(rex); XX(0x0F); XX(0xDA); break;
   3621          case Asse_MULHI16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE4); break;
   3622          case Asse_MULHI16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE5); break;
   3623          case Asse_MUL16:    XX(0x66); XX(rex); XX(0x0F); XX(0xD5); break;
   3624          case Asse_SHL16:    XX(0x66); XX(rex); XX(0x0F); XX(0xF1); break;
   3625          case Asse_SHL32:    XX(0x66); XX(rex); XX(0x0F); XX(0xF2); break;
   3626          case Asse_SHL64:    XX(0x66); XX(rex); XX(0x0F); XX(0xF3); break;
   3627          case Asse_SAR16:    XX(0x66); XX(rex); XX(0x0F); XX(0xE1); break;
   3628          case Asse_SAR32:    XX(0x66); XX(rex); XX(0x0F); XX(0xE2); break;
   3629          case Asse_SHR16:    XX(0x66); XX(rex); XX(0x0F); XX(0xD1); break;
   3630          case Asse_SHR32:    XX(0x66); XX(rex); XX(0x0F); XX(0xD2); break;
   3631          case Asse_SHR64:    XX(0x66); XX(rex); XX(0x0F); XX(0xD3); break;
   3632          case Asse_SUB8:     XX(0x66); XX(rex); XX(0x0F); XX(0xF8); break;
   3633          case Asse_SUB16:    XX(0x66); XX(rex); XX(0x0F); XX(0xF9); break;
   3634          case Asse_SUB32:    XX(0x66); XX(rex); XX(0x0F); XX(0xFA); break;
   3635          case Asse_SUB64:    XX(0x66); XX(rex); XX(0x0F); XX(0xFB); break;
   3636          case Asse_QSUB8S:   XX(0x66); XX(rex); XX(0x0F); XX(0xE8); break;
   3637          case Asse_QSUB16S:  XX(0x66); XX(rex); XX(0x0F); XX(0xE9); break;
   3638          case Asse_QSUB8U:   XX(0x66); XX(rex); XX(0x0F); XX(0xD8); break;
   3639          case Asse_QSUB16U:  XX(0x66); XX(rex); XX(0x0F); XX(0xD9); break;
   3640          case Asse_UNPCKHB:  XX(0x66); XX(rex); XX(0x0F); XX(0x68); break;
   3641          case Asse_UNPCKHW:  XX(0x66); XX(rex); XX(0x0F); XX(0x69); break;
   3642          case Asse_UNPCKHD:  XX(0x66); XX(rex); XX(0x0F); XX(0x6A); break;
   3643          case Asse_UNPCKHQ:  XX(0x66); XX(rex); XX(0x0F); XX(0x6D); break;
   3644          case Asse_UNPCKLB:  XX(0x66); XX(rex); XX(0x0F); XX(0x60); break;
   3645          case Asse_UNPCKLW:  XX(0x66); XX(rex); XX(0x0F); XX(0x61); break;
   3646          case Asse_UNPCKLD:  XX(0x66); XX(rex); XX(0x0F); XX(0x62); break;
   3647          case Asse_UNPCKLQ:  XX(0x66); XX(rex); XX(0x0F); XX(0x6C); break;
   3648          default: goto bad;
   3649       }
   3650       p = doAMode_R(p, vreg2ireg(i->Ain.SseReRg.dst),
   3651                        vreg2ireg(i->Ain.SseReRg.src) );
   3652 #     undef XX
   3653       goto done;
   3654 
   3655    case Ain_SseCMov:
   3656       /* jmp fwds if !condition */
   3657       *p++ = toUChar(0x70 + (i->Ain.SseCMov.cond ^ 1));
   3658       *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
   3659       ptmp = p;
   3660 
   3661       /* movaps %src, %dst */
   3662       *p++ = clearWBit(
   3663              rexAMode_R( vreg2ireg(i->Ain.SseCMov.dst),
   3664                          vreg2ireg(i->Ain.SseCMov.src) ));
   3665       *p++ = 0x0F;
   3666       *p++ = 0x28;
   3667       p = doAMode_R(p, vreg2ireg(i->Ain.SseCMov.dst),
   3668                        vreg2ireg(i->Ain.SseCMov.src) );
   3669 
   3670       /* Fill in the jump offset. */
   3671       *(ptmp-1) = toUChar(p - ptmp);
   3672       goto done;
   3673 
   3674    case Ain_SseShuf:
   3675       *p++ = 0x66;
   3676       *p++ = clearWBit(
   3677              rexAMode_R( vreg2ireg(i->Ain.SseShuf.dst),
   3678                          vreg2ireg(i->Ain.SseShuf.src) ));
   3679       *p++ = 0x0F;
   3680       *p++ = 0x70;
   3681       p = doAMode_R(p, vreg2ireg(i->Ain.SseShuf.dst),
   3682                        vreg2ireg(i->Ain.SseShuf.src) );
   3683       *p++ = (UChar)(i->Ain.SseShuf.order);
   3684       goto done;
   3685 
   3686    default:
   3687       goto bad;
   3688    }
   3689 
   3690   bad:
   3691    ppAMD64Instr(i, mode64);
   3692    vpanic("emit_AMD64Instr");
   3693    /*NOTREACHED*/
   3694 
   3695   done:
   3696    vassert(p - &buf[0] <= 32);
   3697    return p - &buf[0];
   3698 
   3699 #  undef fake
   3700 }
   3701 
   3702 /*---------------------------------------------------------------*/
   3703 /*--- end                                   host_amd64_defs.c ---*/
   3704 /*---------------------------------------------------------------*/
   3705