
/*---------------------------------------------------------------*/
/*--- begin                                   host_x86_defs.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2010 OpenWorks LLP
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex.h"
#include "libvex_trc_values.h"

#include "main_util.h"
#include "host_generic_regs.h"
#include "host_x86_defs.h"


/* --------- Registers. --------- */

void ppHRegX86 ( HReg reg )
{
   Int r;
   static HChar* ireg32_names[8]
     = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi" };
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      ppHReg(reg);
      return;
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt32:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 8);
         vex_printf("%s", ireg32_names[r]);
         return;
      case HRcFlt64:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 6);
         vex_printf("%%fake%d", r);
         return;
      case HRcVec128:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 8);
         vex_printf("%%xmm%d", r);
         return;
      default:
         vpanic("ppHRegX86");
   }
}

HReg hregX86_EAX ( void ) { return mkHReg(0, HRcInt32, False); }
HReg hregX86_ECX ( void ) { return mkHReg(1, HRcInt32, False); }
HReg hregX86_EDX ( void ) { return mkHReg(2, HRcInt32, False); }
HReg hregX86_EBX ( void ) { return mkHReg(3, HRcInt32, False); }
HReg hregX86_ESP ( void ) { return mkHReg(4, HRcInt32, False); }
HReg hregX86_EBP ( void ) { return mkHReg(5, HRcInt32, False); }
HReg hregX86_ESI ( void ) { return mkHReg(6, HRcInt32, False); }
HReg hregX86_EDI ( void ) { return mkHReg(7, HRcInt32, False); }

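/* The six "fake" HRcFlt64 registers (%fake0..%fake5, as printed by
   ppHRegX86 above) present the x87 FPU to the register allocator as a
   flat bank of registers; turning them into real %st(i) stack
   positions is presumably handled when the instructions are finally
   emitted. */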
HReg hregX86_FAKE0 ( void ) { return mkHReg(0, HRcFlt64, False); }
HReg hregX86_FAKE1 ( void ) { return mkHReg(1, HRcFlt64, False); }
HReg hregX86_FAKE2 ( void ) { return mkHReg(2, HRcFlt64, False); }
HReg hregX86_FAKE3 ( void ) { return mkHReg(3, HRcFlt64, False); }
HReg hregX86_FAKE4 ( void ) { return mkHReg(4, HRcFlt64, False); }
HReg hregX86_FAKE5 ( void ) { return mkHReg(5, HRcFlt64, False); }

HReg hregX86_XMM0 ( void ) { return mkHReg(0, HRcVec128, False); }
HReg hregX86_XMM1 ( void ) { return mkHReg(1, HRcVec128, False); }
HReg hregX86_XMM2 ( void ) { return mkHReg(2, HRcVec128, False); }
HReg hregX86_XMM3 ( void ) { return mkHReg(3, HRcVec128, False); }
HReg hregX86_XMM4 ( void ) { return mkHReg(4, HRcVec128, False); }
HReg hregX86_XMM5 ( void ) { return mkHReg(5, HRcVec128, False); }
HReg hregX86_XMM6 ( void ) { return mkHReg(6, HRcVec128, False); }
HReg hregX86_XMM7 ( void ) { return mkHReg(7, HRcVec128, False); }


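/* Tell the register allocator which real registers it may use.  Note
   that %esp and %ebp are deliberately not offered: %esp is the host
   stack pointer, and %ebp is written by the Xin_Goto sequence (see
   below), so neither is safe to hand out; getRegUsage_X86Instr
   likewise remarks that %ebp "is not actually available to the
   allocator". */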
void getAllocableRegs_X86 ( Int* nregs, HReg** arr )
{
   *nregs = 20;
   *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
   (*arr)[0] = hregX86_EAX();
   (*arr)[1] = hregX86_EBX();
   (*arr)[2] = hregX86_ECX();
   (*arr)[3] = hregX86_EDX();
   (*arr)[4] = hregX86_ESI();
   (*arr)[5] = hregX86_EDI();
   (*arr)[6] = hregX86_FAKE0();
   (*arr)[7] = hregX86_FAKE1();
   (*arr)[8] = hregX86_FAKE2();
   (*arr)[9] = hregX86_FAKE3();
   (*arr)[10] = hregX86_FAKE4();
   (*arr)[11] = hregX86_FAKE5();
   (*arr)[12] = hregX86_XMM0();
   (*arr)[13] = hregX86_XMM1();
   (*arr)[14] = hregX86_XMM2();
   (*arr)[15] = hregX86_XMM3();
   (*arr)[16] = hregX86_XMM4();
   (*arr)[17] = hregX86_XMM5();
   (*arr)[18] = hregX86_XMM6();
   (*arr)[19] = hregX86_XMM7();
}


/* --------- Condition codes, Intel encoding. --------- */

HChar* showX86CondCode ( X86CondCode cond )
{
   switch (cond) {
      case Xcc_O:      return "o";
      case Xcc_NO:     return "no";
      case Xcc_B:      return "b";
      case Xcc_NB:     return "nb";
      case Xcc_Z:      return "z";
      case Xcc_NZ:     return "nz";
      case Xcc_BE:     return "be";
      case Xcc_NBE:    return "nbe";
      case Xcc_S:      return "s";
      case Xcc_NS:     return "ns";
      case Xcc_P:      return "p";
      case Xcc_NP:     return "np";
      case Xcc_L:      return "l";
      case Xcc_NL:     return "nl";
      case Xcc_LE:     return "le";
      case Xcc_NLE:    return "nle";
      case Xcc_ALWAYS: return "ALWAYS";
      default: vpanic("showX86CondCode");
   }
}


/* --------- X86AMode: memory address expressions. --------- */

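/* An amode takes one of two forms; the concrete syntax, as printed by
   ppX86AMode below, is:

      Xam_IR:   imm32(reg)                  e.g. 0x4(%esi)
      Xam_IRRS: imm32(base,index,1<<shift)  e.g. 0x4(%esi,%edi,4)

   with 0 <= shift <= 3. */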
X86AMode* X86AMode_IR ( UInt imm32, HReg reg ) {
   X86AMode* am = LibVEX_Alloc(sizeof(X86AMode));
   am->tag = Xam_IR;
   am->Xam.IR.imm = imm32;
   am->Xam.IR.reg = reg;
   return am;
}
X86AMode* X86AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
   X86AMode* am = LibVEX_Alloc(sizeof(X86AMode));
   am->tag = Xam_IRRS;
   am->Xam.IRRS.imm = imm32;
   am->Xam.IRRS.base = base;
   am->Xam.IRRS.index = indEx;
   am->Xam.IRRS.shift = shift;
   vassert(shift >= 0 && shift <= 3);
   return am;
}

X86AMode* dopyX86AMode ( X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         return X86AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg );
      case Xam_IRRS:
         return X86AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base,
                               am->Xam.IRRS.index, am->Xam.IRRS.shift );
      default:
         vpanic("dopyX86AMode");
   }
}

void ppX86AMode ( X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         if (am->Xam.IR.imm == 0)
            vex_printf("(");
         else
            vex_printf("0x%x(", am->Xam.IR.imm);
         ppHRegX86(am->Xam.IR.reg);
         vex_printf(")");
         return;
      case Xam_IRRS:
         vex_printf("0x%x(", am->Xam.IRRS.imm);
         ppHRegX86(am->Xam.IRRS.base);
         vex_printf(",");
         ppHRegX86(am->Xam.IRRS.index);
         vex_printf(",%d)", 1 << am->Xam.IRRS.shift);
         return;
      default:
         vpanic("ppX86AMode");
   }
}

static void addRegUsage_X86AMode ( HRegUsage* u, X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         addHRegUse(u, HRmRead, am->Xam.IR.reg);
         return;
      case Xam_IRRS:
         addHRegUse(u, HRmRead, am->Xam.IRRS.base);
         addHRegUse(u, HRmRead, am->Xam.IRRS.index);
         return;
      default:
         vpanic("addRegUsage_X86AMode");
   }
}

static void mapRegs_X86AMode ( HRegRemap* m, X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         am->Xam.IR.reg = lookupHRegRemap(m, am->Xam.IR.reg);
         return;
      case Xam_IRRS:
         am->Xam.IRRS.base = lookupHRegRemap(m, am->Xam.IRRS.base);
         am->Xam.IRRS.index = lookupHRegRemap(m, am->Xam.IRRS.index);
         return;
      default:
         vpanic("mapRegs_X86AMode");
   }
}

/* --------- Operand, which can be reg, immediate or memory. --------- */

X86RMI* X86RMI_Imm ( UInt imm32 ) {
   X86RMI* op         = LibVEX_Alloc(sizeof(X86RMI));
   op->tag            = Xrmi_Imm;
   op->Xrmi.Imm.imm32 = imm32;
   return op;
}
X86RMI* X86RMI_Reg ( HReg reg ) {
   X86RMI* op       = LibVEX_Alloc(sizeof(X86RMI));
   op->tag          = Xrmi_Reg;
   op->Xrmi.Reg.reg = reg;
   return op;
}
X86RMI* X86RMI_Mem ( X86AMode* am ) {
   X86RMI* op      = LibVEX_Alloc(sizeof(X86RMI));
   op->tag         = Xrmi_Mem;
   op->Xrmi.Mem.am = am;
   return op;
}

void ppX86RMI ( X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         vex_printf("$0x%x", op->Xrmi.Imm.imm32);
         return;
      case Xrmi_Reg:
         ppHRegX86(op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         ppX86AMode(op->Xrmi.Mem.am);
         return;
      default:
         vpanic("ppX86RMI");
   }
}

/* An X86RMI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_X86RMI ( HRegUsage* u, X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         return;
      case Xrmi_Reg:
         addHRegUse(u, HRmRead, op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         addRegUsage_X86AMode(u, op->Xrmi.Mem.am);
         return;
      default:
         vpanic("addRegUsage_X86RMI");
   }
}

static void mapRegs_X86RMI ( HRegRemap* m, X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         return;
      case Xrmi_Reg:
         op->Xrmi.Reg.reg = lookupHRegRemap(m, op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         mapRegs_X86AMode(m, op->Xrmi.Mem.am);
         return;
      default:
         vpanic("mapRegs_X86RMI");
   }
}


/* --------- Operand, which can be reg or immediate only. --------- */

X86RI* X86RI_Imm ( UInt imm32 ) {
   X86RI* op         = LibVEX_Alloc(sizeof(X86RI));
   op->tag           = Xri_Imm;
   op->Xri.Imm.imm32 = imm32;
   return op;
}
X86RI* X86RI_Reg ( HReg reg ) {
   X86RI* op       = LibVEX_Alloc(sizeof(X86RI));
   op->tag         = Xri_Reg;
   op->Xri.Reg.reg = reg;
   return op;
}

void ppX86RI ( X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         vex_printf("$0x%x", op->Xri.Imm.imm32);
         return;
      case Xri_Reg:
         ppHRegX86(op->Xri.Reg.reg);
         return;
      default:
         vpanic("ppX86RI");
   }
}

/* An X86RI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_X86RI ( HRegUsage* u, X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         return;
      case Xri_Reg:
         addHRegUse(u, HRmRead, op->Xri.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_X86RI");
   }
}

static void mapRegs_X86RI ( HRegRemap* m, X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         return;
      case Xri_Reg:
         op->Xri.Reg.reg = lookupHRegRemap(m, op->Xri.Reg.reg);
         return;
      default:
         vpanic("mapRegs_X86RI");
   }
}


/* --------- Operand, which can be reg or memory only. --------- */

X86RM* X86RM_Reg ( HReg reg ) {
   X86RM* op       = LibVEX_Alloc(sizeof(X86RM));
   op->tag         = Xrm_Reg;
   op->Xrm.Reg.reg = reg;
   return op;
}
X86RM* X86RM_Mem ( X86AMode* am ) {
   X86RM* op      = LibVEX_Alloc(sizeof(X86RM));
   op->tag        = Xrm_Mem;
   op->Xrm.Mem.am = am;
   return op;
}

void ppX86RM ( X86RM* op ) {
   switch (op->tag) {
      case Xrm_Mem:
         ppX86AMode(op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         ppHRegX86(op->Xrm.Reg.reg);
         return;
      default:
         vpanic("ppX86RM");
   }
}

/* Because an X86RM can be used as either a source or a destination
   operand, we have to supply a mode -- pertaining to the operand as
   a whole -- indicating how it's being used. */
static void addRegUsage_X86RM ( HRegUsage* u, X86RM* op, HRegMode mode ) {
   switch (op->tag) {
      case Xrm_Mem:
         /* Memory is read, written or modified.  So we just want to
            know the regs read by the amode. */
         addRegUsage_X86AMode(u, op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         /* reg is read, written or modified.  Add it in the
            appropriate way. */
         addHRegUse(u, mode, op->Xrm.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_X86RM");
   }
}

static void mapRegs_X86RM ( HRegRemap* m, X86RM* op )
{
   switch (op->tag) {
      case Xrm_Mem:
         mapRegs_X86AMode(m, op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         op->Xrm.Reg.reg = lookupHRegRemap(m, op->Xrm.Reg.reg);
         return;
      default:
         vpanic("mapRegs_X86RM");
   }
}


/* --------- Instructions. --------- */

HChar* showX86UnaryOp ( X86UnaryOp op ) {
   switch (op) {
      case Xun_NOT: return "not";
      case Xun_NEG: return "neg";
      default: vpanic("showX86UnaryOp");
   }
}

HChar* showX86AluOp ( X86AluOp op ) {
   switch (op) {
      case Xalu_MOV:  return "mov";
      case Xalu_CMP:  return "cmp";
      case Xalu_ADD:  return "add";
      case Xalu_SUB:  return "sub";
      case Xalu_ADC:  return "adc";
      case Xalu_SBB:  return "sbb";
      case Xalu_AND:  return "and";
      case Xalu_OR:   return "or";
      case Xalu_XOR:  return "xor";
      case Xalu_MUL:  return "mul";
      default: vpanic("showX86AluOp");
   }
}

HChar* showX86ShiftOp ( X86ShiftOp op ) {
   switch (op) {
      case Xsh_SHL: return "shl";
      case Xsh_SHR: return "shr";
      case Xsh_SAR: return "sar";
      default: vpanic("showX86ShiftOp");
   }
}

HChar* showX86FpOp ( X86FpOp op ) {
   switch (op) {
      case Xfp_ADD:    return "add";
      case Xfp_SUB:    return "sub";
      case Xfp_MUL:    return "mul";
      case Xfp_DIV:    return "div";
      case Xfp_SCALE:  return "scale";
      case Xfp_ATAN:   return "atan";
      case Xfp_YL2X:   return "yl2x";
      case Xfp_YL2XP1: return "yl2xp1";
      case Xfp_PREM:   return "prem";
      case Xfp_PREM1:  return "prem1";
      case Xfp_SQRT:   return "sqrt";
      case Xfp_ABS:    return "abs";
      case Xfp_NEG:    return "chs";
      case Xfp_MOV:    return "mov";
      case Xfp_SIN:    return "sin";
      case Xfp_COS:    return "cos";
      case Xfp_TAN:    return "tan";
      case Xfp_ROUND:  return "round";
      case Xfp_2XM1:   return "2xm1";
      default: vpanic("showX86FpOp");
   }
}

HChar* showX86SseOp ( X86SseOp op ) {
   switch (op) {
      case Xsse_MOV:      return "mov(?!)";
      case Xsse_ADDF:     return "add";
      case Xsse_SUBF:     return "sub";
      case Xsse_MULF:     return "mul";
      case Xsse_DIVF:     return "div";
      case Xsse_MAXF:     return "max";
      case Xsse_MINF:     return "min";
      case Xsse_CMPEQF:   return "cmpFeq";
      case Xsse_CMPLTF:   return "cmpFlt";
      case Xsse_CMPLEF:   return "cmpFle";
      case Xsse_CMPUNF:   return "cmpFun";
      case Xsse_RCPF:     return "rcp";
      case Xsse_RSQRTF:   return "rsqrt";
      case Xsse_SQRTF:    return "sqrt";
      case Xsse_AND:      return "and";
      case Xsse_OR:       return "or";
      case Xsse_XOR:      return "xor";
      case Xsse_ANDN:     return "andn";
      case Xsse_ADD8:     return "paddb";
      case Xsse_ADD16:    return "paddw";
      case Xsse_ADD32:    return "paddd";
      case Xsse_ADD64:    return "paddq";
      case Xsse_QADD8U:   return "paddusb";
      case Xsse_QADD16U:  return "paddusw";
      case Xsse_QADD8S:   return "paddsb";
      case Xsse_QADD16S:  return "paddsw";
      case Xsse_SUB8:     return "psubb";
      case Xsse_SUB16:    return "psubw";
      case Xsse_SUB32:    return "psubd";
      case Xsse_SUB64:    return "psubq";
      case Xsse_QSUB8U:   return "psubusb";
      case Xsse_QSUB16U:  return "psubusw";
      case Xsse_QSUB8S:   return "psubsb";
      case Xsse_QSUB16S:  return "psubsw";
      case Xsse_MUL16:    return "pmullw";
      case Xsse_MULHI16U: return "pmulhuw";
      case Xsse_MULHI16S: return "pmulhw";
      case Xsse_AVG8U:    return "pavgb";
      case Xsse_AVG16U:   return "pavgw";
      case Xsse_MAX16S:   return "pmaxw";
      case Xsse_MAX8U:    return "pmaxub";
      case Xsse_MIN16S:   return "pminw";
      case Xsse_MIN8U:    return "pminub";
      case Xsse_CMPEQ8:   return "pcmpeqb";
      case Xsse_CMPEQ16:  return "pcmpeqw";
      case Xsse_CMPEQ32:  return "pcmpeqd";
      case Xsse_CMPGT8S:  return "pcmpgtb";
      case Xsse_CMPGT16S: return "pcmpgtw";
      case Xsse_CMPGT32S: return "pcmpgtd";
      case Xsse_SHL16:    return "psllw";
      case Xsse_SHL32:    return "pslld";
      case Xsse_SHL64:    return "psllq";
      case Xsse_SHR16:    return "psrlw";
      case Xsse_SHR32:    return "psrld";
      case Xsse_SHR64:    return "psrlq";
      case Xsse_SAR16:    return "psraw";
      case Xsse_SAR32:    return "psrad";
      case Xsse_PACKSSD:  return "packssdw";
      case Xsse_PACKSSW:  return "packsswb";
      case Xsse_PACKUSW:  return "packuswb";
      case Xsse_UNPCKHB:  return "punpckhb";
      case Xsse_UNPCKHW:  return "punpckhw";
      case Xsse_UNPCKHD:  return "punpckhd";
      case Xsse_UNPCKHQ:  return "punpckhq";
      case Xsse_UNPCKLB:  return "punpcklb";
      case Xsse_UNPCKLW:  return "punpcklw";
      case Xsse_UNPCKLD:  return "punpckld";
      case Xsse_UNPCKLQ:  return "punpcklq";
      default: vpanic("showX86SseOp");
   }
}

X86Instr* X86Instr_Alu32R ( X86AluOp op, X86RMI* src, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_Alu32R;
   i->Xin.Alu32R.op  = op;
   i->Xin.Alu32R.src = src;
   i->Xin.Alu32R.dst = dst;
   return i;
}
X86Instr* X86Instr_Alu32M ( X86AluOp op, X86RI* src, X86AMode* dst ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_Alu32M;
   i->Xin.Alu32M.op  = op;
   i->Xin.Alu32M.src = src;
   i->Xin.Alu32M.dst = dst;
   vassert(op != Xalu_MUL);
   return i;
}
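/* Sh32: src is the shift amount; src == 0 means "shift by %cl" rather
   than by an immediate (see ppX86Instr and getRegUsage_X86Instr
   below, which make %cl's role explicit). */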
X86Instr* X86Instr_Sh32 ( X86ShiftOp op, UInt src, HReg dst ) {
   X86Instr* i     = LibVEX_Alloc(sizeof(X86Instr));
   i->tag          = Xin_Sh32;
   i->Xin.Sh32.op  = op;
   i->Xin.Sh32.src = src;
   i->Xin.Sh32.dst = dst;
   return i;
}
X86Instr* X86Instr_Test32 ( UInt imm32, X86RM* dst ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_Test32;
   i->Xin.Test32.imm32 = imm32;
   i->Xin.Test32.dst   = dst;
   return i;
}
X86Instr* X86Instr_Unary32 ( X86UnaryOp op, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_Unary32;
   i->Xin.Unary32.op  = op;
   i->Xin.Unary32.dst = dst;
   return i;
}
X86Instr* X86Instr_Lea32 ( X86AMode* am, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_Lea32;
   i->Xin.Lea32.am    = am;
   i->Xin.Lea32.dst   = dst;
   return i;
}
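/* MulL is a widening multiply, EDX:EAX := EAX * src, and Div divides
   EDX:EAX by src, leaving the quotient in EAX and the remainder in
   EDX -- the fixed-register behaviour of the real MUL/IMUL and
   DIV/IDIV, which getRegUsage_X86Instr below records. */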
X86Instr* X86Instr_MulL ( Bool syned, X86RM* src ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_MulL;
   i->Xin.MulL.syned  = syned;
   i->Xin.MulL.src    = src;
   return i;
}
X86Instr* X86Instr_Div ( Bool syned, X86RM* src ) {
   X86Instr* i      = LibVEX_Alloc(sizeof(X86Instr));
   i->tag           = Xin_Div;
   i->Xin.Div.syned = syned;
   i->Xin.Div.src   = src;
   return i;
}
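/* Sh3232 is the double-length shift, printed as shldl/shrdl by
   ppX86Instr below: dst is shifted, with the incoming bits supplied
   by src.  As with Sh32, amt == 0 means the shift amount is taken
   from %cl. */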
X86Instr* X86Instr_Sh3232  ( X86ShiftOp op, UInt amt, HReg src, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_Sh3232;
   i->Xin.Sh3232.op  = op;
   i->Xin.Sh3232.amt = amt;
   i->Xin.Sh3232.src = src;
   i->Xin.Sh3232.dst = dst;
   vassert(op == Xsh_SHL || op == Xsh_SHR);
   return i;
}
X86Instr* X86Instr_Push( X86RMI* src ) {
   X86Instr* i     = LibVEX_Alloc(sizeof(X86Instr));
   i->tag          = Xin_Push;
   i->Xin.Push.src = src;
   return i;
}
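/* Call: regparms (0 .. 3) says how many of the leading args are
   passed in registers rather than on the stack -- in EAX, then EDX,
   then ECX, per the reg-usage computation in getRegUsage_X86Instr
   below, which also explains the scratch register needed for the
   target address itself. */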
X86Instr* X86Instr_Call ( X86CondCode cond, Addr32 target, Int regparms ) {
   X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
   i->tag               = Xin_Call;
   i->Xin.Call.cond     = cond;
   i->Xin.Call.target   = target;
   i->Xin.Call.regparms = regparms;
   vassert(regparms >= 0 && regparms <= 3);
   return i;
}
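/* Goto is a (possibly conditional) jump back to the dispatcher.  As
   the pretty-printer below shows, the next guest address travels in
   %eax, the dispatcher address in %edx, and for jump kinds other than
   Boring/Call/Ret the kind itself is signalled in %ebp. */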
X86Instr* X86Instr_Goto ( IRJumpKind jk, X86CondCode cond, X86RI* dst ) {
   X86Instr* i      = LibVEX_Alloc(sizeof(X86Instr));
   i->tag           = Xin_Goto;
   i->Xin.Goto.cond = cond;
   i->Xin.Goto.dst  = dst;
   i->Xin.Goto.jk   = jk;
   return i;
}
X86Instr* X86Instr_CMov32  ( X86CondCode cond, X86RM* src, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_CMov32;
   i->Xin.CMov32.cond = cond;
   i->Xin.CMov32.src  = src;
   i->Xin.CMov32.dst  = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned,
                            X86AMode* src, HReg dst ) {
   X86Instr* i           = LibVEX_Alloc(sizeof(X86Instr));
   i->tag                = Xin_LoadEX;
   i->Xin.LoadEX.szSmall = szSmall;
   i->Xin.LoadEX.syned   = syned;
   i->Xin.LoadEX.src     = src;
   i->Xin.LoadEX.dst     = dst;
   vassert(szSmall == 1 || szSmall == 2);
   return i;
}
X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst ) {
   X86Instr* i      = LibVEX_Alloc(sizeof(X86Instr));
   i->tag           = Xin_Store;
   i->Xin.Store.sz  = sz;
   i->Xin.Store.src = src;
   i->Xin.Store.dst = dst;
   vassert(sz == 1 || sz == 2);
   return i;
}
X86Instr* X86Instr_Set32 ( X86CondCode cond, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_Set32;
   i->Xin.Set32.cond = cond;
   i->Xin.Set32.dst  = dst;
   return i;
}
X86Instr* X86Instr_Bsfr32 ( Bool isFwds, HReg src, HReg dst ) {
   X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
   i->tag               = Xin_Bsfr32;
   i->Xin.Bsfr32.isFwds = isFwds;
   i->Xin.Bsfr32.src    = src;
   i->Xin.Bsfr32.dst    = dst;
   return i;
}
X86Instr* X86Instr_MFence ( UInt hwcaps ) {
   X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
   i->tag               = Xin_MFence;
   i->Xin.MFence.hwcaps = hwcaps;
   vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_SSE1
                            |VEX_HWCAPS_X86_SSE2
                            |VEX_HWCAPS_X86_SSE3
                            |VEX_HWCAPS_X86_LZCNT)));
   return i;
}
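/* ACAS and DACAS are atomic compare-and-swaps, realised as LOCK
   CMPXCHG and LOCK CMPXCHG8B.  The value operands are implicitly
   fixed, as the pretty-printer below shows: expected old value in
   %eax (%edx:%eax for DACAS), new value in %ebx (%ecx:%ebx for
   DACAS). */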
X86Instr* X86Instr_ACAS ( X86AMode* addr, UChar sz ) {
   X86Instr* i      = LibVEX_Alloc(sizeof(X86Instr));
   i->tag           = Xin_ACAS;
   i->Xin.ACAS.addr = addr;
   i->Xin.ACAS.sz   = sz;
   vassert(sz == 4 || sz == 2 || sz == 1);
   return i;
}
X86Instr* X86Instr_DACAS ( X86AMode* addr ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_DACAS;
   i->Xin.DACAS.addr = addr;
   return i;
}

X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_FpUnary;
   i->Xin.FpUnary.op  = op;
   i->Xin.FpUnary.src = src;
   i->Xin.FpUnary.dst = dst;
   return i;
}
X86Instr* X86Instr_FpBinary ( X86FpOp op, HReg srcL, HReg srcR, HReg dst ) {
   X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
   i->tag               = Xin_FpBinary;
   i->Xin.FpBinary.op   = op;
   i->Xin.FpBinary.srcL = srcL;
   i->Xin.FpBinary.srcR = srcR;
   i->Xin.FpBinary.dst  = dst;
   return i;
}
X86Instr* X86Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, X86AMode* addr ) {
   X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
   i->tag               = Xin_FpLdSt;
   i->Xin.FpLdSt.isLoad = isLoad;
   i->Xin.FpLdSt.sz     = sz;
   i->Xin.FpLdSt.reg    = reg;
   i->Xin.FpLdSt.addr   = addr;
   vassert(sz == 4 || sz == 8 || sz == 10);
   return i;
}
X86Instr* X86Instr_FpLdStI ( Bool isLoad, UChar sz,
                             HReg reg, X86AMode* addr ) {
   X86Instr* i           = LibVEX_Alloc(sizeof(X86Instr));
   i->tag                = Xin_FpLdStI;
   i->Xin.FpLdStI.isLoad = isLoad;
   i->Xin.FpLdStI.sz     = sz;
   i->Xin.FpLdStI.reg    = reg;
   i->Xin.FpLdStI.addr   = addr;
   vassert(sz == 2 || sz == 4 || sz == 8);
   return i;
}
X86Instr* X86Instr_Fp64to32 ( HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_Fp64to32;
   i->Xin.Fp64to32.src = src;
   i->Xin.Fp64to32.dst = dst;
   return i;
}
X86Instr* X86Instr_FpCMov ( X86CondCode cond, HReg src, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_FpCMov;
   i->Xin.FpCMov.cond = cond;
   i->Xin.FpCMov.src  = src;
   i->Xin.FpCMov.dst  = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
X86Instr* X86Instr_FpLdCW ( X86AMode* addr ) {
   X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
   i->tag               = Xin_FpLdCW;
   i->Xin.FpLdCW.addr   = addr;
   return i;
}
X86Instr* X86Instr_FpStSW_AX ( void ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag      = Xin_FpStSW_AX;
   return i;
}
X86Instr* X86Instr_FpCmp ( HReg srcL, HReg srcR, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_FpCmp;
   i->Xin.FpCmp.srcL = srcL;
   i->Xin.FpCmp.srcR = srcR;
   i->Xin.FpCmp.dst  = dst;
   return i;
}

X86Instr* X86Instr_SseConst ( UShort con, HReg dst ) {
   X86Instr* i            = LibVEX_Alloc(sizeof(X86Instr));
   i->tag                 = Xin_SseConst;
   i->Xin.SseConst.con    = con;
   i->Xin.SseConst.dst    = dst;
   vassert(hregClass(dst) == HRcVec128);
   return i;
}
X86Instr* X86Instr_SseLdSt ( Bool isLoad, HReg reg, X86AMode* addr ) {
   X86Instr* i           = LibVEX_Alloc(sizeof(X86Instr));
   i->tag                = Xin_SseLdSt;
   i->Xin.SseLdSt.isLoad = isLoad;
   i->Xin.SseLdSt.reg    = reg;
   i->Xin.SseLdSt.addr   = addr;
   return i;
}
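/* SseLdzLO loads 4 or 8 bytes into the low lane of an xmm register
   and zeroes the remaining lanes -- that is, movss/movsd with a
   memory source, as ppX86Instr below prints it. */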
X86Instr* X86Instr_SseLdzLO  ( Int sz, HReg reg, X86AMode* addr )
{
   X86Instr* i           = LibVEX_Alloc(sizeof(X86Instr));
   i->tag                = Xin_SseLdzLO;
   i->Xin.SseLdzLO.sz    = toUChar(sz);
   i->Xin.SseLdzLO.reg   = reg;
   i->Xin.SseLdzLO.addr  = addr;
   vassert(sz == 4 || sz == 8);
   return i;
}
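/* The next four constructors cover the SSE FP operand shapes; as the
   suffixes printed by ppX86Instr show, Sse32Fx4 is packed single
   (..ps), Sse32FLo scalar single (..ss), Sse64Fx2 packed double
   (..pd) and Sse64FLo scalar double (..sd). */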
X86Instr* X86Instr_Sse32Fx4 ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_Sse32Fx4;
   i->Xin.Sse32Fx4.op  = op;
   i->Xin.Sse32Fx4.src = src;
   i->Xin.Sse32Fx4.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_Sse32FLo ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_Sse32FLo;
   i->Xin.Sse32FLo.op  = op;
   i->Xin.Sse32FLo.src = src;
   i->Xin.Sse32FLo.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_Sse64Fx2 ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_Sse64Fx2;
   i->Xin.Sse64Fx2.op  = op;
   i->Xin.Sse64Fx2.src = src;
   i->Xin.Sse64Fx2.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_Sse64FLo ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_Sse64FLo;
   i->Xin.Sse64FLo.op  = op;
   i->Xin.Sse64FLo.src = src;
   i->Xin.Sse64FLo.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_SseReRg ( X86SseOp op, HReg re, HReg rg ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_SseReRg;
   i->Xin.SseReRg.op  = op;
   i->Xin.SseReRg.src = re;
   i->Xin.SseReRg.dst = rg;
   return i;
}
X86Instr* X86Instr_SseCMov ( X86CondCode cond, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_SseCMov;
   i->Xin.SseCMov.cond = cond;
   i->Xin.SseCMov.src  = src;
   i->Xin.SseCMov.dst  = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
X86Instr* X86Instr_SseShuf ( Int order, HReg src, HReg dst ) {
   X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
   i->tag               = Xin_SseShuf;
   i->Xin.SseShuf.order = order;
   i->Xin.SseShuf.src   = src;
   i->Xin.SseShuf.dst   = dst;
   vassert(order >= 0 && order <= 0xFF);
   return i;
}

void ppX86Instr ( X86Instr* i, Bool mode64 ) {
   vassert(mode64 == False);
   switch (i->tag) {
      case Xin_Alu32R:
         vex_printf("%sl ", showX86AluOp(i->Xin.Alu32R.op));
         ppX86RMI(i->Xin.Alu32R.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Alu32R.dst);
         return;
      case Xin_Alu32M:
         vex_printf("%sl ", showX86AluOp(i->Xin.Alu32M.op));
         ppX86RI(i->Xin.Alu32M.src);
         vex_printf(",");
         ppX86AMode(i->Xin.Alu32M.dst);
         return;
      case Xin_Sh32:
         vex_printf("%sl ", showX86ShiftOp(i->Xin.Sh32.op));
         if (i->Xin.Sh32.src == 0)
            vex_printf("%%cl,");
         else
            vex_printf("$%d,", (Int)i->Xin.Sh32.src);
         ppHRegX86(i->Xin.Sh32.dst);
         return;
      case Xin_Test32:
         vex_printf("testl $%d,", (Int)i->Xin.Test32.imm32);
         ppX86RM(i->Xin.Test32.dst);
         return;
      case Xin_Unary32:
         vex_printf("%sl ", showX86UnaryOp(i->Xin.Unary32.op));
         ppHRegX86(i->Xin.Unary32.dst);
         return;
      case Xin_Lea32:
         vex_printf("leal ");
         ppX86AMode(i->Xin.Lea32.am);
         vex_printf(",");
         ppHRegX86(i->Xin.Lea32.dst);
         return;
      case Xin_MulL:
         vex_printf("%cmull ", i->Xin.MulL.syned ? 's' : 'u');
         ppX86RM(i->Xin.MulL.src);
         return;
      case Xin_Div:
         vex_printf("%cdivl ", i->Xin.Div.syned ? 's' : 'u');
         ppX86RM(i->Xin.Div.src);
         return;
      case Xin_Sh3232:
         vex_printf("%sdl ", showX86ShiftOp(i->Xin.Sh3232.op));
         if (i->Xin.Sh3232.amt == 0)
            vex_printf(" %%cl,");
         else
            vex_printf(" $%d,", (Int)i->Xin.Sh3232.amt);
         ppHRegX86(i->Xin.Sh3232.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sh3232.dst);
         return;
      case Xin_Push:
         vex_printf("pushl ");
         ppX86RMI(i->Xin.Push.src);
         return;
      case Xin_Call:
         vex_printf("call%s[%d] ",
                    i->Xin.Call.cond==Xcc_ALWAYS
                       ? "" : showX86CondCode(i->Xin.Call.cond),
                    i->Xin.Call.regparms);
         vex_printf("0x%x", i->Xin.Call.target);
         break;
      case Xin_Goto:
         if (i->Xin.Goto.cond != Xcc_ALWAYS) {
            vex_printf("if (%%eflags.%s) { ",
                       showX86CondCode(i->Xin.Goto.cond));
         }
         if (i->Xin.Goto.jk != Ijk_Boring
             && i->Xin.Goto.jk != Ijk_Call
             && i->Xin.Goto.jk != Ijk_Ret) {
            vex_printf("movl $");
            ppIRJumpKind(i->Xin.Goto.jk);
            vex_printf(",%%ebp ; ");
         }
         vex_printf("movl ");
         ppX86RI(i->Xin.Goto.dst);
         vex_printf(",%%eax ; movl $dispatcher_addr,%%edx ; jmp *%%edx");
         if (i->Xin.Goto.cond != Xcc_ALWAYS) {
            vex_printf(" }");
         }
         return;
      case Xin_CMov32:
         vex_printf("cmov%s ", showX86CondCode(i->Xin.CMov32.cond));
         ppX86RM(i->Xin.CMov32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.CMov32.dst);
         return;
      case Xin_LoadEX:
         vex_printf("mov%c%cl ",
                    i->Xin.LoadEX.syned ? 's' : 'z',
                    i->Xin.LoadEX.szSmall==1 ? 'b' : 'w');
         ppX86AMode(i->Xin.LoadEX.src);
         vex_printf(",");
         ppHRegX86(i->Xin.LoadEX.dst);
         return;
      case Xin_Store:
         vex_printf("mov%c ", i->Xin.Store.sz==1 ? 'b' : 'w');
         ppHRegX86(i->Xin.Store.src);
         vex_printf(",");
         ppX86AMode(i->Xin.Store.dst);
         return;
      case Xin_Set32:
         vex_printf("setl%s ", showX86CondCode(i->Xin.Set32.cond));
         ppHRegX86(i->Xin.Set32.dst);
         return;
      case Xin_Bsfr32:
         vex_printf("bs%cl ", i->Xin.Bsfr32.isFwds ? 'f' : 'r');
         ppHRegX86(i->Xin.Bsfr32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Bsfr32.dst);
         return;
      case Xin_MFence:
         vex_printf("mfence(%s)",
                    LibVEX_ppVexHwCaps(VexArchX86,i->Xin.MFence.hwcaps));
         return;
      case Xin_ACAS:
         vex_printf("lock cmpxchg%c ",
                     i->Xin.ACAS.sz==1 ? 'b'
                                       : i->Xin.ACAS.sz==2 ? 'w' : 'l');
         vex_printf("{%%eax->%%ebx},");
         ppX86AMode(i->Xin.ACAS.addr);
         return;
      case Xin_DACAS:
         vex_printf("lock cmpxchg8b {%%edx:%%eax->%%ecx:%%ebx},");
         ppX86AMode(i->Xin.DACAS.addr);
         return;
      case Xin_FpUnary:
         vex_printf("g%sD ", showX86FpOp(i->Xin.FpUnary.op));
         ppHRegX86(i->Xin.FpUnary.src);
         vex_printf(",");
         ppHRegX86(i->Xin.FpUnary.dst);
         break;
      case Xin_FpBinary:
         vex_printf("g%sD ", showX86FpOp(i->Xin.FpBinary.op));
         ppHRegX86(i->Xin.FpBinary.srcL);
         vex_printf(",");
         ppHRegX86(i->Xin.FpBinary.srcR);
         vex_printf(",");
         ppHRegX86(i->Xin.FpBinary.dst);
         break;
      case Xin_FpLdSt:
         if (i->Xin.FpLdSt.isLoad) {
            vex_printf("gld%c " ,  i->Xin.FpLdSt.sz==10 ? 'T'
                                   : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
            ppX86AMode(i->Xin.FpLdSt.addr);
            vex_printf(", ");
            ppHRegX86(i->Xin.FpLdSt.reg);
         } else {
            vex_printf("gst%c " , i->Xin.FpLdSt.sz==10 ? 'T'
                                  : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
            ppHRegX86(i->Xin.FpLdSt.reg);
            vex_printf(", ");
            ppX86AMode(i->Xin.FpLdSt.addr);
         }
         return;
      case Xin_FpLdStI:
         if (i->Xin.FpLdStI.isLoad) {
            vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
                                  i->Xin.FpLdStI.sz==4 ? "l" : "w");
            ppX86AMode(i->Xin.FpLdStI.addr);
            vex_printf(", ");
            ppHRegX86(i->Xin.FpLdStI.reg);
         } else {
            vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
                                  i->Xin.FpLdStI.sz==4 ? "l" : "w");
            ppHRegX86(i->Xin.FpLdStI.reg);
            vex_printf(", ");
            ppX86AMode(i->Xin.FpLdStI.addr);
         }
         return;
      case Xin_Fp64to32:
         vex_printf("gdtof ");
         ppHRegX86(i->Xin.Fp64to32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Fp64to32.dst);
         return;
      case Xin_FpCMov:
         vex_printf("gcmov%s ", showX86CondCode(i->Xin.FpCMov.cond));
         ppHRegX86(i->Xin.FpCMov.src);
         vex_printf(",");
         ppHRegX86(i->Xin.FpCMov.dst);
         return;
      case Xin_FpLdCW:
         vex_printf("fldcw ");
         ppX86AMode(i->Xin.FpLdCW.addr);
         return;
      case Xin_FpStSW_AX:
         vex_printf("fstsw %%ax");
         return;
      case Xin_FpCmp:
         vex_printf("gcmp ");
         ppHRegX86(i->Xin.FpCmp.srcL);
         vex_printf(",");
         ppHRegX86(i->Xin.FpCmp.srcR);
         vex_printf(",");
         ppHRegX86(i->Xin.FpCmp.dst);
         break;
      case Xin_SseConst:
         vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con);
         ppHRegX86(i->Xin.SseConst.dst);
         break;
      case Xin_SseLdSt:
         vex_printf("movups ");
         if (i->Xin.SseLdSt.isLoad) {
            ppX86AMode(i->Xin.SseLdSt.addr);
            vex_printf(",");
            ppHRegX86(i->Xin.SseLdSt.reg);
         } else {
            ppHRegX86(i->Xin.SseLdSt.reg);
            vex_printf(",");
            ppX86AMode(i->Xin.SseLdSt.addr);
         }
         return;
      case Xin_SseLdzLO:
         vex_printf("movs%s ", i->Xin.SseLdzLO.sz==4 ? "s" : "d");
         ppX86AMode(i->Xin.SseLdzLO.addr);
         vex_printf(",");
         ppHRegX86(i->Xin.SseLdzLO.reg);
         return;
      case Xin_Sse32Fx4:
         vex_printf("%sps ", showX86SseOp(i->Xin.Sse32Fx4.op));
         ppHRegX86(i->Xin.Sse32Fx4.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse32Fx4.dst);
         return;
      case Xin_Sse32FLo:
         vex_printf("%sss ", showX86SseOp(i->Xin.Sse32FLo.op));
         ppHRegX86(i->Xin.Sse32FLo.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse32FLo.dst);
         return;
      case Xin_Sse64Fx2:
         vex_printf("%spd ", showX86SseOp(i->Xin.Sse64Fx2.op));
         ppHRegX86(i->Xin.Sse64Fx2.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse64Fx2.dst);
         return;
      case Xin_Sse64FLo:
         vex_printf("%ssd ", showX86SseOp(i->Xin.Sse64FLo.op));
         ppHRegX86(i->Xin.Sse64FLo.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse64FLo.dst);
         return;
      case Xin_SseReRg:
         vex_printf("%s ", showX86SseOp(i->Xin.SseReRg.op));
         ppHRegX86(i->Xin.SseReRg.src);
         vex_printf(",");
         ppHRegX86(i->Xin.SseReRg.dst);
         return;
      case Xin_SseCMov:
         vex_printf("cmov%s ", showX86CondCode(i->Xin.SseCMov.cond));
         ppHRegX86(i->Xin.SseCMov.src);
         vex_printf(",");
         ppHRegX86(i->Xin.SseCMov.dst);
         return;
      case Xin_SseShuf:
         vex_printf("pshufd $0x%x,", i->Xin.SseShuf.order);
         ppHRegX86(i->Xin.SseShuf.src);
         vex_printf(",");
         ppHRegX86(i->Xin.SseShuf.dst);
         return;

      default:
         vpanic("ppX86Instr");
   }
}

/* --------- Helpers for register allocation. --------- */

void getRegUsage_X86Instr (HRegUsage* u, X86Instr* i, Bool mode64)
{
   Bool unary;
   vassert(mode64 == False);
   initHRegUsage(u);
   switch (i->tag) {
      case Xin_Alu32R:
         addRegUsage_X86RMI(u, i->Xin.Alu32R.src);
         if (i->Xin.Alu32R.op == Xalu_MOV) {
            addHRegUse(u, HRmWrite, i->Xin.Alu32R.dst);
            return;
         }
         if (i->Xin.Alu32R.op == Xalu_CMP) {
            addHRegUse(u, HRmRead, i->Xin.Alu32R.dst);
            return;
         }
         addHRegUse(u, HRmModify, i->Xin.Alu32R.dst);
         return;
      case Xin_Alu32M:
         addRegUsage_X86RI(u, i->Xin.Alu32M.src);
         addRegUsage_X86AMode(u, i->Xin.Alu32M.dst);
         return;
      case Xin_Sh32:
         addHRegUse(u, HRmModify, i->Xin.Sh32.dst);
         if (i->Xin.Sh32.src == 0)
            addHRegUse(u, HRmRead, hregX86_ECX());
         return;
      case Xin_Test32:
         addRegUsage_X86RM(u, i->Xin.Test32.dst, HRmRead);
         return;
      case Xin_Unary32:
         addHRegUse(u, HRmModify, i->Xin.Unary32.dst);
         return;
      case Xin_Lea32:
         addRegUsage_X86AMode(u, i->Xin.Lea32.am);
         addHRegUse(u, HRmWrite, i->Xin.Lea32.dst);
         return;
      case Xin_MulL:
         addRegUsage_X86RM(u, i->Xin.MulL.src, HRmRead);
         addHRegUse(u, HRmModify, hregX86_EAX());
         addHRegUse(u, HRmWrite, hregX86_EDX());
         return;
      case Xin_Div:
         addRegUsage_X86RM(u, i->Xin.Div.src, HRmRead);
         addHRegUse(u, HRmModify, hregX86_EAX());
         addHRegUse(u, HRmModify, hregX86_EDX());
         return;
      case Xin_Sh3232:
         addHRegUse(u, HRmRead, i->Xin.Sh3232.src);
         addHRegUse(u, HRmModify, i->Xin.Sh3232.dst);
         if (i->Xin.Sh3232.amt == 0)
            addHRegUse(u, HRmRead, hregX86_ECX());
         return;
      case Xin_Push:
         addRegUsage_X86RMI(u, i->Xin.Push.src);
         addHRegUse(u, HRmModify, hregX86_ESP());
         return;
      case Xin_Call:
         /* This is a bit subtle. */
         /* First off, claim it trashes all the caller-saved regs
            which fall within the register allocator's jurisdiction.
            These I believe to be %eax %ecx %edx and all the xmm
            registers. */
         addHRegUse(u, HRmWrite, hregX86_EAX());
         addHRegUse(u, HRmWrite, hregX86_ECX());
         addHRegUse(u, HRmWrite, hregX86_EDX());
         addHRegUse(u, HRmWrite, hregX86_XMM0());
         addHRegUse(u, HRmWrite, hregX86_XMM1());
         addHRegUse(u, HRmWrite, hregX86_XMM2());
         addHRegUse(u, HRmWrite, hregX86_XMM3());
         addHRegUse(u, HRmWrite, hregX86_XMM4());
         addHRegUse(u, HRmWrite, hregX86_XMM5());
         addHRegUse(u, HRmWrite, hregX86_XMM6());
         addHRegUse(u, HRmWrite, hregX86_XMM7());
         /* Now we have to state any parameter-carrying registers
            which might be read.  This depends on the regparmness. */
         switch (i->Xin.Call.regparms) {
            case 3: addHRegUse(u, HRmRead, hregX86_ECX()); /*fallthru*/
            case 2: addHRegUse(u, HRmRead, hregX86_EDX()); /*fallthru*/
            case 1: addHRegUse(u, HRmRead, hregX86_EAX()); break;
            case 0: break;
            default: vpanic("getRegUsage_X86Instr:Call:regparms");
         }
         /* Finally, there is the issue that the insn trashes a
            register because the literal target address has to be
            loaded into a register.  Fortunately, for the 0/1/2
            regparm case, we can use EAX, EDX and ECX respectively, so
            this does not cause any further damage.  For the 3-regparm
            case, we'll have to choose another register arbitrarily --
            since A, D and C are used for parameters -- and so we might
            as well choose EDI. */
         if (i->Xin.Call.regparms == 3)
            addHRegUse(u, HRmWrite, hregX86_EDI());
         /* Upshot of this is that the assembler really must observe
            the here-stated convention of which register to use as an
            address temporary, depending on the regparmness: 0==EAX,
            1==EDX, 2==ECX, 3==EDI. */
         return;
      case Xin_Goto:
         addRegUsage_X86RI(u, i->Xin.Goto.dst);
         addHRegUse(u, HRmWrite, hregX86_EAX()); /* used for next guest addr */
         addHRegUse(u, HRmWrite, hregX86_EDX()); /* used for dispatcher addr */
         if (i->Xin.Goto.jk != Ijk_Boring
             && i->Xin.Goto.jk != Ijk_Call
             && i->Xin.Goto.jk != Ijk_Ret)
            /* note, this is irrelevant since ebp is not actually
               available to the allocator.  But still .. */
            addHRegUse(u, HRmWrite, hregX86_EBP());
         return;
      case Xin_CMov32:
         addRegUsage_X86RM(u, i->Xin.CMov32.src, HRmRead);
         addHRegUse(u, HRmModify, i->Xin.CMov32.dst);
         return;
      case Xin_LoadEX:
         addRegUsage_X86AMode(u, i->Xin.LoadEX.src);
         addHRegUse(u, HRmWrite, i->Xin.LoadEX.dst);
         return;
      case Xin_Store:
         addHRegUse(u, HRmRead, i->Xin.Store.src);
         addRegUsage_X86AMode(u, i->Xin.Store.dst);
         return;
      case Xin_Set32:
         addHRegUse(u, HRmWrite, i->Xin.Set32.dst);
         return;
      case Xin_Bsfr32:
         addHRegUse(u, HRmRead, i->Xin.Bsfr32.src);
         addHRegUse(u, HRmWrite, i->Xin.Bsfr32.dst);
         return;
      case Xin_MFence:
         return;
      case Xin_ACAS:
         addRegUsage_X86AMode(u, i->Xin.ACAS.addr);
         addHRegUse(u, HRmRead, hregX86_EBX());
         addHRegUse(u, HRmModify, hregX86_EAX());
         return;
      case Xin_DACAS:
         addRegUsage_X86AMode(u, i->Xin.DACAS.addr);
         addHRegUse(u, HRmRead, hregX86_ECX());
         addHRegUse(u, HRmRead, hregX86_EBX());
         addHRegUse(u, HRmModify, hregX86_EDX());
         addHRegUse(u, HRmModify, hregX86_EAX());
         return;
      case Xin_FpUnary:
         addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
         addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
         return;
      case Xin_FpBinary:
         addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL);
         addHRegUse(u, HRmRead, i->Xin.FpBinary.srcR);
         addHRegUse(u, HRmWrite, i->Xin.FpBinary.dst);
         return;
      case Xin_FpLdSt:
         addRegUsage_X86AMode(u, i->Xin.FpLdSt.addr);
         addHRegUse(u, i->Xin.FpLdSt.isLoad ? HRmWrite : HRmRead,
                       i->Xin.FpLdSt.reg);
         return;
      case Xin_FpLdStI:
         addRegUsage_X86AMode(u, i->Xin.FpLdStI.addr);
         addHRegUse(u, i->Xin.FpLdStI.isLoad ? HRmWrite : HRmRead,
                       i->Xin.FpLdStI.reg);
         return;
      case Xin_Fp64to32:
         addHRegUse(u, HRmRead,  i->Xin.Fp64to32.src);
         addHRegUse(u, HRmWrite, i->Xin.Fp64to32.dst);
         return;
      case Xin_FpCMov:
         addHRegUse(u, HRmRead,   i->Xin.FpCMov.src);
         addHRegUse(u, HRmModify, i->Xin.FpCMov.dst);
         return;
      case Xin_FpLdCW:
         addRegUsage_X86AMode(u, i->Xin.FpLdCW.addr);
         return;
      case Xin_FpStSW_AX:
         addHRegUse(u, HRmWrite, hregX86_EAX());
         return;
      case Xin_FpCmp:
         addHRegUse(u, HRmRead, i->Xin.FpCmp.srcL);
         addHRegUse(u, HRmRead, i->Xin.FpCmp.srcR);
         addHRegUse(u, HRmWrite, i->Xin.FpCmp.dst);
         addHRegUse(u, HRmWrite, hregX86_EAX());
         return;
      case Xin_SseLdSt:
         addRegUsage_X86AMode(u, i->Xin.SseLdSt.addr);
         addHRegUse(u, i->Xin.SseLdSt.isLoad ? HRmWrite : HRmRead,
                       i->Xin.SseLdSt.reg);
         return;
      case Xin_SseLdzLO:
         addRegUsage_X86AMode(u, i->Xin.SseLdzLO.addr);
         addHRegUse(u, HRmWrite, i->Xin.SseLdzLO.reg);
         return;
      case Xin_SseConst:
         addHRegUse(u, HRmWrite, i->Xin.SseConst.dst);
         return;
      case Xin_Sse32Fx4:
         vassert(i->Xin.Sse32Fx4.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse32Fx4.op == Xsse_RCPF
                         || i->Xin.Sse32Fx4.op == Xsse_RSQRTF
                         || i->Xin.Sse32Fx4.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse32Fx4.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Xin.Sse32Fx4.dst);
         return;
      case Xin_Sse32FLo:
         vassert(i->Xin.Sse32FLo.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse32FLo.op == Xsse_RCPF
                         || i->Xin.Sse32FLo.op == Xsse_RSQRTF
                         || i->Xin.Sse32FLo.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse32FLo.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Xin.Sse32FLo.dst);
         return;
      case Xin_Sse64Fx2:
         vassert(i->Xin.Sse64Fx2.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse64Fx2.op == Xsse_RCPF
                         || i->Xin.Sse64Fx2.op == Xsse_RSQRTF
                         || i->Xin.Sse64Fx2.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse64Fx2.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Xin.Sse64Fx2.dst);
         return;
      case Xin_Sse64FLo:
         vassert(i->Xin.Sse64FLo.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse64FLo.op == Xsse_RCPF
                         || i->Xin.Sse64FLo.op == Xsse_RSQRTF
                         || i->Xin.Sse64FLo.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse64FLo.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Xin.Sse64FLo.dst);
         return;
      case Xin_SseReRg:
         if (i->Xin.SseReRg.op == Xsse_XOR
             && i->Xin.SseReRg.src == i->Xin.SseReRg.dst) {
            /* reg-alloc needs to understand 'xor r,r' as a write of r */
            /* (as opposed to a rite of passage :-) */
            addHRegUse(u, HRmWrite, i->Xin.SseReRg.dst);
         } else {
            addHRegUse(u, HRmRead, i->Xin.SseReRg.src);
            addHRegUse(u, i->Xin.SseReRg.op == Xsse_MOV
                             ? HRmWrite : HRmModify,
                          i->Xin.SseReRg.dst);
         }
         return;
      case Xin_SseCMov:
         addHRegUse(u, HRmRead,   i->Xin.SseCMov.src);
         addHRegUse(u, HRmModify, i->Xin.SseCMov.dst);
         return;
      case Xin_SseShuf:
         addHRegUse(u, HRmRead,  i->Xin.SseShuf.src);
         addHRegUse(u, HRmWrite, i->Xin.SseShuf.dst);
         return;
      default:
         ppX86Instr(i, False);
         vpanic("getRegUsage_X86Instr");
   }
}

/* local helper */
static void mapReg( HRegRemap* m, HReg* r )
{
   *r = lookupHRegRemap(m, *r);
}

void mapRegs_X86Instr ( HRegRemap* m, X86Instr* i, Bool mode64 )
{
   vassert(mode64 == False);
   switch (i->tag) {
      case Xin_Alu32R:
         mapRegs_X86RMI(m, i->Xin.Alu32R.src);
         mapReg(m, &i->Xin.Alu32R.dst);
         return;
      case Xin_Alu32M:
         mapRegs_X86RI(m, i->Xin.Alu32M.src);
         mapRegs_X86AMode(m, i->Xin.Alu32M.dst);
         return;
      case Xin_Sh32:
         mapReg(m, &i->Xin.Sh32.dst);
         return;
      case Xin_Test32:
         mapRegs_X86RM(m, i->Xin.Test32.dst);
         return;
      case Xin_Unary32:
         mapReg(m, &i->Xin.Unary32.dst);
         return;
      case Xin_Lea32:
         mapRegs_X86AMode(m, i->Xin.Lea32.am);
         mapReg(m, &i->Xin.Lea32.dst);
         return;
      case Xin_MulL:
         mapRegs_X86RM(m, i->Xin.MulL.src);
         return;
      case Xin_Div:
         mapRegs_X86RM(m, i->Xin.Div.src);
   1456       case Xin_Sh3232:
   1457          mapReg(m, &i->Xin.Sh3232.src);
   1458          mapReg(m, &i->Xin.Sh3232.dst);
   1459          return;
   1460       case Xin_Push:
   1461          mapRegs_X86RMI(m, i->Xin.Push.src);
   1462          return;
   1463       case Xin_Call:
   1464          return;
   1465       case Xin_Goto:
   1466          mapRegs_X86RI(m, i->Xin.Goto.dst);
   1467          return;
   1468       case Xin_CMov32:
   1469          mapRegs_X86RM(m, i->Xin.CMov32.src);
   1470          mapReg(m, &i->Xin.CMov32.dst);
   1471          return;
   1472       case Xin_LoadEX:
   1473          mapRegs_X86AMode(m, i->Xin.LoadEX.src);
   1474          mapReg(m, &i->Xin.LoadEX.dst);
   1475          return;
   1476       case Xin_Store:
   1477          mapReg(m, &i->Xin.Store.src);
   1478          mapRegs_X86AMode(m, i->Xin.Store.dst);
   1479          return;
   1480       case Xin_Set32:
   1481          mapReg(m, &i->Xin.Set32.dst);
   1482          return;
   1483       case Xin_Bsfr32:
   1484          mapReg(m, &i->Xin.Bsfr32.src);
   1485          mapReg(m, &i->Xin.Bsfr32.dst);
   1486          return;
   1487       case Xin_MFence:
   1488          return;
   1489       case Xin_ACAS:
   1490          mapRegs_X86AMode(m, i->Xin.ACAS.addr);
   1491          return;
   1492       case Xin_DACAS:
   1493          mapRegs_X86AMode(m, i->Xin.DACAS.addr);
   1494          return;
   1495       case Xin_FpUnary:
   1496          mapReg(m, &i->Xin.FpUnary.src);
   1497          mapReg(m, &i->Xin.FpUnary.dst);
   1498          return;
   1499       case Xin_FpBinary:
   1500          mapReg(m, &i->Xin.FpBinary.srcL);
   1501          mapReg(m, &i->Xin.FpBinary.srcR);
   1502          mapReg(m, &i->Xin.FpBinary.dst);
   1503          return;
   1504       case Xin_FpLdSt:
   1505          mapRegs_X86AMode(m, i->Xin.FpLdSt.addr);
   1506          mapReg(m, &i->Xin.FpLdSt.reg);
   1507          return;
   1508       case Xin_FpLdStI:
   1509          mapRegs_X86AMode(m, i->Xin.FpLdStI.addr);
   1510          mapReg(m, &i->Xin.FpLdStI.reg);
   1511          return;
   1512       case Xin_Fp64to32:
   1513          mapReg(m, &i->Xin.Fp64to32.src);
   1514          mapReg(m, &i->Xin.Fp64to32.dst);
   1515          return;
   1516       case Xin_FpCMov:
   1517          mapReg(m, &i->Xin.FpCMov.src);
   1518          mapReg(m, &i->Xin.FpCMov.dst);
   1519          return;
   1520       case Xin_FpLdCW:
   1521          mapRegs_X86AMode(m, i->Xin.FpLdCW.addr);
   1522          return;
   1523       case Xin_FpStSW_AX:
   1524          return;
   1525       case Xin_FpCmp:
   1526          mapReg(m, &i->Xin.FpCmp.srcL);
   1527          mapReg(m, &i->Xin.FpCmp.srcR);
   1528          mapReg(m, &i->Xin.FpCmp.dst);
   1529          return;
   1530       case Xin_SseConst:
   1531          mapReg(m, &i->Xin.SseConst.dst);
   1532          return;
   1533       case Xin_SseLdSt:
   1534          mapReg(m, &i->Xin.SseLdSt.reg);
   1535          mapRegs_X86AMode(m, i->Xin.SseLdSt.addr);
   1536          return;
   1537       case Xin_SseLdzLO:
   1538          mapReg(m, &i->Xin.SseLdzLO.reg);
   1539          mapRegs_X86AMode(m, i->Xin.SseLdzLO.addr);
   1540          return;
   1541       case Xin_Sse32Fx4:
   1542          mapReg(m, &i->Xin.Sse32Fx4.src);
   1543          mapReg(m, &i->Xin.Sse32Fx4.dst);
   1544          return;
   1545       case Xin_Sse32FLo:
   1546          mapReg(m, &i->Xin.Sse32FLo.src);
   1547          mapReg(m, &i->Xin.Sse32FLo.dst);
   1548          return;
   1549       case Xin_Sse64Fx2:
   1550          mapReg(m, &i->Xin.Sse64Fx2.src);
   1551          mapReg(m, &i->Xin.Sse64Fx2.dst);
   1552          return;
   1553       case Xin_Sse64FLo:
   1554          mapReg(m, &i->Xin.Sse64FLo.src);
   1555          mapReg(m, &i->Xin.Sse64FLo.dst);
   1556          return;
   1557       case Xin_SseReRg:
   1558          mapReg(m, &i->Xin.SseReRg.src);
   1559          mapReg(m, &i->Xin.SseReRg.dst);
   1560          return;
   1561       case Xin_SseCMov:
   1562          mapReg(m, &i->Xin.SseCMov.src);
   1563          mapReg(m, &i->Xin.SseCMov.dst);
   1564          return;
   1565       case Xin_SseShuf:
   1566          mapReg(m, &i->Xin.SseShuf.src);
   1567          mapReg(m, &i->Xin.SseShuf.dst);
   1568          return;
   1569       default:
   1570          ppX86Instr(i, mode64);
   1571          vpanic("mapRegs_X86Instr");
   1572    }
   1573 }
   1574 
   1575 /* Figure out if i represents a reg-reg move, and if so assign the
   1576    source and destination to *src and *dst.  If in doubt say No.  Used
   1577    by the register allocator to do move coalescing.
   1578 */
   1579 Bool isMove_X86Instr ( X86Instr* i, HReg* src, HReg* dst )
   1580 {
   1581    /* Moves between integer regs */
   1582    if (i->tag == Xin_Alu32R) {
   1583       if (i->Xin.Alu32R.op != Xalu_MOV)
   1584          return False;
   1585       if (i->Xin.Alu32R.src->tag != Xrmi_Reg)
   1586          return False;
   1587       *src = i->Xin.Alu32R.src->Xrmi.Reg.reg;
   1588       *dst = i->Xin.Alu32R.dst;
   1589       return True;
   1590    }
   1591    /* Moves between FP regs */
   1592    if (i->tag == Xin_FpUnary) {
   1593       if (i->Xin.FpUnary.op != Xfp_MOV)
   1594          return False;
   1595       *src = i->Xin.FpUnary.src;
   1596       *dst = i->Xin.FpUnary.dst;
   1597       return True;
   1598    }
   1599    if (i->tag == Xin_SseReRg) {
   1600       if (i->Xin.SseReRg.op != Xsse_MOV)
   1601          return False;
   1602       *src = i->Xin.SseReRg.src;
   1603       *dst = i->Xin.SseReRg.dst;
   1604       return True;
   1605    }
   1606    return False;
   1607 }
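        /* Illustrative example (virtual register numbers invented): a
           move between virtual regs v5 and v7 arrives here as
           Xin_Alu32R{op=Xalu_MOV, src=Xrmi_Reg(v5), dst=v7}, so *src
           and *dst are set and the allocator may coalesce the pair.
           A MOV from an Xrmi_Mem or Xrmi_Imm source is not a reg-reg
           move and correctly falls through to False. */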
   1608 
   1609 
   1610 /* Generate x86 spill/reload instructions under the direction of the
   1611    register allocator.  Note it's critical these don't write the
   1612    condition codes. */
   1613 
   1614 void genSpill_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
   1615                     HReg rreg, Int offsetB, Bool mode64 )
   1616 {
   1617    X86AMode* am;
   1618    vassert(offsetB >= 0);
   1619    vassert(!hregIsVirtual(rreg));
   1620    vassert(mode64 == False);
   1621    *i1 = *i2 = NULL;
   1622    am = X86AMode_IR(offsetB, hregX86_EBP());
   1623    switch (hregClass(rreg)) {
   1624       case HRcInt32:
   1625          *i1 = X86Instr_Alu32M ( Xalu_MOV, X86RI_Reg(rreg), am );
   1626          return;
   1627       case HRcFlt64:
   1628          *i1 = X86Instr_FpLdSt ( False/*store*/, 10, rreg, am );
   1629          return;
   1630       case HRcVec128:
   1631          *i1 = X86Instr_SseLdSt ( False/*store*/, rreg, am );
   1632          return;
   1633       default:
   1634          ppHRegClass(hregClass(rreg));
   1635          vpanic("genSpill_X86: unimplemented regclass");
   1636    }
   1637 }
   1638 
   1639 void genReload_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
   1640                      HReg rreg, Int offsetB, Bool mode64 )
   1641 {
   1642    X86AMode* am;
   1643    vassert(offsetB >= 0);
   1644    vassert(!hregIsVirtual(rreg));
   1645    vassert(mode64 == False);
   1646    *i1 = *i2 = NULL;
   1647    am = X86AMode_IR(offsetB, hregX86_EBP());
   1648    switch (hregClass(rreg)) {
   1649       case HRcInt32:
   1650          *i1 = X86Instr_Alu32R ( Xalu_MOV, X86RMI_Mem(am), rreg );
   1651          return;
   1652       case HRcFlt64:
   1653          *i1 = X86Instr_FpLdSt ( True/*load*/, 10, rreg, am );
   1654          return;
   1655       case HRcVec128:
   1656          *i1 = X86Instr_SseLdSt ( True/*load*/, rreg, am );
   1657          return;
   1658       default:
   1659          ppHRegClass(hregClass(rreg));
   1660          vpanic("genReload_X86: unimplemented regclass");
   1661    }
   1662 }
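        /* Illustrative example (offset chosen arbitrarily): spilling
           integer reg %esi to slot offset 64 yields the single insn
           "movl %esi, 64(%ebp)", and the matching reload is
           "movl 64(%ebp), %esi".  MOV-style insns are used for all
           three register classes precisely because they leave %eflags
           untouched, as required above. */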
   1663 
   1664 /* The given instruction reads the specified vreg exactly once, and
   1665    that vreg is currently located at the given spill offset.  If
   1666    possible, return a variant of the instruction which instead
   1667    references the spill slot directly. */
   1668 
   1669 X86Instr* directReload_X86( X86Instr* i, HReg vreg, Short spill_off )
   1670 {
   1671    vassert(spill_off >= 0 && spill_off < 10000); /* let's say */
   1672 
   1673    /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg
   1674       Convert to: src=RMI_Mem, dst=Reg
   1675    */
   1676    if (i->tag == Xin_Alu32R
   1677        && (i->Xin.Alu32R.op == Xalu_MOV || i->Xin.Alu32R.op == Xalu_OR
   1678            || i->Xin.Alu32R.op == Xalu_XOR)
   1679        && i->Xin.Alu32R.src->tag == Xrmi_Reg
   1680        && i->Xin.Alu32R.src->Xrmi.Reg.reg == vreg) {
   1681       vassert(i->Xin.Alu32R.dst != vreg);
   1682       return X86Instr_Alu32R(
   1683                 i->Xin.Alu32R.op,
   1684                 X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP())),
   1685                 i->Xin.Alu32R.dst
   1686              );
   1687    }
   1688 
   1689    /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg
   1690       Convert to: src=RI_Imm, dst=Mem
   1691    */
   1692    if (i->tag == Xin_Alu32R
   1693        && (i->Xin.Alu32R.op == Xalu_CMP)
   1694        && i->Xin.Alu32R.src->tag == Xrmi_Imm
   1695        && i->Xin.Alu32R.dst == vreg) {
   1696       return X86Instr_Alu32M(
   1697                 i->Xin.Alu32R.op,
   1698                 X86RI_Imm( i->Xin.Alu32R.src->Xrmi.Imm.imm32 ),
   1699                 X86AMode_IR( spill_off, hregX86_EBP())
   1700              );
   1701    }
   1702 
   1703    /* Deal with form: Push(RMI_Reg)
   1704       Convert to: Push(RMI_Mem)
   1705    */
   1706    if (i->tag == Xin_Push
   1707        && i->Xin.Push.src->tag == Xrmi_Reg
   1708        && i->Xin.Push.src->Xrmi.Reg.reg == vreg) {
   1709       return X86Instr_Push(
   1710                 X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP()))
   1711              );
   1712    }
   1713 
   1714    /* Deal with form: CMov32(src=RM_Reg, dst) where vreg == src
   1715       Convert to CMov32(RM_Mem, dst) */
   1716    if (i->tag == Xin_CMov32
   1717        && i->Xin.CMov32.src->tag == Xrm_Reg
   1718        && i->Xin.CMov32.src->Xrm.Reg.reg == vreg) {
   1719       vassert(i->Xin.CMov32.dst != vreg);
   1720       return X86Instr_CMov32(
   1721                 i->Xin.CMov32.cond,
   1722                 X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() )),
   1723                 i->Xin.CMov32.dst
   1724              );
   1725    }
   1726 
   1727    /* Deal with form: Test32(imm,RM_Reg vreg) -> Test32(imm,amode) */
   1728    if (i->tag == Xin_Test32
   1729        && i->Xin.Test32.dst->tag == Xrm_Reg
   1730        && i->Xin.Test32.dst->Xrm.Reg.reg == vreg) {
   1731       return X86Instr_Test32(
   1732                 i->Xin.Test32.imm32,
   1733                 X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() ) )
   1734              );
   1735    }
   1736 
   1737    return NULL;
   1738 }
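        /* Illustrative example (vreg/offset invented): if vreg v3
           lives in the slot at offset 24, then "cmpl $7, v3" (Alu32R,
           Xrmi_Imm src, dst == vreg) is rewritten by the second
           pattern above into "cmpl $7, 24(%ebp)" (Alu32M), avoiding a
           separate reload instruction. */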
   1739 
   1740 
   1741 /* --------- The x86 assembler (bleh.) --------- */
   1742 
   1743 static UChar iregNo ( HReg r )
   1744 {
   1745    UInt n;
   1746    vassert(hregClass(r) == HRcInt32);
   1747    vassert(!hregIsVirtual(r));
   1748    n = hregNumber(r);
   1749    vassert(n <= 7);
   1750    return toUChar(n);
   1751 }
   1752 
   1753 static UInt fregNo ( HReg r )
   1754 {
   1755    UInt n;
   1756    vassert(hregClass(r) == HRcFlt64);
   1757    vassert(!hregIsVirtual(r));
   1758    n = hregNumber(r);
   1759    vassert(n <= 5);
   1760    return n;
   1761 }
   1762 
   1763 static UInt vregNo ( HReg r )
   1764 {
   1765    UInt n;
   1766    vassert(hregClass(r) == HRcVec128);
   1767    vassert(!hregIsVirtual(r));
   1768    n = hregNumber(r);
   1769    vassert(n <= 7);
   1770    return n;
   1771 }
   1772 
   1773 static UChar mkModRegRM ( UChar mod, UChar reg, UChar regmem )
   1774 {
   1775    return toUChar( ((mod & 3) << 6)
   1776                    | ((reg & 7) << 3)
   1777                    | (regmem & 7) );
   1778 }
   1779 
   1780 static UChar mkSIB ( Int shift, Int regindex, Int regbase )
   1781 {
   1782    return toUChar( ((shift & 3) << 6)
   1783                    | ((regindex & 7) << 3)
   1784                    | (regbase & 7) );
   1785 }
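        /* E.g. mkModRegRM(3, 0 (%eax), 3 (%ebx)) gives 0xC3, the ModRM
           byte for the register-direct form with greg=%eax, ereg=%ebx;
           and mkSIB(2, 1 (%ecx), 3 (%ebx)) gives 0x8B, the SIB byte
           denoting (%ebx,%ecx,4). */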
   1786 
   1787 static UChar* emit32 ( UChar* p, UInt w32 )
   1788 {
   1789    *p++ = toUChar( w32        & 0x000000FF);
   1790    *p++ = toUChar((w32 >>  8) & 0x000000FF);
   1791    *p++ = toUChar((w32 >> 16) & 0x000000FF);
   1792    *p++ = toUChar((w32 >> 24) & 0x000000FF);
   1793    return p;
   1794 }
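        /* E.g. emit32(p, 0x12345678) lays down 78 56 34 12: x86
           immediates and displacements are little-endian, LSB first. */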
   1795 
   1796 /* Does a sign-extend of the lowest 8 bits give
   1797    the original number? */
   1798 static Bool fits8bits ( UInt w32 )
   1799 {
   1800    Int i32 = (Int)w32;
   1801    return toBool(i32 == ((i32 << 24) >> 24));
   1802 }
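        /* So fits8bits(0x7F) and fits8bits(0xFFFFFF80) (i.e. -128) are
           True, but fits8bits(0x80) is False.  This is what selects
           the short sign-extended-imm8 encodings (0x83 etc) over the
           full imm32 ones (0x81 etc) in the emitter below. */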
   1803 
   1804 
   1805 /* Forming mod-reg-rm bytes and scale-index-base bytes.
   1806 
   1807      greg,  0(ereg)    |  ereg != ESP && ereg != EBP
   1808                        =  00 greg ereg
   1809 
   1810      greg,  d8(ereg)   |  ereg != ESP
   1811                        =  01 greg ereg, d8
   1812 
   1813      greg,  d32(ereg)  |  ereg != ESP
   1814                        =  10 greg ereg, d32
   1815 
   1816      greg,  d8(%esp)   =  01 greg 100, 0x24, d8
   1817 
   1818      -----------------------------------------------
   1819 
   1820      greg,  d8(base,index,scale)
   1821                |  index != ESP
   1822                =  01 greg 100, scale index base, d8
   1823 
   1824      greg,  d32(base,index,scale)
   1825                |  index != ESP
   1826                =  10 greg 100, scale index base, d32
   1827 */
   1828 static UChar* doAMode_M ( UChar* p, HReg greg, X86AMode* am )
   1829 {
   1830    if (am->tag == Xam_IR) {
   1831       if (am->Xam.IR.imm == 0
   1832           && am->Xam.IR.reg != hregX86_ESP()
   1833           && am->Xam.IR.reg != hregX86_EBP() ) {
   1834          *p++ = mkModRegRM(0, iregNo(greg), iregNo(am->Xam.IR.reg));
   1835          return p;
   1836       }
   1837       if (fits8bits(am->Xam.IR.imm)
   1838           && am->Xam.IR.reg != hregX86_ESP()) {
   1839          *p++ = mkModRegRM(1, iregNo(greg), iregNo(am->Xam.IR.reg));
   1840          *p++ = toUChar(am->Xam.IR.imm & 0xFF);
   1841          return p;
   1842       }
   1843       if (am->Xam.IR.reg != hregX86_ESP()) {
   1844          *p++ = mkModRegRM(2, iregNo(greg), iregNo(am->Xam.IR.reg));
   1845          p = emit32(p, am->Xam.IR.imm);
   1846          return p;
   1847       }
   1848       if (am->Xam.IR.reg == hregX86_ESP()
   1849           && fits8bits(am->Xam.IR.imm)) {
   1850          *p++ = mkModRegRM(1, iregNo(greg), 4);
   1851          *p++ = 0x24;
   1852          *p++ = toUChar(am->Xam.IR.imm & 0xFF);
   1853          return p;
   1854       }
   1855       ppX86AMode(am);
   1856       vpanic("doAMode_M: can't emit amode IR");
   1857       /*NOTREACHED*/
   1858    }
   1859    if (am->tag == Xam_IRRS) {
   1860       if (fits8bits(am->Xam.IRRS.imm)
   1861           && am->Xam.IRRS.index != hregX86_ESP()) {
   1862          *p++ = mkModRegRM(1, iregNo(greg), 4);
   1863          *p++ = mkSIB(am->Xam.IRRS.shift, am->Xam.IRRS.index,
   1864                                           am->Xam.IRRS.base);
   1865          *p++ = toUChar(am->Xam.IRRS.imm & 0xFF);
   1866          return p;
   1867       }
   1868       if (am->Xam.IRRS.index != hregX86_ESP()) {
   1869          *p++ = mkModRegRM(2, iregNo(greg), 4);
   1870          *p++ = mkSIB(am->Xam.IRRS.shift, am->Xam.IRRS.index,
   1871                                           am->Xam.IRRS.base);
   1872          p = emit32(p, am->Xam.IRRS.imm);
   1873          return p;
   1874       }
   1875       ppX86AMode(am);
   1876       vpanic("doAMode_M: can't emit amode IRRS");
   1877       /*NOTREACHED*/
   1878    }
   1879    vpanic("doAMode_M: unknown amode");
   1880    /*NOTREACHED*/
   1881 }
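        /* Worked examples of the cases above, shown with greg=0 (a
           nonzero greg is OR'd into bits 5:3 of the ModRM byte):
              4(%eax)         ->  40 04              (mod=01, disp8)
              256(%eax)       ->  80 00 01 00 00     (mod=10, disp32)
              8(%esp)         ->  44 24 08           (ESP special case)
              4(%ebx,%ecx,4)  ->  44 8B 04           (SIB form)
        */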
   1882 
   1883 
   1884 /* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
   1885 static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
   1886 {
   1887    *p++ = mkModRegRM(3, iregNo(greg), iregNo(ereg));
   1888    return p;
   1889 }
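        /* E.g. greg=%ecx, ereg=%edx emits the single byte 0xCA
           (mod=11, reg=001, rm=010), the form used throughout
           emit_X86Instr for reg-reg operands. */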
   1890 
   1891 
   1892 /* Emit ffree %st(7) */
   1893 static UChar* do_ffree_st7 ( UChar* p )
   1894 {
   1895    *p++ = 0xDD;
   1896    *p++ = 0xC7;
   1897    return p;
   1898 }
   1899 
   1900 /* Emit fstp %st(i), 1 <= i <= 7 */
   1901 static UChar* do_fstp_st ( UChar* p, Int i )
   1902 {
   1903    vassert(1 <= i && i <= 7);
   1904    *p++ = 0xDD;
   1905    *p++ = toUChar(0xD8+i);
   1906    return p;
   1907 }
   1908 
   1909 /* Emit fld %st(i), 0 <= i <= 6 */
   1910 static UChar* do_fld_st ( UChar* p, Int i )
   1911 {
   1912    vassert(0 <= i && i <= 6);
   1913    *p++ = 0xD9;
   1914    *p++ = toUChar(0xC0+i);
   1915    return p;
   1916 }
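        /* These three helpers implement the "make room, push, compute,
           pop back" idiom used for the %fakeN registers below.  E.g.
           for a unary op on %fake2:
              ffree %st(7)    DD C7   (ensure a free stack slot)
              fld %st(2)      D9 C2   (push a copy of the operand)
              <op on %st(0)>
              fstp %st(3)     DD DB   (pop result into 1+2 = %st(3))
           The pervasive +1 offsets below account for the push. */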
   1917 
   1918 /* Emit f<op> %st(0) */
   1919 static UChar* do_fop1_st ( UChar* p, X86FpOp op )
   1920 {
   1921    switch (op) {
   1922       case Xfp_NEG:    *p++ = 0xD9; *p++ = 0xE0; break;
   1923       case Xfp_ABS:    *p++ = 0xD9; *p++ = 0xE1; break;
   1924       case Xfp_SQRT:   *p++ = 0xD9; *p++ = 0xFA; break;
   1925       case Xfp_ROUND:  *p++ = 0xD9; *p++ = 0xFC; break;
   1926       case Xfp_SIN:    *p++ = 0xD9; *p++ = 0xFE; break;
   1927       case Xfp_COS:    *p++ = 0xD9; *p++ = 0xFF; break;
   1928       case Xfp_2XM1:   *p++ = 0xD9; *p++ = 0xF0; break;
   1929       case Xfp_MOV:    break;
   1930       case Xfp_TAN:    p = do_ffree_st7(p); /* since fptan pushes 1.0 */
   1931                        *p++ = 0xD9; *p++ = 0xF2; /* fptan */
   1932                        *p++ = 0xD9; *p++ = 0xF7; /* fincstp */
   1933                        break;
   1934       default: vpanic("do_fop1_st: unknown op");
   1935    }
   1936    return p;
   1937 }
   1938 
   1939 /* Emit f<op> %st(i), 1 <= i <= 5 */
   1940 static UChar* do_fop2_st ( UChar* p, X86FpOp op, Int i )
   1941 {
   1942 #  define fake(_n) mkHReg((_n), HRcInt32, False)
   1943    Int subopc;
   1944    switch (op) {
   1945       case Xfp_ADD: subopc = 0; break;
   1946       case Xfp_SUB: subopc = 4; break;
   1947       case Xfp_MUL: subopc = 1; break;
   1948       case Xfp_DIV: subopc = 6; break;
   1949       default: vpanic("do_fop2_st: unknown op");
   1950    }
   1951    *p++ = 0xD8;
   1952    p    = doAMode_R(p, fake(subopc), fake(i));
   1953    return p;
   1954 #  undef fake
   1955 }
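        /* E.g. do_fop2_st(p, Xfp_ADD, 3) emits D8 C3, i.e.
           "fadd %st(3),%st(0)": the sub-opcode sits in the reg field
           and the stack index in the rm field of a mod=11 ModRM byte,
           hence the fake() wrapping. */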
   1956 
   1957 /* Push a 32-bit word on the stack.  The word depends on tags[3:0];
   1958    each byte is either 0x00 or 0xFF depending on the
   1959    corresponding bit in tags[3:0]. */
   1960 static UChar* push_word_from_tags ( UChar* p, UShort tags )
   1961 {
   1962    UInt w;
   1963    vassert(0 == (tags & ~0xF));
   1964    if (tags == 0) {
   1965       /* pushl $0x00000000 */
   1966       *p++ = 0x6A;
   1967       *p++ = 0x00;
   1968    }
   1969    else
   1970    /* pushl $0xFFFFFFFF */
   1971    if (tags == 0xF) {
   1972       *p++ = 0x6A;
   1973       *p++ = 0xFF;
   1974    } else {
   1975       vassert(0); /* awaiting test case */
   1976       w = 0;
   1977       if (tags & 1) w |= 0x000000FF;
   1978       if (tags & 2) w |= 0x0000FF00;
   1979       if (tags & 4) w |= 0x00FF0000;
   1980       if (tags & 8) w |= 0xFF000000;
   1981       *p++ = 0x68;
   1982       p = emit32(p, w);
   1983    }
   1984    return p;
   1985 }
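        /* Hence tags == 0x0 pushes 0x00000000 and tags == 0xF pushes
           0xFFFFFFFF, both via the 2-byte sign-extending "pushl $imm8"
           (6A xx).  A mixed tags value would need the 5-byte
           "pushl $imm32" (68 + 4 bytes), but that path is still
           untested, hence the vassert(0). */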
   1986 
   1987 /* Emit an instruction into buf and return the number of bytes used.
   1988    Note that buf is not the insn's final place, and therefore it is
   1989    imperative to emit position-independent code. */
   1990 
   1991 Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr* i,
   1992                     Bool mode64, void* dispatch )
   1993 {
   1994    UInt irno, opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
   1995 
   1996    UInt   xtra;
   1997    UChar* p = &buf[0];
   1998    UChar* ptmp;
   1999    vassert(nbuf >= 32);
   2000    vassert(mode64 == False);
   2001 
   2002    /* Wrap an integer as an int register, for use assembling
   2003       GrpN insns, in which the greg field is used as a sub-opcode
   2004       and does not really contain a register. */
   2005 #  define fake(_n) mkHReg((_n), HRcInt32, False)
   2006 
   2007    /* vex_printf("asm  ");ppX86Instr(i, mode64); vex_printf("\n"); */
   2008 
   2009    switch (i->tag) {
   2010 
   2011    case Xin_Alu32R:
   2012       /* Deal specially with MOV */
   2013       if (i->Xin.Alu32R.op == Xalu_MOV) {
   2014          switch (i->Xin.Alu32R.src->tag) {
   2015             case Xrmi_Imm:
   2016                *p++ = toUChar(0xB8 + iregNo(i->Xin.Alu32R.dst));
   2017                p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
   2018                goto done;
   2019             case Xrmi_Reg:
   2020                *p++ = 0x89;
   2021                p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
   2022                                 i->Xin.Alu32R.dst);
   2023                goto done;
   2024             case Xrmi_Mem:
   2025                *p++ = 0x8B;
   2026                p = doAMode_M(p, i->Xin.Alu32R.dst,
   2027                                 i->Xin.Alu32R.src->Xrmi.Mem.am);
   2028                goto done;
   2029             default:
   2030                goto bad;
   2031          }
   2032       }
   2033       /* MUL */
   2034       if (i->Xin.Alu32R.op == Xalu_MUL) {
   2035          switch (i->Xin.Alu32R.src->tag) {
   2036             case Xrmi_Reg:
   2037                *p++ = 0x0F;
   2038                *p++ = 0xAF;
   2039                p = doAMode_R(p, i->Xin.Alu32R.dst,
   2040                                 i->Xin.Alu32R.src->Xrmi.Reg.reg);
   2041                goto done;
   2042             case Xrmi_Mem:
   2043                *p++ = 0x0F;
   2044                *p++ = 0xAF;
   2045                p = doAMode_M(p, i->Xin.Alu32R.dst,
   2046                                 i->Xin.Alu32R.src->Xrmi.Mem.am);
   2047                goto done;
   2048             case Xrmi_Imm:
   2049                if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
   2050                   *p++ = 0x6B;
   2051                   p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
   2052                   *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
   2053                } else {
   2054                   *p++ = 0x69;
   2055                   p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
   2056                   p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
   2057                }
   2058                goto done;
   2059             default:
   2060                goto bad;
   2061          }
   2062       }
   2063       /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
   2064       opc = opc_rr = subopc_imm = opc_imma = 0;
   2065       switch (i->Xin.Alu32R.op) {
   2066          case Xalu_ADC: opc = 0x13; opc_rr = 0x11;
   2067                         subopc_imm = 2; opc_imma = 0x15; break;
   2068          case Xalu_ADD: opc = 0x03; opc_rr = 0x01;
   2069                         subopc_imm = 0; opc_imma = 0x05; break;
   2070          case Xalu_SUB: opc = 0x2B; opc_rr = 0x29;
   2071                         subopc_imm = 5; opc_imma = 0x2D; break;
   2072          case Xalu_SBB: opc = 0x1B; opc_rr = 0x19;
   2073                         subopc_imm = 3; opc_imma = 0x1D; break;
   2074          case Xalu_AND: opc = 0x23; opc_rr = 0x21;
   2075                         subopc_imm = 4; opc_imma = 0x25; break;
   2076          case Xalu_XOR: opc = 0x33; opc_rr = 0x31;
   2077                         subopc_imm = 6; opc_imma = 0x35; break;
   2078          case Xalu_OR:  opc = 0x0B; opc_rr = 0x09;
   2079                         subopc_imm = 1; opc_imma = 0x0D; break;
   2080          case Xalu_CMP: opc = 0x3B; opc_rr = 0x39;
   2081                         subopc_imm = 7; opc_imma = 0x3D; break;
   2082          default: goto bad;
   2083       }
   2084       switch (i->Xin.Alu32R.src->tag) {
   2085          case Xrmi_Imm:
   2086             if (i->Xin.Alu32R.dst == hregX86_EAX()
   2087                 && !fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
   2088                *p++ = toUChar(opc_imma);
   2089                p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
   2090             } else
   2091             if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
   2092                *p++ = 0x83;
   2093                p    = doAMode_R(p, fake(subopc_imm), i->Xin.Alu32R.dst);
   2094                *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
   2095             } else {
   2096                *p++ = 0x81;
   2097                p    = doAMode_R(p, fake(subopc_imm), i->Xin.Alu32R.dst);
   2098                p    = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
   2099             }
   2100             goto done;
   2101          case Xrmi_Reg:
   2102             *p++ = toUChar(opc_rr);
   2103             p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
   2104                              i->Xin.Alu32R.dst);
   2105             goto done;
   2106          case Xrmi_Mem:
   2107             *p++ = toUChar(opc);
   2108             p = doAMode_M(p, i->Xin.Alu32R.dst,
   2109                              i->Xin.Alu32R.src->Xrmi.Mem.am);
   2110             goto done;
   2111          default:
   2112             goto bad;
   2113       }
   2114       break;
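              /* So, for example: "addl $1, %ebx" assembles to 83 C3 01
                 (imm8 form), "addl $0x11223344, %ebx" to
                 81 C3 44 33 22 11 (imm32 form), and
                 "addl $0x11223344, %eax" to the shorter EAX-special
                 form 05 44 33 22 11. */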
   2115 
   2116    case Xin_Alu32M:
   2117       /* Deal specially with MOV */
   2118       if (i->Xin.Alu32M.op == Xalu_MOV) {
   2119          switch (i->Xin.Alu32M.src->tag) {
   2120             case Xri_Reg:
   2121                *p++ = 0x89;
   2122                p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
   2123                                 i->Xin.Alu32M.dst);
   2124                goto done;
   2125             case Xri_Imm:
   2126                *p++ = 0xC7;
   2127                p = doAMode_M(p, fake(0), i->Xin.Alu32M.dst);
   2128                p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
   2129                goto done;
   2130             default:
   2131                goto bad;
   2132          }
   2133       }
   2134       /* ADD/SUB/CMP only.  MUL and the remaining ALU ops are
   2135          not allowed here. */
   2136       opc = subopc_imm = opc_imma = 0;
   2137       switch (i->Xin.Alu32M.op) {
   2138          case Xalu_ADD: opc = 0x01; subopc_imm = 0; break;
   2139          case Xalu_SUB: opc = 0x29; subopc_imm = 5; break;
   2140          case Xalu_CMP: opc = 0x39; subopc_imm = 7; break;
   2141          default: goto bad;
   2142       }
   2143       switch (i->Xin.Alu32M.src->tag) {
   2144          case Xri_Reg:
   2145             *p++ = toUChar(opc);
   2146             p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
   2147                              i->Xin.Alu32M.dst);
   2148             goto done;
   2149          case Xri_Imm:
   2150             if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) {
   2151                *p++ = 0x83;
   2152                p    = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
   2153                *p++ = toUChar(0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32);
   2154                goto done;
   2155             } else {
   2156                *p++ = 0x81;
   2157                p    = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
   2158                p    = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
   2159                goto done;
   2160             }
   2161          default:
   2162             goto bad;
   2163       }
   2164       break;
   2165 
   2166    case Xin_Sh32:
   2167       opc_cl = opc_imm = subopc = 0;
   2168       switch (i->Xin.Sh32.op) {
   2169          case Xsh_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
   2170          case Xsh_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
   2171          case Xsh_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
   2172          default: goto bad;
   2173       }
   2174       if (i->Xin.Sh32.src == 0) {
   2175          *p++ = toUChar(opc_cl);
   2176          p = doAMode_R(p, fake(subopc), i->Xin.Sh32.dst);
   2177       } else {
   2178          *p++ = toUChar(opc_imm);
   2179          p = doAMode_R(p, fake(subopc), i->Xin.Sh32.dst);
   2180          *p++ = toUChar(i->Xin.Sh32.src);
   2181       }
   2182       goto done;
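              /* E.g. "shll $3, %eax" assembles to C1 E0 03 and
                 "shll %cl, %eax" to D3 E0.  Note that a .src of 0
                 means "shift by %cl", not a shift by zero. */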
   2183 
   2184    case Xin_Test32:
   2185       if (i->Xin.Test32.dst->tag == Xrm_Reg) {
   2186          /* testl $imm32, %reg */
   2187          *p++ = 0xF7;
   2188          p = doAMode_R(p, fake(0), i->Xin.Test32.dst->Xrm.Reg.reg);
   2189          p = emit32(p, i->Xin.Test32.imm32);
   2190          goto done;
   2191       } else {
   2192          /* testl $imm32, amode */
   2193          *p++ = 0xF7;
   2194          p = doAMode_M(p, fake(0), i->Xin.Test32.dst->Xrm.Mem.am);
   2195          p = emit32(p, i->Xin.Test32.imm32);
   2196          goto done;
   2197       }
   2198 
   2199    case Xin_Unary32:
   2200       if (i->Xin.Unary32.op == Xun_NOT) {
   2201          *p++ = 0xF7;
   2202          p = doAMode_R(p, fake(2), i->Xin.Unary32.dst);
   2203          goto done;
   2204       }
   2205       if (i->Xin.Unary32.op == Xun_NEG) {
   2206          *p++ = 0xF7;
   2207          p = doAMode_R(p, fake(3), i->Xin.Unary32.dst);
   2208          goto done;
   2209       }
   2210       break;
   2211 
   2212    case Xin_Lea32:
   2213       *p++ = 0x8D;
   2214       p = doAMode_M(p, i->Xin.Lea32.dst, i->Xin.Lea32.am);
   2215       goto done;
   2216 
   2217    case Xin_MulL:
   2218       subopc = i->Xin.MulL.syned ? 5 : 4;
   2219       *p++ = 0xF7;
   2220       switch (i->Xin.MulL.src->tag)  {
   2221          case Xrm_Mem:
   2222             p = doAMode_M(p, fake(subopc),
   2223                              i->Xin.MulL.src->Xrm.Mem.am);
   2224             goto done;
   2225          case Xrm_Reg:
   2226             p = doAMode_R(p, fake(subopc),
   2227                              i->Xin.MulL.src->Xrm.Reg.reg);
   2228             goto done;
   2229          default:
   2230             goto bad;
   2231       }
   2232       break;
   2233 
   2234    case Xin_Div:
   2235       subopc = i->Xin.Div.syned ? 7 : 6;
   2236       *p++ = 0xF7;
   2237       switch (i->Xin.Div.src->tag)  {
   2238          case Xrm_Mem:
   2239             p = doAMode_M(p, fake(subopc),
   2240                              i->Xin.Div.src->Xrm.Mem.am);
   2241             goto done;
   2242          case Xrm_Reg:
   2243             p = doAMode_R(p, fake(subopc),
   2244                              i->Xin.Div.src->Xrm.Reg.reg);
   2245             goto done;
   2246          default:
   2247             goto bad;
   2248       }
   2249       break;
   2250 
   2251    case Xin_Sh3232:
   2252       vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR);
   2253       if (i->Xin.Sh3232.amt == 0) {
   2254          /* shldl/shrdl by %cl */
   2255          *p++ = 0x0F;
   2256          if (i->Xin.Sh3232.op == Xsh_SHL) {
   2257             *p++ = 0xA5;
   2258          } else {
   2259             *p++ = 0xAD;
   2260          }
   2261          p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst);
   2262          goto done;
   2263       }
   2264       break;
   2265 
   2266    case Xin_Push:
   2267       switch (i->Xin.Push.src->tag) {
   2268          case Xrmi_Mem:
   2269             *p++ = 0xFF;
   2270             p = doAMode_M(p, fake(6), i->Xin.Push.src->Xrmi.Mem.am);
   2271             goto done;
   2272          case Xrmi_Imm:
   2273             *p++ = 0x68;
   2274             p = emit32(p, i->Xin.Push.src->Xrmi.Imm.imm32);
   2275             goto done;
   2276          case Xrmi_Reg:
   2277             *p++ = toUChar(0x50 + iregNo(i->Xin.Push.src->Xrmi.Reg.reg));
   2278             goto done;
   2279          default:
   2280             goto bad;
   2281       }
   2282 
   2283    case Xin_Call:
   2284       /* See detailed comment for Xin_Call in getRegUsage_X86Instr above
   2285          for explanation of this. */
   2286       switch (i->Xin.Call.regparms) {
   2287          case 0: irno = iregNo(hregX86_EAX()); break;
   2288          case 1: irno = iregNo(hregX86_EDX()); break;
   2289          case 2: irno = iregNo(hregX86_ECX()); break;
   2290          case 3: irno = iregNo(hregX86_EDI()); break;
   2291          default: vpanic(" emit_X86Instr:call:regparms");
   2292       }
   2293       /* jump over the following two insns if the condition does not
   2294          hold */
   2295       if (i->Xin.Call.cond != Xcc_ALWAYS) {
   2296          *p++ = toUChar(0x70 + (0xF & (i->Xin.Call.cond ^ 1)));
   2297          *p++ = 0x07; /* 7 bytes in the next two insns */
   2298       }
   2299       /* movl $target, %tmp */
   2300       *p++ = toUChar(0xB8 + irno);
   2301       p = emit32(p, i->Xin.Call.target);
   2302       /* call *%tmp */
   2303       *p++ = 0xFF;
   2304       *p++ = toUChar(0xD0 + irno);
   2305       goto done;
   2306 
   2307    case Xin_Goto:
   2308       /* Use ptmp for backpatching conditional jumps. */
   2309       ptmp = NULL;
   2310 
   2311       /* First off, if this is conditional, create a conditional
   2312          jump over the rest of it. */
   2313       if (i->Xin.Goto.cond != Xcc_ALWAYS) {
   2314          /* jmp fwds if !condition */
   2315          *p++ = toUChar(0x70 + (0xF & (i->Xin.Goto.cond ^ 1)));
   2316          ptmp = p; /* fill in this bit later */
   2317          *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
   2318       }
   2319 
   2320       /* If the jump kind is non-boring, set %ebp (the guest
   2321          state pointer) appropriately. */
   2322       /* movl $magic_number, %ebp */
   2323       switch (i->Xin.Goto.jk) {
   2324          case Ijk_ClientReq:
   2325             *p++ = 0xBD;
   2326             p = emit32(p, VEX_TRC_JMP_CLIENTREQ); break;
   2327          case Ijk_Sys_int128:
   2328             *p++ = 0xBD;
   2329             p = emit32(p, VEX_TRC_JMP_SYS_INT128); break;
   2330          case Ijk_Sys_int129:
   2331             *p++ = 0xBD;
   2332             p = emit32(p, VEX_TRC_JMP_SYS_INT129); break;
   2333          case Ijk_Sys_int130:
   2334             *p++ = 0xBD;
   2335             p = emit32(p, VEX_TRC_JMP_SYS_INT130); break;
   2336          case Ijk_Yield:
   2337             *p++ = 0xBD;
   2338             p = emit32(p, VEX_TRC_JMP_YIELD); break;
   2339          case Ijk_YieldNoRedir:
   2340             *p++ = 0xBD;
   2341             p = emit32(p, VEX_TRC_JMP_YIELD_NOREDIR); break;
   2342          case Ijk_EmWarn:
   2343             *p++ = 0xBD;
   2344             p = emit32(p, VEX_TRC_JMP_EMWARN); break;
   2345          case Ijk_MapFail:
   2346             *p++ = 0xBD;
   2347             p = emit32(p, VEX_TRC_JMP_MAPFAIL); break;
   2348          case Ijk_NoDecode:
   2349             *p++ = 0xBD;
   2350             p = emit32(p, VEX_TRC_JMP_NODECODE); break;
   2351          case Ijk_TInval:
   2352             *p++ = 0xBD;
   2353             p = emit32(p, VEX_TRC_JMP_TINVAL); break;
   2354          case Ijk_NoRedir:
   2355             *p++ = 0xBD;
   2356             p = emit32(p, VEX_TRC_JMP_NOREDIR); break;
   2357          case Ijk_Sys_sysenter:
   2358             *p++ = 0xBD;
   2359             p = emit32(p, VEX_TRC_JMP_SYS_SYSENTER); break;
   2360          case Ijk_SigTRAP:
   2361             *p++ = 0xBD;
   2362             p = emit32(p, VEX_TRC_JMP_SIGTRAP); break;
   2363          case Ijk_SigSEGV:
   2364             *p++ = 0xBD;
   2365             p = emit32(p, VEX_TRC_JMP_SIGSEGV); break;
   2366          case Ijk_Ret:
   2367          case Ijk_Call:
   2368          case Ijk_Boring:
   2369             break;
   2370          default:
   2371             ppIRJumpKind(i->Xin.Goto.jk);
   2372             vpanic("emit_X86Instr.Xin_Goto: unknown jump kind");
   2373       }
   2374 
   2375       /* Get the destination address into %eax */
   2376       if (i->Xin.Goto.dst->tag == Xri_Imm) {
   2377          /* movl $immediate, %eax */
   2378          *p++ = 0xB8;
   2379          p = emit32(p, i->Xin.Goto.dst->Xri.Imm.imm32);
   2380       } else {
   2381          vassert(i->Xin.Goto.dst->tag == Xri_Reg);
   2382          /* movl %reg, %eax */
   2383          if (i->Xin.Goto.dst->Xri.Reg.reg != hregX86_EAX()) {
   2384             *p++ = 0x89;
   2385             p = doAMode_R(p, i->Xin.Goto.dst->Xri.Reg.reg, hregX86_EAX());
   2386          }
   2387       }
   2388 
   2389       /* Get the dispatcher address into %edx.  This has to happen
   2390          after the load of %eax since %edx might be carrying the value
   2391          destined for %eax immediately prior to this Xin_Goto. */
   2392       vassert(sizeof(UInt) == sizeof(void*));
   2393       vassert(dispatch != NULL);
   2394       /* movl $imm32, %edx */
   2395       *p++ = 0xBA;
   2396       p = emit32(p, (UInt)Ptr_to_ULong(dispatch));
   2397 
   2398       /* jmp *%edx */
   2399       *p++ = 0xFF;
   2400       *p++ = 0xE2;
   2401 
   2402       /* Fix up the conditional jump, if there was one. */
   2403       if (i->Xin.Goto.cond != Xcc_ALWAYS) {
   2404          Int delta = p - ptmp;
   2405          vassert(delta > 0 && delta < 20);
   2406          *ptmp = toUChar(delta-1);
   2407       }
   2408       goto done;
   2409 
   2410    case Xin_CMov32:
   2411       vassert(i->Xin.CMov32.cond != Xcc_ALWAYS);
   2412 
   2413       /* This generates cmov, which is illegal on P54/P55. */
   2414       /*
   2415       *p++ = 0x0F;
   2416       *p++ = toUChar(0x40 + (0xF & i->Xin.CMov32.cond));
   2417       if (i->Xin.CMov32.src->tag == Xrm_Reg) {
   2418          p = doAMode_R(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Reg.reg);
   2419          goto done;
   2420       }
   2421       if (i->Xin.CMov32.src->tag == Xrm_Mem) {
   2422          p = doAMode_M(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Mem.am);
   2423          goto done;
   2424       }
   2425       */
   2426 
   2427       /* Alternative version which works on any x86 variant. */
   2428       /* jmp fwds if !condition */
   2429       *p++ = toUChar(0x70 + (0xF & (i->Xin.CMov32.cond ^ 1)));
   2430       *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
   2431       ptmp = p;
   2432 
   2433       switch (i->Xin.CMov32.src->tag) {
   2434          case Xrm_Reg:
   2435             /* Big sigh.  This is movl E -> G ... */
   2436             *p++ = 0x89;
   2437             p = doAMode_R(p, i->Xin.CMov32.src->Xrm.Reg.reg,
   2438                              i->Xin.CMov32.dst);
   2439 
   2440             break;
   2441          case Xrm_Mem:
   2442             /* ... whereas this is movl G -> E.  That's why the args
   2443                to doAMode_R appear to be the wrong way round in the
   2444                Xrm_Reg case. */
   2445             *p++ = 0x8B;
   2446             p = doAMode_M(p, i->Xin.CMov32.dst,
   2447                              i->Xin.CMov32.src->Xrm.Mem.am);
   2448             break;
   2449          default:
   2450             goto bad;
   2451       }
   2452       /* Fill in the jump offset. */
   2453       *(ptmp-1) = toUChar(p - ptmp);
   2454       goto done;
   2455 
   2456       break;
   2457 
   2458    case Xin_LoadEX:
   2459       if (i->Xin.LoadEX.szSmall == 1 && !i->Xin.LoadEX.syned) {
   2460          /* movzbl */
   2461          *p++ = 0x0F;
   2462          *p++ = 0xB6;
   2463          p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
   2464          goto done;
   2465       }
   2466       if (i->Xin.LoadEX.szSmall == 2 && !i->Xin.LoadEX.syned) {
   2467          /* movzwl */
   2468          *p++ = 0x0F;
   2469          *p++ = 0xB7;
   2470          p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
   2471          goto done;
   2472       }
   2473       if (i->Xin.LoadEX.szSmall == 1 && i->Xin.LoadEX.syned) {
   2474          /* movsbl */
   2475          *p++ = 0x0F;
   2476          *p++ = 0xBE;
   2477          p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
   2478          goto done;
   2479       }
   2480       break;
   2481 
   2482    case Xin_Set32:
   2483       /* Make the destination register be 1 or 0, depending on whether
   2484          the relevant condition holds.  We have to dodge and weave
   2485          when the destination is %esi or %edi as we cannot directly
   2486          emit the native 'setb %reg' for those.  Further complication:
   2487          the top 24 bits of the destination should be forced to zero,
   2488          but doing 'xor %r,%r' kills the flag(s) we are about to read.
   2489          Sigh.  So start off by moving $0 into the dest. */
   2490 
   2491       /* Do we need to swap in %eax? */
   2492       if (iregNo(i->Xin.Set32.dst) >= 4) {
   2493          /* xchg %eax, %dst */
   2494          *p++ = toUChar(0x90 + iregNo(i->Xin.Set32.dst));
   2495          /* movl $0, %eax */
   2496          *p++ = toUChar(0xB8 + iregNo(hregX86_EAX()));
   2497          p = emit32(p, 0);
   2498          /* setb lo8(%eax) */
   2499          *p++ = 0x0F;
   2500          *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
   2501          p = doAMode_R(p, fake(0), hregX86_EAX());
   2502          /* xchg %eax, %dst */
   2503          *p++ = toUChar(0x90 + iregNo(i->Xin.Set32.dst));
   2504       } else {
   2505          /* movl $0, %dst */
   2506          *p++ = toUChar(0xB8 + iregNo(i->Xin.Set32.dst));
   2507          p = emit32(p, 0);
   2508          /* setb lo8(%dst) */
   2509          *p++ = 0x0F;
   2510          *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
   2511          p = doAMode_R(p, fake(0), i->Xin.Set32.dst);
   2512       }
   2513       goto done;
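              /* E.g. for dst=%edi and condition Z (x86 encoding 4)
                 this produces:
                    xchgl %eax,%edi    97
                    movl $0,%eax       B8 00 00 00 00
                    setz %al           0F 94 C0
                    xchgl %eax,%edi    97
                 The movl (rather than xor) matters: it zeroes the reg
                 without clobbering the flags that setz reads. */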
   2514 
   2515    case Xin_Bsfr32:
   2516       *p++ = 0x0F;
   2517       if (i->Xin.Bsfr32.isFwds) {
   2518          *p++ = 0xBC;
   2519       } else {
   2520          *p++ = 0xBD;
   2521       }
   2522       p = doAMode_R(p, i->Xin.Bsfr32.dst, i->Xin.Bsfr32.src);
   2523       goto done;
   2524 
   2525    case Xin_MFence:
   2526       /* see comment in hdefs.h re this insn */
   2527       if (0) vex_printf("EMIT FENCE\n");
   2528       if (i->Xin.MFence.hwcaps & (VEX_HWCAPS_X86_SSE3
   2529                                   |VEX_HWCAPS_X86_SSE2)) {
   2530          /* mfence */
   2531          *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
   2532          goto done;
   2533       }
   2534       if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_SSE1) {
   2535          /* sfence */
   2536          *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8;
   2537          /* lock addl $0,0(%esp) */
   2538          *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
   2539          *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
   2540          goto done;
   2541       }
   2542       if (i->Xin.MFence.hwcaps == 0/*baseline, no SSE*/) {
   2543          /* lock addl $0,0(%esp) */
   2544          *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
   2545          *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
   2546          goto done;
   2547       }
   2548       vpanic("emit_X86Instr:mfence:hwcaps");
   2549       /*NOTREACHED*/
   2550       break;
   2551 
   2552    case Xin_ACAS:
   2553       /* lock */
   2554       *p++ = 0xF0;
   2555       /* cmpxchg{b,w,l} %ebx,mem.  Expected-value in %eax, new value
   2556          in %ebx.  The new-value register is hardwired to be %ebx
   2557          since letting it be any integer register gives the problem
   2558          that %sil and %dil are unaddressable on x86 and hence we
   2559          would have to resort to the same kind of trickery as with
   2560          byte-sized Xin.Store, just below.  Given that this isn't
   2561          performance critical, it is simpler just to force the
   2562          register operand to %ebx (could equally be %ecx or %edx).
   2563          (Although %ebx is more consistent with cmpxchg8b.) */
   2564       if (i->Xin.ACAS.sz == 2) *p++ = 0x66;
   2565       *p++ = 0x0F;
   2566       if (i->Xin.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
   2567       p = doAMode_M(p, hregX86_EBX(), i->Xin.ACAS.addr);
   2568       goto done;
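              /* E.g. a 4-byte ACAS on (%esi) assembles to F0 0F B1 1E,
                 i.e. "lock; cmpxchgl %ebx,(%esi)"; the 2-byte variant
                 additionally gets a 0x66 prefix and the 1-byte variant
                 uses opcode 0xB0 instead of 0xB1. */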
   2569 
   2570    case Xin_DACAS:
   2571       /* lock */
   2572       *p++ = 0xF0;
   2573       /* cmpxchg8b m64.  Expected-value in %edx:%eax, new value
   2574          in %ecx:%ebx.  All 4 regs are hardwired in the ISA, so
   2575          aren't encoded in the insn. */
   2576       *p++ = 0x0F;
   2577       *p++ = 0xC7;
   2578       p = doAMode_M(p, fake(1), i->Xin.DACAS.addr);
   2579       goto done;
   2580 
   2581    case Xin_Store:
   2582       if (i->Xin.Store.sz == 2) {
   2583          /* This case, at least, is simple, given that we can
   2584             reference the low 16 bits of any integer register. */
   2585          *p++ = 0x66;
   2586          *p++ = 0x89;
   2587          p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
   2588          goto done;
   2589       }
   2590 
   2591       if (i->Xin.Store.sz == 1) {
   2592          /* We have to do complex dodging and weaving if src is not
   2593             the low 8 bits of %eax/%ebx/%ecx/%edx. */
   2594          if (iregNo(i->Xin.Store.src) < 4) {
   2595             /* we're OK, can do it directly */
   2596             *p++ = 0x88;
   2597             p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
   2598             goto done;
   2599          } else {
   2600             /* Bleh.  This means the source is %edi or %esi.  Since
   2601                the address mode can mention at most two registers, at
   2602                least one of %eax/%ebx/%ecx/%edx must be available to
   2603                temporarily swap the source into, so the store can
   2604                happen.  So we have to look at the regs mentioned
   2605                in the amode. */
   2606             HReg swap = INVALID_HREG;
   2607             HReg  eax = hregX86_EAX(), ebx = hregX86_EBX(),
   2608                   ecx = hregX86_ECX(), edx = hregX86_EDX();
   2609             Bool a_ok = True, b_ok = True, c_ok = True, d_ok = True;
   2610             HRegUsage u;
   2611             Int j;
   2612             initHRegUsage(&u);
   2613             addRegUsage_X86AMode(&u,  i->Xin.Store.dst);
   2614             for (j = 0; j < u.n_used; j++) {
   2615                HReg r = u.hreg[j];
   2616                if (r == eax) a_ok = False;
   2617                if (r == ebx) b_ok = False;
   2618                if (r == ecx) c_ok = False;
   2619                if (r == edx) d_ok = False;
   2620             }
   2621             if (a_ok) swap = eax;
   2622             if (b_ok) swap = ebx;
   2623             if (c_ok) swap = ecx;
   2624             if (d_ok) swap = edx;
   2625             vassert(swap != INVALID_HREG);
   2626             /* xchgl %source, %swap. Could do better if swap is %eax. */
   2627             *p++ = 0x87;
   2628             p = doAMode_R(p, i->Xin.Store.src, swap);
   2629             /* movb lo8{%swap}, (dst) */
   2630             *p++ = 0x88;
   2631             p = doAMode_M(p, swap, i->Xin.Store.dst);
   2632             /* xchgl %source, %swap. Could do better if swap is %eax. */
   2633             *p++ = 0x87;
   2634             p = doAMode_R(p, i->Xin.Store.src, swap);
   2635             goto done;
   2636          }
   2637       } /* if (i->Xin.Store.sz == 1) */
   2638       break;
   2639 
   2640    case Xin_FpUnary:
   2641       /* gop %src, %dst
   2642          --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst)
   2643       */
   2644       p = do_ffree_st7(p);
   2645       p = do_fld_st(p, 0+hregNumber(i->Xin.FpUnary.src));
   2646       p = do_fop1_st(p, i->Xin.FpUnary.op);
   2647       p = do_fstp_st(p, 1+hregNumber(i->Xin.FpUnary.dst));
   2648       goto done;
   2649 
   2650    case Xin_FpBinary:
   2651       if (i->Xin.FpBinary.op == Xfp_YL2X
   2652           || i->Xin.FpBinary.op == Xfp_YL2XP1) {
   2653          /* Have to do this specially. */
   2654          /* ffree %st7 ; fld %st(srcL) ;
   2655             ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */
   2656          p = do_ffree_st7(p);
   2657          p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
   2658          p = do_ffree_st7(p);
   2659          p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
   2660          *p++ = 0xD9;
   2661          *p++ = toUChar(i->Xin.FpBinary.op==Xfp_YL2X ? 0xF1 : 0xF9);
   2662          p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
   2663          goto done;
   2664       }
   2665       if (i->Xin.FpBinary.op == Xfp_ATAN) {
   2666          /* Have to do this specially. */
   2667          /* ffree %st7 ; fld %st(srcL) ;
   2668             ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */
   2669          p = do_ffree_st7(p);
   2670          p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
   2671          p = do_ffree_st7(p);
   2672          p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
   2673          *p++ = 0xD9; *p++ = 0xF3;
   2674          p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
   2675          goto done;
   2676       }
   2677       if (i->Xin.FpBinary.op == Xfp_PREM
   2678           || i->Xin.FpBinary.op == Xfp_PREM1
   2679           || i->Xin.FpBinary.op == Xfp_SCALE) {
   2680          /* Have to do this specially. */
   2681          /* ffree %st7 ; fld %st(srcR) ;
   2682             ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ;
   2683             fincstp ; ffree %st7 */
   2684          p = do_ffree_st7(p);
   2685          p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcR));
   2686          p = do_ffree_st7(p);
   2687          p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcL));
   2688          *p++ = 0xD9;
   2689          switch (i->Xin.FpBinary.op) {
   2690             case Xfp_PREM: *p++ = 0xF8; break;
   2691             case Xfp_PREM1: *p++ = 0xF5; break;
   2692             case Xfp_SCALE: *p++ = 0xFD; break;
   2693             default: vpanic("emitX86Instr(FpBinary,PREM/PREM1/SCALE)");
   2694          }
   2695          p = do_fstp_st(p, 2+hregNumber(i->Xin.FpBinary.dst));
   2696          *p++ = 0xD9; *p++ = 0xF7;
   2697          p = do_ffree_st7(p);
   2698          goto done;
   2699       }
   2700       /* General case */
   2701       /* gop %srcL, %srcR, %dst
   2702          --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst)
   2703       */
   2704       p = do_ffree_st7(p);
   2705       p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
   2706       p = do_fop2_st(p, i->Xin.FpBinary.op,
   2707                         1+hregNumber(i->Xin.FpBinary.srcR));
   2708       p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
   2709       goto done;
   2710 
   2711    case Xin_FpLdSt:
   2712       if (i->Xin.FpLdSt.isLoad) {
   2713          /* Load from memory into %fakeN.
   2714             --> ffree %st(7) ; fld{s/l/t} amode ; fstp st(N+1)
   2715          */
   2716          p = do_ffree_st7(p);
   2717          switch (i->Xin.FpLdSt.sz) {
   2718             case 4:
   2719                *p++ = 0xD9;
   2720                p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
   2721                break;
   2722             case 8:
   2723                *p++ = 0xDD;
   2724                p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
   2725                break;
   2726             case 10:
   2727                *p++ = 0xDB;
   2728                p = doAMode_M(p, fake(5)/*subopcode*/, i->Xin.FpLdSt.addr);
   2729                break;
   2730             default:
   2731                vpanic("emitX86Instr(FpLdSt,load)");
   2732          }
   2733          p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdSt.reg));
   2734          goto done;
   2735       } else {
   2736          /* Store from %fakeN into memory.
   2737             --> ffree %st(7) ; fld st(N) ; fstp{l|s} amode
   2738          */
   2739          p = do_ffree_st7(p);
   2740          p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdSt.reg));
   2741          switch (i->Xin.FpLdSt.sz) {
   2742             case 4:
   2743                *p++ = 0xD9;
   2744                p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
   2745                break;
   2746             case 8:
   2747                *p++ = 0xDD;
   2748                p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
   2749                break;
   2750             case 10:
   2751                *p++ = 0xDB;
   2752                p = doAMode_M(p, fake(7)/*subopcode*/, i->Xin.FpLdSt.addr);
   2753                break;
   2754             default:
   2755                vpanic("emitX86Instr(FpLdSt,store)");
   2756          }
   2757          goto done;
   2758       }
   2759       break;
   2760 
   2761    case Xin_FpLdStI:
   2762       if (i->Xin.FpLdStI.isLoad) {
   2763          /* Load from memory into %fakeN, converting from an int.
   2764             --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1)
   2765          */
   2766          switch (i->Xin.FpLdStI.sz) {
   2767             case 8:  opc = 0xDF; subopc_imm = 5; break;
   2768             case 4:  opc = 0xDB; subopc_imm = 0; break;
   2769             case 2:  vassert(0); /* never used */ opc = 0xDF; subopc_imm = 0; break;
   2770             default: vpanic("emitX86Instr(Xin_FpLdStI-load)");
   2771          }
   2772          p = do_ffree_st7(p);
   2773          *p++ = toUChar(opc);
   2774          p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
   2775          p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdStI.reg));
   2776          goto done;
   2777       } else {
   2778          /* Store from %fakeN into memory, converting to an int.
   2779             --> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode
   2780          */
   2781          switch (i->Xin.FpLdStI.sz) {
   2782             case 8:  opc = 0xDF; subopc_imm = 7; break;
   2783             case 4:  opc = 0xDB; subopc_imm = 3; break;
   2784             case 2:  opc = 0xDF; subopc_imm = 3; break;
   2785             default: vpanic("emitX86Instr(Xin_FpLdStI-store)");
   2786          }
   2787          p = do_ffree_st7(p);
   2788          p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdStI.reg));
   2789          *p++ = toUChar(opc);
   2790          p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
   2791          goto done;
   2792       }
   2793       break;
   2794 
   2795    case Xin_Fp64to32:
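              /* Rounds an f64 value to f32 precision by bouncing it through
                 memory: store as a 32-bit float and reload, so the reloaded
                 value (still held in an FP reg) carries only single
                 precision. */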
   2796       /* ffree %st7 ; fld %st(src) */
   2797       p = do_ffree_st7(p);
   2798       p = do_fld_st(p, 0+fregNo(i->Xin.Fp64to32.src));
   2799       /* subl $4, %esp */
   2800       *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04;
   2801       /* fstps (%esp) */
   2802       *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24;
   2803       /* flds (%esp) */
   2804       *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24;
   2805       /* addl $4, %esp */
   2806       *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04;
   2807       /* fstp %st(1+dst) */
   2808       p = do_fstp_st(p, 1+fregNo(i->Xin.Fp64to32.dst));
   2809       goto done;
   2810 
   2811    case Xin_FpCMov:
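              /* Conditional move done as a short forward branch: emit jcc
                 with the inverted condition and a placeholder rel8
                 displacement, emit the copy, then patch the displacement
                 once the copy's length is known. */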
   2812       /* jmp fwds if !condition */
   2813       *p++ = toUChar(0x70 + (i->Xin.FpCMov.cond ^ 1));
   2814       *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
   2815       ptmp = p;
   2816 
   2817       /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */
   2818       p = do_ffree_st7(p);
   2819       p = do_fld_st(p, 0+fregNo(i->Xin.FpCMov.src));
   2820       p = do_fstp_st(p, 1+fregNo(i->Xin.FpCMov.dst));
   2821 
   2822       /* Fill in the jump offset. */
   2823       *(ptmp-1) = toUChar(p - ptmp);
   2824       goto done;
   2825 
   2826    case Xin_FpLdCW:
   2827       *p++ = 0xD9;
   2828       p = doAMode_M(p, fake(5)/*subopcode*/, i->Xin.FpLdCW.addr);
   2829       goto done;
   2830 
   2831    case Xin_FpStSW_AX:
   2832       /* note, this emits fnstsw %ax, not fstsw %ax */
   2833       *p++ = 0xDF;
   2834       *p++ = 0xE0;
   2835       goto done;
   2836 
   2837    case Xin_FpCmp:
   2838       /* gcmp %fL, %fR, %dst
   2839          -> ffree %st7 ; fld %st(fL) ; fucomp %st(1+fR) ;
   2840             fnstsw %ax ; movl %eax, %dst
   2841       */
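              /* %dst receives the raw FPU status word left by fnstsw;
                 callers presumably decode the C3/C2/C0 condition bits
                 (bits 14, 10 and 8) to recover the comparison result. */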
   2842       /* ffree %st7 */
   2843       p = do_ffree_st7(p);
   2844       /* fld %st(fL) */
   2845       p = do_fld_st(p, 0+fregNo(i->Xin.FpCmp.srcL));
   2846       /* fucomp %st(1+fR) */
   2847       *p++ = 0xDD;
   2848       *p++ = toUChar(0xE8 + (7 & (1+fregNo(i->Xin.FpCmp.srcR))));
   2849       /* fnstsw %ax */
   2850       *p++ = 0xDF;
   2851       *p++ = 0xE0;
   2852       /* movl %eax, %dst */
   2853       *p++ = 0x89;
   2854       p = doAMode_R(p, hregX86_EAX(), i->Xin.FpCmp.dst);
   2855       goto done;
   2856 
   2857    case Xin_SseConst: {
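              /* Each 4-bit group of .con appears to select, bit by bit,
                 0x00 or 0xFF bytes of the 128-bit constant; the four pushes
                 below build it on the stack, high word first (see
                 push_word_from_tags earlier in this file). */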
   2858       UShort con = i->Xin.SseConst.con;
   2859       p = push_word_from_tags(p, toUShort((con >> 12) & 0xF));
   2860       p = push_word_from_tags(p, toUShort((con >> 8) & 0xF));
   2861       p = push_word_from_tags(p, toUShort((con >> 4) & 0xF));
   2862       p = push_word_from_tags(p, toUShort(con & 0xF));
   2863       /* movups (%esp), %xmm-dst */
   2864       *p++ = 0x0F;
   2865       *p++ = 0x10;
   2866       *p++ = toUChar(0x04 + 8 * (7 & vregNo(i->Xin.SseConst.dst)));
   2867       *p++ = 0x24;
   2868       /* addl $16, %esp */
   2869       *p++ = 0x83;
   2870       *p++ = 0xC4;
   2871       *p++ = 0x10;
   2872       goto done;
   2873    }
   2874 
   2875    case Xin_SseLdSt:
   2876       *p++ = 0x0F;
   2877       *p++ = toUChar(i->Xin.SseLdSt.isLoad ? 0x10 : 0x11);
   2878       p = doAMode_M(p, fake(vregNo(i->Xin.SseLdSt.reg)), i->Xin.SseLdSt.addr);
   2879       goto done;
   2880 
   2881    case Xin_SseLdzLO:
   2882       vassert(i->Xin.SseLdzLO.sz == 4 || i->Xin.SseLdzLO.sz == 8);
   2883       /* movs[sd] amode, %xmm-dst */
   2884       *p++ = toUChar(i->Xin.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
   2885       *p++ = 0x0F;
   2886       *p++ = 0x10;
   2887       p = doAMode_M(p, fake(vregNo(i->Xin.SseLdzLO.reg)),
   2888                        i->Xin.SseLdzLO.addr);
   2889       goto done;
   2890 
   2891    case Xin_Sse32Fx4:
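              /* Unprefixed 0F opcodes here are the packed-single (xxPS)
                 forms.  The CMP variants all share opcode 0F C2 and differ
                 only in a trailing imm8 predicate (0=eq, 1=lt, 2=le,
                 3=unord); the 0x100 bit in xtra flags that this byte must
                 follow the modrm.  The three cases below use the same
                 scheme, with 66/F3/F2 prefixes selecting the PD/SS/SD
                 forms. */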
   2892       xtra = 0;
   2893       *p++ = 0x0F;
   2894       switch (i->Xin.Sse32Fx4.op) {
   2895          case Xsse_ADDF:   *p++ = 0x58; break;
   2896          case Xsse_DIVF:   *p++ = 0x5E; break;
   2897          case Xsse_MAXF:   *p++ = 0x5F; break;
   2898          case Xsse_MINF:   *p++ = 0x5D; break;
   2899          case Xsse_MULF:   *p++ = 0x59; break;
   2900          case Xsse_RCPF:   *p++ = 0x53; break;
   2901          case Xsse_RSQRTF: *p++ = 0x52; break;
   2902          case Xsse_SQRTF:  *p++ = 0x51; break;
   2903          case Xsse_SUBF:   *p++ = 0x5C; break;
   2904          case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
   2905          case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
   2906          case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
   2907          case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
   2908          default: goto bad;
   2909       }
   2910       p = doAMode_R(p, fake(vregNo(i->Xin.Sse32Fx4.dst)),
   2911                        fake(vregNo(i->Xin.Sse32Fx4.src)) );
   2912       if (xtra & 0x100)
   2913          *p++ = toUChar(xtra & 0xFF);
   2914       goto done;
   2915 
   2916    case Xin_Sse64Fx2:
   2917       xtra = 0;
   2918       *p++ = 0x66;
   2919       *p++ = 0x0F;
   2920       switch (i->Xin.Sse64Fx2.op) {
   2921          case Xsse_ADDF:   *p++ = 0x58; break;
   2922          case Xsse_DIVF:   *p++ = 0x5E; break;
   2923          case Xsse_MAXF:   *p++ = 0x5F; break;
   2924          case Xsse_MINF:   *p++ = 0x5D; break;
   2925          case Xsse_MULF:   *p++ = 0x59; break;
   2926          case Xsse_RCPF:   *p++ = 0x53; break;
   2927          case Xsse_RSQRTF: *p++ = 0x52; break;
   2928          case Xsse_SQRTF:  *p++ = 0x51; break;
   2929          case Xsse_SUBF:   *p++ = 0x5C; break;
   2930          case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
   2931          case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
   2932          case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
   2933          case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
   2934          default: goto bad;
   2935       }
   2936       p = doAMode_R(p, fake(vregNo(i->Xin.Sse64Fx2.dst)),
   2937                        fake(vregNo(i->Xin.Sse64Fx2.src)) );
   2938       if (xtra & 0x100)
   2939          *p++ = toUChar(xtra & 0xFF);
   2940       goto done;
   2941 
   2942    case Xin_Sse32FLo:
   2943       xtra = 0;
   2944       *p++ = 0xF3;
   2945       *p++ = 0x0F;
   2946       switch (i->Xin.Sse32FLo.op) {
   2947          case Xsse_ADDF:   *p++ = 0x58; break;
   2948          case Xsse_DIVF:   *p++ = 0x5E; break;
   2949          case Xsse_MAXF:   *p++ = 0x5F; break;
   2950          case Xsse_MINF:   *p++ = 0x5D; break;
   2951          case Xsse_MULF:   *p++ = 0x59; break;
   2952          case Xsse_RCPF:   *p++ = 0x53; break;
   2953          case Xsse_RSQRTF: *p++ = 0x52; break;
   2954          case Xsse_SQRTF:  *p++ = 0x51; break;
   2955          case Xsse_SUBF:   *p++ = 0x5C; break;
   2956          case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
   2957          case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
   2958          case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
   2959          case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
   2960          default: goto bad;
   2961       }
   2962       p = doAMode_R(p, fake(vregNo(i->Xin.Sse32FLo.dst)),
   2963                        fake(vregNo(i->Xin.Sse32FLo.src)) );
   2964       if (xtra & 0x100)
   2965          *p++ = toUChar(xtra & 0xFF);
   2966       goto done;
   2967 
   2968    case Xin_Sse64FLo:
   2969       xtra = 0;
   2970       *p++ = 0xF2;
   2971       *p++ = 0x0F;
   2972       switch (i->Xin.Sse64FLo.op) {
   2973          case Xsse_ADDF:   *p++ = 0x58; break;
   2974          case Xsse_DIVF:   *p++ = 0x5E; break;
   2975          case Xsse_MAXF:   *p++ = 0x5F; break;
   2976          case Xsse_MINF:   *p++ = 0x5D; break;
   2977          case Xsse_MULF:   *p++ = 0x59; break;
   2978          case Xsse_RCPF:   *p++ = 0x53; break;
   2979          case Xsse_RSQRTF: *p++ = 0x52; break;
   2980          case Xsse_SQRTF:  *p++ = 0x51; break;
   2981          case Xsse_SUBF:   *p++ = 0x5C; break;
   2982          case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
   2983          case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
   2984          case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
   2985          case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
   2986          default: goto bad;
   2987       }
   2988       p = doAMode_R(p, fake(vregNo(i->Xin.Sse64FLo.dst)),
   2989                        fake(vregNo(i->Xin.Sse64FLo.src)) );
   2990       if (xtra & 0x100)
   2991          *p++ = toUChar(xtra & 0xFF);
   2992       goto done;
   2993 
   2994    case Xin_SseReRg:
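              /* Bare 0F opcodes here are the 'ps' forms (movups, andps,
                 orps, xorps); the 66 prefix selects the SSE2 xmm forms of
                 the MMX integer opcodes. */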
   2995 #     define XX(_n) *p++ = (_n)
   2996       switch (i->Xin.SseReRg.op) {
   2997          case Xsse_MOV:     /*movups*/ XX(0x0F); XX(0x10); break;
   2998          case Xsse_OR:                 XX(0x0F); XX(0x56); break;
   2999          case Xsse_XOR:                XX(0x0F); XX(0x57); break;
   3000          case Xsse_AND:                XX(0x0F); XX(0x54); break;
   3001          case Xsse_PACKSSD:  XX(0x66); XX(0x0F); XX(0x6B); break;
   3002          case Xsse_PACKSSW:  XX(0x66); XX(0x0F); XX(0x63); break;
   3003          case Xsse_PACKUSW:  XX(0x66); XX(0x0F); XX(0x67); break;
   3004          case Xsse_ADD8:     XX(0x66); XX(0x0F); XX(0xFC); break;
   3005          case Xsse_ADD16:    XX(0x66); XX(0x0F); XX(0xFD); break;
   3006          case Xsse_ADD32:    XX(0x66); XX(0x0F); XX(0xFE); break;
   3007          case Xsse_ADD64:    XX(0x66); XX(0x0F); XX(0xD4); break;
   3008          case Xsse_QADD8S:   XX(0x66); XX(0x0F); XX(0xEC); break;
   3009          case Xsse_QADD16S:  XX(0x66); XX(0x0F); XX(0xED); break;
   3010          case Xsse_QADD8U:   XX(0x66); XX(0x0F); XX(0xDC); break;
   3011          case Xsse_QADD16U:  XX(0x66); XX(0x0F); XX(0xDD); break;
   3012          case Xsse_AVG8U:    XX(0x66); XX(0x0F); XX(0xE0); break;
   3013          case Xsse_AVG16U:   XX(0x66); XX(0x0F); XX(0xE3); break;
   3014          case Xsse_CMPEQ8:   XX(0x66); XX(0x0F); XX(0x74); break;
   3015          case Xsse_CMPEQ16:  XX(0x66); XX(0x0F); XX(0x75); break;
   3016          case Xsse_CMPEQ32:  XX(0x66); XX(0x0F); XX(0x76); break;
   3017          case Xsse_CMPGT8S:  XX(0x66); XX(0x0F); XX(0x64); break;
   3018          case Xsse_CMPGT16S: XX(0x66); XX(0x0F); XX(0x65); break;
   3019          case Xsse_CMPGT32S: XX(0x66); XX(0x0F); XX(0x66); break;
   3020          case Xsse_MAX16S:   XX(0x66); XX(0x0F); XX(0xEE); break;
   3021          case Xsse_MAX8U:    XX(0x66); XX(0x0F); XX(0xDE); break;
   3022          case Xsse_MIN16S:   XX(0x66); XX(0x0F); XX(0xEA); break;
   3023          case Xsse_MIN8U:    XX(0x66); XX(0x0F); XX(0xDA); break;
   3024          case Xsse_MULHI16U: XX(0x66); XX(0x0F); XX(0xE4); break;
   3025          case Xsse_MULHI16S: XX(0x66); XX(0x0F); XX(0xE5); break;
   3026          case Xsse_MUL16:    XX(0x66); XX(0x0F); XX(0xD5); break;
   3027          case Xsse_SHL16:    XX(0x66); XX(0x0F); XX(0xF1); break;
   3028          case Xsse_SHL32:    XX(0x66); XX(0x0F); XX(0xF2); break;
   3029          case Xsse_SHL64:    XX(0x66); XX(0x0F); XX(0xF3); break;
   3030          case Xsse_SAR16:    XX(0x66); XX(0x0F); XX(0xE1); break;
   3031          case Xsse_SAR32:    XX(0x66); XX(0x0F); XX(0xE2); break;
   3032          case Xsse_SHR16:    XX(0x66); XX(0x0F); XX(0xD1); break;
   3033          case Xsse_SHR32:    XX(0x66); XX(0x0F); XX(0xD2); break;
   3034          case Xsse_SHR64:    XX(0x66); XX(0x0F); XX(0xD3); break;
   3035          case Xsse_SUB8:     XX(0x66); XX(0x0F); XX(0xF8); break;
   3036          case Xsse_SUB16:    XX(0x66); XX(0x0F); XX(0xF9); break;
   3037          case Xsse_SUB32:    XX(0x66); XX(0x0F); XX(0xFA); break;
   3038          case Xsse_SUB64:    XX(0x66); XX(0x0F); XX(0xFB); break;
   3039          case Xsse_QSUB8S:   XX(0x66); XX(0x0F); XX(0xE8); break;
   3040          case Xsse_QSUB16S:  XX(0x66); XX(0x0F); XX(0xE9); break;
   3041          case Xsse_QSUB8U:   XX(0x66); XX(0x0F); XX(0xD8); break;
   3042          case Xsse_QSUB16U:  XX(0x66); XX(0x0F); XX(0xD9); break;
   3043          case Xsse_UNPCKHB:  XX(0x66); XX(0x0F); XX(0x68); break;
   3044          case Xsse_UNPCKHW:  XX(0x66); XX(0x0F); XX(0x69); break;
   3045          case Xsse_UNPCKHD:  XX(0x66); XX(0x0F); XX(0x6A); break;
   3046          case Xsse_UNPCKHQ:  XX(0x66); XX(0x0F); XX(0x6D); break;
   3047          case Xsse_UNPCKLB:  XX(0x66); XX(0x0F); XX(0x60); break;
   3048          case Xsse_UNPCKLW:  XX(0x66); XX(0x0F); XX(0x61); break;
   3049          case Xsse_UNPCKLD:  XX(0x66); XX(0x0F); XX(0x62); break;
   3050          case Xsse_UNPCKLQ:  XX(0x66); XX(0x0F); XX(0x6C); break;
   3051          default: goto bad;
   3052       }
   3053       p = doAMode_R(p, fake(vregNo(i->Xin.SseReRg.dst)),
   3054                        fake(vregNo(i->Xin.SseReRg.src)) );
   3055 #     undef XX
   3056       goto done;
   3057 
   3058    case Xin_SseCMov:
   3059       /* jmp fwds if !condition */
   3060       *p++ = toUChar(0x70 + (i->Xin.SseCMov.cond ^ 1));
   3061       *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
   3062       ptmp = p;
   3063 
   3064       /* movaps %src, %dst */
   3065       *p++ = 0x0F;
   3066       *p++ = 0x28;
   3067       p = doAMode_R(p, fake(vregNo(i->Xin.SseCMov.dst)),
   3068                        fake(vregNo(i->Xin.SseCMov.src)) );
   3069 
   3070       /* Fill in the jump offset. */
   3071       *(ptmp-1) = toUChar(p - ptmp);
   3072       goto done;
   3073 
   3074    case Xin_SseShuf:
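              /* pshufd $order, %src, %dst (66 0F 70 /r ib) */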
   3075       *p++ = 0x66;
   3076       *p++ = 0x0F;
   3077       *p++ = 0x70;
   3078       p = doAMode_R(p, fake(vregNo(i->Xin.SseShuf.dst)),
   3079                        fake(vregNo(i->Xin.SseShuf.src)) );
   3080       *p++ = toUChar(i->Xin.SseShuf.order);
   3081       goto done;
   3082 
   3083    default:
   3084       goto bad;
   3085    }
   3086 
   3087   bad:
   3088    ppX86Instr(i, mode64);
   3089    vpanic("emit_X86Instr");
   3090    /*NOTREACHED*/
   3091 
   3092   done:
   3093    vassert(p - &buf[0] <= 32);
   3094    return p - &buf[0];
   3095 
   3096 #  undef fake
   3097 }
   3098 
   3099 /*---------------------------------------------------------------*/
   3100 /*--- end                                     host_x86_defs.c ---*/
   3101 /*---------------------------------------------------------------*/
   3102