/* (scrape artifact removed: HTML code-browser navigation line) */
      1 
      2 /*---------------------------------------------------------------*/
      3 /*--- begin                                   host_x86_defs.c ---*/
      4 /*---------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2011 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     26    02110-1301, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 
     30    Neither the names of the U.S. Department of Energy nor the
     31    University of California nor the names of its contributors may be
     32    used to endorse or promote products derived from this software
     33    without prior written permission.
     34 */
     35 
     36 #include "libvex_basictypes.h"
     37 #include "libvex.h"
     38 #include "libvex_trc_values.h"
     39 
     40 #include "main_util.h"
     41 #include "host_generic_regs.h"
     42 #include "host_x86_defs.h"
     43 
     44 
     45 /* --------- Registers. --------- */
     46 
     47 void ppHRegX86 ( HReg reg )
     48 {
     49    Int r;
     50    static HChar* ireg32_names[8]
     51      = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi" };
     52    /* Be generic for all virtual regs. */
     53    if (hregIsVirtual(reg)) {
     54       ppHReg(reg);
     55       return;
     56    }
     57    /* But specific for real regs. */
     58    switch (hregClass(reg)) {
     59       case HRcInt32:
     60          r = hregNumber(reg);
     61          vassert(r >= 0 && r < 8);
     62          vex_printf("%s", ireg32_names[r]);
     63          return;
     64       case HRcFlt64:
     65          r = hregNumber(reg);
     66          vassert(r >= 0 && r < 6);
     67          vex_printf("%%fake%d", r);
     68          return;
     69       case HRcVec128:
     70          r = hregNumber(reg);
     71          vassert(r >= 0 && r < 8);
     72          vex_printf("%%xmm%d", r);
     73          return;
     74       default:
     75          vpanic("ppHRegX86");
     76    }
     77 }
     78 
/* Real 32-bit integer registers, numbered 0..7 in Intel encoding
   order (%eax..%edi). */
HReg hregX86_EAX ( void ) { return mkHReg(0, HRcInt32, False); }
HReg hregX86_ECX ( void ) { return mkHReg(1, HRcInt32, False); }
HReg hregX86_EDX ( void ) { return mkHReg(2, HRcInt32, False); }
HReg hregX86_EBX ( void ) { return mkHReg(3, HRcInt32, False); }
HReg hregX86_ESP ( void ) { return mkHReg(4, HRcInt32, False); }
HReg hregX86_EBP ( void ) { return mkHReg(5, HRcInt32, False); }
HReg hregX86_ESI ( void ) { return mkHReg(6, HRcInt32, False); }
HReg hregX86_EDI ( void ) { return mkHReg(7, HRcInt32, False); }

/* Six flat "fake" FP registers (printed as %fake0..%fake5 by
   ppHRegX86).  NOTE(review): presumably these model the x87 stack as
   flat registers -- confirm against the emitter. */
HReg hregX86_FAKE0 ( void ) { return mkHReg(0, HRcFlt64, False); }
HReg hregX86_FAKE1 ( void ) { return mkHReg(1, HRcFlt64, False); }
HReg hregX86_FAKE2 ( void ) { return mkHReg(2, HRcFlt64, False); }
HReg hregX86_FAKE3 ( void ) { return mkHReg(3, HRcFlt64, False); }
HReg hregX86_FAKE4 ( void ) { return mkHReg(4, HRcFlt64, False); }
HReg hregX86_FAKE5 ( void ) { return mkHReg(5, HRcFlt64, False); }

/* SSE registers %xmm0..%xmm7. */
HReg hregX86_XMM0 ( void ) { return mkHReg(0, HRcVec128, False); }
HReg hregX86_XMM1 ( void ) { return mkHReg(1, HRcVec128, False); }
HReg hregX86_XMM2 ( void ) { return mkHReg(2, HRcVec128, False); }
HReg hregX86_XMM3 ( void ) { return mkHReg(3, HRcVec128, False); }
HReg hregX86_XMM4 ( void ) { return mkHReg(4, HRcVec128, False); }
HReg hregX86_XMM5 ( void ) { return mkHReg(5, HRcVec128, False); }
HReg hregX86_XMM6 ( void ) { return mkHReg(6, HRcVec128, False); }
HReg hregX86_XMM7 ( void ) { return mkHReg(7, HRcVec128, False); }
    103 
    104 
    105 void getAllocableRegs_X86 ( Int* nregs, HReg** arr )
    106 {
    107    *nregs = 20;
    108    *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
    109    (*arr)[0] = hregX86_EAX();
    110    (*arr)[1] = hregX86_EBX();
    111    (*arr)[2] = hregX86_ECX();
    112    (*arr)[3] = hregX86_EDX();
    113    (*arr)[4] = hregX86_ESI();
    114    (*arr)[5] = hregX86_EDI();
    115    (*arr)[6] = hregX86_FAKE0();
    116    (*arr)[7] = hregX86_FAKE1();
    117    (*arr)[8] = hregX86_FAKE2();
    118    (*arr)[9] = hregX86_FAKE3();
    119    (*arr)[10] = hregX86_FAKE4();
    120    (*arr)[11] = hregX86_FAKE5();
    121    (*arr)[12] = hregX86_XMM0();
    122    (*arr)[13] = hregX86_XMM1();
    123    (*arr)[14] = hregX86_XMM2();
    124    (*arr)[15] = hregX86_XMM3();
    125    (*arr)[16] = hregX86_XMM4();
    126    (*arr)[17] = hregX86_XMM5();
    127    (*arr)[18] = hregX86_XMM6();
    128    (*arr)[19] = hregX86_XMM7();
    129 }
    130 
    131 
    132 /* --------- Condition codes, Intel encoding. --------- */
    133 
    134 HChar* showX86CondCode ( X86CondCode cond )
    135 {
    136    switch (cond) {
    137       case Xcc_O:      return "o";
    138       case Xcc_NO:     return "no";
    139       case Xcc_B:      return "b";
    140       case Xcc_NB:     return "nb";
    141       case Xcc_Z:      return "z";
    142       case Xcc_NZ:     return "nz";
    143       case Xcc_BE:     return "be";
    144       case Xcc_NBE:    return "nbe";
    145       case Xcc_S:      return "s";
    146       case Xcc_NS:     return "ns";
    147       case Xcc_P:      return "p";
    148       case Xcc_NP:     return "np";
    149       case Xcc_L:      return "l";
    150       case Xcc_NL:     return "nl";
    151       case Xcc_LE:     return "le";
    152       case Xcc_NLE:    return "nle";
    153       case Xcc_ALWAYS: return "ALWAYS";
    154       default: vpanic("ppX86CondCode");
    155    }
    156 }
    157 
    158 
    159 /* --------- X86AMode: memory address expressions. --------- */
    160 
    161 X86AMode* X86AMode_IR ( UInt imm32, HReg reg ) {
    162    X86AMode* am = LibVEX_Alloc(sizeof(X86AMode));
    163    am->tag = Xam_IR;
    164    am->Xam.IR.imm = imm32;
    165    am->Xam.IR.reg = reg;
    166    return am;
    167 }
    168 X86AMode* X86AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
    169    X86AMode* am = LibVEX_Alloc(sizeof(X86AMode));
    170    am->tag = Xam_IRRS;
    171    am->Xam.IRRS.imm = imm32;
    172    am->Xam.IRRS.base = base;
    173    am->Xam.IRRS.index = indEx;
    174    am->Xam.IRRS.shift = shift;
    175    vassert(shift >= 0 && shift <= 3);
    176    return am;
    177 }
    178 
    179 X86AMode* dopyX86AMode ( X86AMode* am ) {
    180    switch (am->tag) {
    181       case Xam_IR:
    182          return X86AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg );
    183       case Xam_IRRS:
    184          return X86AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base,
    185                                am->Xam.IRRS.index, am->Xam.IRRS.shift );
    186       default:
    187          vpanic("dopyX86AMode");
    188    }
    189 }
    190 
    191 void ppX86AMode ( X86AMode* am ) {
    192    switch (am->tag) {
    193       case Xam_IR:
    194          if (am->Xam.IR.imm == 0)
    195             vex_printf("(");
    196          else
    197             vex_printf("0x%x(", am->Xam.IR.imm);
    198          ppHRegX86(am->Xam.IR.reg);
    199          vex_printf(")");
    200          return;
    201       case Xam_IRRS:
    202          vex_printf("0x%x(", am->Xam.IRRS.imm);
    203          ppHRegX86(am->Xam.IRRS.base);
    204          vex_printf(",");
    205          ppHRegX86(am->Xam.IRRS.index);
    206          vex_printf(",%d)", 1 << am->Xam.IRRS.shift);
    207          return;
    208       default:
    209          vpanic("ppX86AMode");
    210    }
    211 }
    212 
    213 static void addRegUsage_X86AMode ( HRegUsage* u, X86AMode* am ) {
    214    switch (am->tag) {
    215       case Xam_IR:
    216          addHRegUse(u, HRmRead, am->Xam.IR.reg);
    217          return;
    218       case Xam_IRRS:
    219          addHRegUse(u, HRmRead, am->Xam.IRRS.base);
    220          addHRegUse(u, HRmRead, am->Xam.IRRS.index);
    221          return;
    222       default:
    223          vpanic("addRegUsage_X86AMode");
    224    }
    225 }
    226 
    227 static void mapRegs_X86AMode ( HRegRemap* m, X86AMode* am ) {
    228    switch (am->tag) {
    229       case Xam_IR:
    230          am->Xam.IR.reg = lookupHRegRemap(m, am->Xam.IR.reg);
    231          return;
    232       case Xam_IRRS:
    233          am->Xam.IRRS.base = lookupHRegRemap(m, am->Xam.IRRS.base);
    234          am->Xam.IRRS.index = lookupHRegRemap(m, am->Xam.IRRS.index);
    235          return;
    236       default:
    237          vpanic("mapRegs_X86AMode");
    238    }
    239 }
    240 
    241 /* --------- Operand, which can be reg, immediate or memory. --------- */
    242 
    243 X86RMI* X86RMI_Imm ( UInt imm32 ) {
    244    X86RMI* op         = LibVEX_Alloc(sizeof(X86RMI));
    245    op->tag            = Xrmi_Imm;
    246    op->Xrmi.Imm.imm32 = imm32;
    247    return op;
    248 }
    249 X86RMI* X86RMI_Reg ( HReg reg ) {
    250    X86RMI* op       = LibVEX_Alloc(sizeof(X86RMI));
    251    op->tag          = Xrmi_Reg;
    252    op->Xrmi.Reg.reg = reg;
    253    return op;
    254 }
    255 X86RMI* X86RMI_Mem ( X86AMode* am ) {
    256    X86RMI* op      = LibVEX_Alloc(sizeof(X86RMI));
    257    op->tag         = Xrmi_Mem;
    258    op->Xrmi.Mem.am = am;
    259    return op;
    260 }
    261 
    262 void ppX86RMI ( X86RMI* op ) {
    263    switch (op->tag) {
    264       case Xrmi_Imm:
    265          vex_printf("$0x%x", op->Xrmi.Imm.imm32);
    266          return;
    267       case Xrmi_Reg:
    268          ppHRegX86(op->Xrmi.Reg.reg);
    269          return;
    270       case Xrmi_Mem:
    271          ppX86AMode(op->Xrmi.Mem.am);
    272          return;
    273      default:
    274          vpanic("ppX86RMI");
    275    }
    276 }
    277 
    278 /* An X86RMI can only be used in a "read" context (what would it mean
    279    to write or modify a literal?) and so we enumerate its registers
    280    accordingly. */
    281 static void addRegUsage_X86RMI ( HRegUsage* u, X86RMI* op ) {
    282    switch (op->tag) {
    283       case Xrmi_Imm:
    284          return;
    285       case Xrmi_Reg:
    286          addHRegUse(u, HRmRead, op->Xrmi.Reg.reg);
    287          return;
    288       case Xrmi_Mem:
    289          addRegUsage_X86AMode(u, op->Xrmi.Mem.am);
    290          return;
    291       default:
    292          vpanic("addRegUsage_X86RMI");
    293    }
    294 }
    295 
    296 static void mapRegs_X86RMI ( HRegRemap* m, X86RMI* op ) {
    297    switch (op->tag) {
    298       case Xrmi_Imm:
    299          return;
    300       case Xrmi_Reg:
    301          op->Xrmi.Reg.reg = lookupHRegRemap(m, op->Xrmi.Reg.reg);
    302          return;
    303       case Xrmi_Mem:
    304          mapRegs_X86AMode(m, op->Xrmi.Mem.am);
    305          return;
    306       default:
    307          vpanic("mapRegs_X86RMI");
    308    }
    309 }
    310 
    311 
    312 /* --------- Operand, which can be reg or immediate only. --------- */
    313 
    314 X86RI* X86RI_Imm ( UInt imm32 ) {
    315    X86RI* op         = LibVEX_Alloc(sizeof(X86RI));
    316    op->tag           = Xri_Imm;
    317    op->Xri.Imm.imm32 = imm32;
    318    return op;
    319 }
    320 X86RI* X86RI_Reg ( HReg reg ) {
    321    X86RI* op       = LibVEX_Alloc(sizeof(X86RI));
    322    op->tag         = Xri_Reg;
    323    op->Xri.Reg.reg = reg;
    324    return op;
    325 }
    326 
    327 void ppX86RI ( X86RI* op ) {
    328    switch (op->tag) {
    329       case Xri_Imm:
    330          vex_printf("$0x%x", op->Xri.Imm.imm32);
    331          return;
    332       case Xri_Reg:
    333          ppHRegX86(op->Xri.Reg.reg);
    334          return;
    335      default:
    336          vpanic("ppX86RI");
    337    }
    338 }
    339 
    340 /* An X86RI can only be used in a "read" context (what would it mean
    341    to write or modify a literal?) and so we enumerate its registers
    342    accordingly. */
    343 static void addRegUsage_X86RI ( HRegUsage* u, X86RI* op ) {
    344    switch (op->tag) {
    345       case Xri_Imm:
    346          return;
    347       case Xri_Reg:
    348          addHRegUse(u, HRmRead, op->Xri.Reg.reg);
    349          return;
    350       default:
    351          vpanic("addRegUsage_X86RI");
    352    }
    353 }
    354 
    355 static void mapRegs_X86RI ( HRegRemap* m, X86RI* op ) {
    356    switch (op->tag) {
    357       case Xri_Imm:
    358          return;
    359       case Xri_Reg:
    360          op->Xri.Reg.reg = lookupHRegRemap(m, op->Xri.Reg.reg);
    361          return;
    362       default:
    363          vpanic("mapRegs_X86RI");
    364    }
    365 }
    366 
    367 
    368 /* --------- Operand, which can be reg or memory only. --------- */
    369 
    370 X86RM* X86RM_Reg ( HReg reg ) {
    371    X86RM* op       = LibVEX_Alloc(sizeof(X86RM));
    372    op->tag         = Xrm_Reg;
    373    op->Xrm.Reg.reg = reg;
    374    return op;
    375 }
    376 X86RM* X86RM_Mem ( X86AMode* am ) {
    377    X86RM* op      = LibVEX_Alloc(sizeof(X86RM));
    378    op->tag        = Xrm_Mem;
    379    op->Xrm.Mem.am = am;
    380    return op;
    381 }
    382 
    383 void ppX86RM ( X86RM* op ) {
    384    switch (op->tag) {
    385       case Xrm_Mem:
    386          ppX86AMode(op->Xrm.Mem.am);
    387          return;
    388       case Xrm_Reg:
    389          ppHRegX86(op->Xrm.Reg.reg);
    390          return;
    391      default:
    392          vpanic("ppX86RM");
    393    }
    394 }
    395 
    396 /* Because an X86RM can be both a source or destination operand, we
    397    have to supply a mode -- pertaining to the operand as a whole --
    398    indicating how it's being used. */
    399 static void addRegUsage_X86RM ( HRegUsage* u, X86RM* op, HRegMode mode ) {
    400    switch (op->tag) {
    401       case Xrm_Mem:
    402          /* Memory is read, written or modified.  So we just want to
    403             know the regs read by the amode. */
    404          addRegUsage_X86AMode(u, op->Xrm.Mem.am);
    405          return;
    406       case Xrm_Reg:
    407          /* reg is read, written or modified.  Add it in the
    408             appropriate way. */
    409          addHRegUse(u, mode, op->Xrm.Reg.reg);
    410          return;
    411      default:
    412          vpanic("addRegUsage_X86RM");
    413    }
    414 }
    415 
    416 static void mapRegs_X86RM ( HRegRemap* m, X86RM* op )
    417 {
    418    switch (op->tag) {
    419       case Xrm_Mem:
    420          mapRegs_X86AMode(m, op->Xrm.Mem.am);
    421          return;
    422       case Xrm_Reg:
    423          op->Xrm.Reg.reg = lookupHRegRemap(m, op->Xrm.Reg.reg);
    424          return;
    425      default:
    426          vpanic("mapRegs_X86RM");
    427    }
    428 }
    429 
    430 
    431 /* --------- Instructions. --------- */
    432 
/* Mnemonic for a unary integer op. */
HChar* showX86UnaryOp ( X86UnaryOp op ) {
   switch (op) {
      case Xun_NOT: return "not";
      case Xun_NEG: return "neg";
      default: vpanic("showX86UnaryOp");
   }
}

/* Mnemonic for a 32-bit ALU op.  Xalu_MOV/CMP/MUL are grouped in
   here even though they are not two-operand ALU ops in the usual
   sense. */
HChar* showX86AluOp ( X86AluOp op ) {
   switch (op) {
      case Xalu_MOV:  return "mov";
      case Xalu_CMP:  return "cmp";
      case Xalu_ADD:  return "add";
      case Xalu_SUB:  return "sub";
      case Xalu_ADC:  return "adc";
      case Xalu_SBB:  return "sbb";
      case Xalu_AND:  return "and";
      case Xalu_OR:   return "or";
      case Xalu_XOR:  return "xor";
      case Xalu_MUL:  return "mul";
      default: vpanic("showX86AluOp");
   }
}

/* Mnemonic for a shift op. */
HChar* showX86ShiftOp ( X86ShiftOp op ) {
   switch (op) {
      case Xsh_SHL: return "shl";
      case Xsh_SHR: return "shr";
      case Xsh_SAR: return "sar";
      default: vpanic("showX86ShiftOp");
   }
}

/* Mnemonic (x87 "f"-prefix omitted) for an FP op.  Note Xfp_NEG
   prints as "chs", matching the x87 FCHS instruction. */
HChar* showX86FpOp ( X86FpOp op ) {
   switch (op) {
      case Xfp_ADD:    return "add";
      case Xfp_SUB:    return "sub";
      case Xfp_MUL:    return "mul";
      case Xfp_DIV:    return "div";
      case Xfp_SCALE:  return "scale";
      case Xfp_ATAN:   return "atan";
      case Xfp_YL2X:   return "yl2x";
      case Xfp_YL2XP1: return "yl2xp1";
      case Xfp_PREM:   return "prem";
      case Xfp_PREM1:  return "prem1";
      case Xfp_SQRT:   return "sqrt";
      case Xfp_ABS:    return "abs";
      case Xfp_NEG:    return "chs";
      case Xfp_MOV:    return "mov";
      case Xfp_SIN:    return "sin";
      case Xfp_COS:    return "cos";
      case Xfp_TAN:    return "tan";
      case Xfp_ROUND:  return "round";
      case Xfp_2XM1:   return "2xm1";
      default: vpanic("showX86FpOp");
   }
}
    490 
/* Mnemonic for an SSE/SSE2 op.  The float ops (ADDF etc.) omit the
   ss/ps/sd/pd suffix since the same X86SseOp is used for all four
   variants; the integer ops use the full Intel mnemonic. */
HChar* showX86SseOp ( X86SseOp op ) {
   switch (op) {
      /* Xsse_MOV should never be shown -- hence the marker text. */
      case Xsse_MOV:      return "mov(?!)";
      case Xsse_ADDF:     return "add";
      case Xsse_SUBF:     return "sub";
      case Xsse_MULF:     return "mul";
      case Xsse_DIVF:     return "div";
      case Xsse_MAXF:     return "max";
      case Xsse_MINF:     return "min";
      case Xsse_CMPEQF:   return "cmpFeq";
      case Xsse_CMPLTF:   return "cmpFlt";
      case Xsse_CMPLEF:   return "cmpFle";
      case Xsse_CMPUNF:   return "cmpFun";
      case Xsse_RCPF:     return "rcp";
      case Xsse_RSQRTF:   return "rsqrt";
      case Xsse_SQRTF:    return "sqrt";
      case Xsse_AND:      return "and";
      case Xsse_OR:       return "or";
      case Xsse_XOR:      return "xor";
      case Xsse_ANDN:     return "andn";
      case Xsse_ADD8:     return "paddb";
      case Xsse_ADD16:    return "paddw";
      case Xsse_ADD32:    return "paddd";
      case Xsse_ADD64:    return "paddq";
      case Xsse_QADD8U:   return "paddusb";
      case Xsse_QADD16U:  return "paddusw";
      case Xsse_QADD8S:   return "paddsb";
      case Xsse_QADD16S:  return "paddsw";
      case Xsse_SUB8:     return "psubb";
      case Xsse_SUB16:    return "psubw";
      case Xsse_SUB32:    return "psubd";
      case Xsse_SUB64:    return "psubq";
      case Xsse_QSUB8U:   return "psubusb";
      case Xsse_QSUB16U:  return "psubusw";
      case Xsse_QSUB8S:   return "psubsb";
      case Xsse_QSUB16S:  return "psubsw";
      case Xsse_MUL16:    return "pmullw";
      case Xsse_MULHI16U: return "pmulhuw";
      case Xsse_MULHI16S: return "pmulhw";
      case Xsse_AVG8U:    return "pavgb";
      case Xsse_AVG16U:   return "pavgw";
      case Xsse_MAX16S:   return "pmaxw";
      case Xsse_MAX8U:    return "pmaxub";
      case Xsse_MIN16S:   return "pminw";
      case Xsse_MIN8U:    return "pminub";
      case Xsse_CMPEQ8:   return "pcmpeqb";
      case Xsse_CMPEQ16:  return "pcmpeqw";
      case Xsse_CMPEQ32:  return "pcmpeqd";
      case Xsse_CMPGT8S:  return "pcmpgtb";
      case Xsse_CMPGT16S: return "pcmpgtw";
      case Xsse_CMPGT32S: return "pcmpgtd";
      case Xsse_SHL16:    return "psllw";
      case Xsse_SHL32:    return "pslld";
      case Xsse_SHL64:    return "psllq";
      case Xsse_SHR16:    return "psrlw";
      case Xsse_SHR32:    return "psrld";
      case Xsse_SHR64:    return "psrlq";
      case Xsse_SAR16:    return "psraw";
      case Xsse_SAR32:    return "psrad";
      case Xsse_PACKSSD:  return "packssdw";
      case Xsse_PACKSSW:  return "packsswb";
      case Xsse_PACKUSW:  return "packuswb";
      case Xsse_UNPCKHB:  return "punpckhb";
      case Xsse_UNPCKHW:  return "punpckhw";
      case Xsse_UNPCKHD:  return "punpckhd";
      case Xsse_UNPCKHQ:  return "punpckhq";
      case Xsse_UNPCKLB:  return "punpcklb";
      case Xsse_UNPCKLW:  return "punpcklw";
      case Xsse_UNPCKLD:  return "punpckld";
      case Xsse_UNPCKLQ:  return "punpcklq";
      default: vpanic("showX86SseOp");
   }
}
    564 
/* --- X86Instr constructors: integer ops.  Each allocates a node,
   sets the tag and fills in the matching union arm. --- */

/* 32-bit ALU op with register destination: dst = dst `op` src. */
X86Instr* X86Instr_Alu32R ( X86AluOp op, X86RMI* src, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_Alu32R;
   i->Xin.Alu32R.op  = op;
   i->Xin.Alu32R.src = src;
   i->Xin.Alu32R.dst = dst;
   return i;
}
/* 32-bit ALU op with memory destination.  MUL is excluded since x86
   has no memory-destination multiply. */
X86Instr* X86Instr_Alu32M ( X86AluOp op, X86RI* src, X86AMode* dst ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_Alu32M;
   i->Xin.Alu32M.op  = op;
   i->Xin.Alu32M.src = src;
   i->Xin.Alu32M.dst = dst;
   vassert(op != Xalu_MUL);
   return i;
}
/* 32-bit shift by immediate amount `src`.  NOTE(review): presumably
   src == 0 encodes a shift by %cl -- confirm against the emitter. */
X86Instr* X86Instr_Sh32 ( X86ShiftOp op, UInt src, HReg dst ) {
   X86Instr* i     = LibVEX_Alloc(sizeof(X86Instr));
   i->tag          = Xin_Sh32;
   i->Xin.Sh32.op  = op;
   i->Xin.Sh32.src = src;
   i->Xin.Sh32.dst = dst;
   return i;
}
/* TEST imm32, dst (sets flags, no result). */
X86Instr* X86Instr_Test32 ( UInt imm32, X86RM* dst ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_Test32;
   i->Xin.Test32.imm32 = imm32;
   i->Xin.Test32.dst   = dst;
   return i;
}
/* Unary NOT/NEG on a register. */
X86Instr* X86Instr_Unary32 ( X86UnaryOp op, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_Unary32;
   i->Xin.Unary32.op  = op;
   i->Xin.Unary32.dst = dst;
   return i;
}
/* LEA: dst = effective address of am. */
X86Instr* X86Instr_Lea32 ( X86AMode* am, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_Lea32;
   i->Xin.Lea32.am    = am;
   i->Xin.Lea32.dst   = dst;
   return i;
}
/* Widening multiply (signed or unsigned), one explicit operand. */
X86Instr* X86Instr_MulL ( Bool syned, X86RM* src ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_MulL;
   i->Xin.MulL.syned  = syned;
   i->Xin.MulL.src    = src;
   return i;
}
/* Divide (signed or unsigned), one explicit operand. */
X86Instr* X86Instr_Div ( Bool syned, X86RM* src ) {
   X86Instr* i      = LibVEX_Alloc(sizeof(X86Instr));
   i->tag           = Xin_Div;
   i->Xin.Div.syned = syned;
   i->Xin.Div.src   = src;
   return i;
}
/* Double-length shift (only SHL/SHR forms are allowed, per the
   vassert). */
X86Instr* X86Instr_Sh3232  ( X86ShiftOp op, UInt amt, HReg src, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_Sh3232;
   i->Xin.Sh3232.op  = op;
   i->Xin.Sh3232.amt = amt;
   i->Xin.Sh3232.src = src;
   i->Xin.Sh3232.dst = dst;
   vassert(op == Xsh_SHL || op == Xsh_SHR);
   return i;
}
/* Push a 32-bit operand on the stack. */
X86Instr* X86Instr_Push( X86RMI* src ) {
   X86Instr* i     = LibVEX_Alloc(sizeof(X86Instr));
   i->tag          = Xin_Push;
   i->Xin.Push.src = src;
   return i;
}
/* Conditional call to a fixed target; at most 3 args may be passed
   in registers (enforced by the vassert). */
X86Instr* X86Instr_Call ( X86CondCode cond, Addr32 target, Int regparms ) {
   X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
   i->tag               = Xin_Call;
   i->Xin.Call.cond     = cond;
   i->Xin.Call.target   = target;
   i->Xin.Call.regparms = regparms;
   vassert(regparms >= 0 && regparms <= 3);
   return i;
}
/* Conditional jump to the address in `dst`, carrying the IR jump
   kind. */
X86Instr* X86Instr_Goto ( IRJumpKind jk, X86CondCode cond, X86RI* dst ) {
   X86Instr* i      = LibVEX_Alloc(sizeof(X86Instr));
   i->tag           = Xin_Goto;
   i->Xin.Goto.cond = cond;
   i->Xin.Goto.dst  = dst;
   i->Xin.Goto.jk   = jk;
   return i;
}
/* Conditional move; an unconditional "cmov" makes no sense, hence
   the vassert. */
X86Instr* X86Instr_CMov32  ( X86CondCode cond, X86RM* src, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_CMov32;
   i->Xin.CMov32.cond = cond;
   i->Xin.CMov32.src  = src;
   i->Xin.CMov32.dst  = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
/* Widening (sign- or zero-extending) load of 1 or 2 bytes into a
   32-bit register. */
X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned,
                            X86AMode* src, HReg dst ) {
   X86Instr* i           = LibVEX_Alloc(sizeof(X86Instr));
   i->tag                = Xin_LoadEX;
   i->Xin.LoadEX.szSmall = szSmall;
   i->Xin.LoadEX.syned   = syned;
   i->Xin.LoadEX.src     = src;
   i->Xin.LoadEX.dst     = dst;
   vassert(szSmall == 1 || szSmall == 2);
   return i;
}
/* Narrow (1- or 2-byte) store; 4-byte stores go via Alu32M MOV. */
X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst ) {
   X86Instr* i      = LibVEX_Alloc(sizeof(X86Instr));
   i->tag           = Xin_Store;
   i->Xin.Store.sz  = sz;
   i->Xin.Store.src = src;
   i->Xin.Store.dst = dst;
   vassert(sz == 1 || sz == 2);
   return i;
}
/* Set a register to 0/1 according to a condition code. */
X86Instr* X86Instr_Set32 ( X86CondCode cond, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_Set32;
   i->Xin.Set32.cond = cond;
   i->Xin.Set32.dst  = dst;
   return i;
}
/* Bit scan: forwards (BSF) if isFwds, else backwards (BSR). */
X86Instr* X86Instr_Bsfr32 ( Bool isFwds, HReg src, HReg dst ) {
   X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
   i->tag               = Xin_Bsfr32;
   i->Xin.Bsfr32.isFwds = isFwds;
   i->Xin.Bsfr32.src    = src;
   i->Xin.Bsfr32.dst    = dst;
   return i;
}
/* Memory fence; hwcaps records which SSE level is available so the
   emitter can pick an encoding.  Only the listed capability bits may
   be set. */
X86Instr* X86Instr_MFence ( UInt hwcaps ) {
   X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
   i->tag               = Xin_MFence;
   i->Xin.MFence.hwcaps = hwcaps;
   vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_SSE1
                            |VEX_HWCAPS_X86_SSE2
                            |VEX_HWCAPS_X86_SSE3
                            |VEX_HWCAPS_X86_LZCNT)));
   return i;
}
/* Atomic compare-and-swap of 1, 2 or 4 bytes at `addr`. */
X86Instr* X86Instr_ACAS ( X86AMode* addr, UChar sz ) {
   X86Instr* i      = LibVEX_Alloc(sizeof(X86Instr));
   i->tag           = Xin_ACAS;
   i->Xin.ACAS.addr = addr;
   i->Xin.ACAS.sz   = sz;
   vassert(sz == 4 || sz == 2 || sz == 1);
   return i;
}
/* Double-width (8-byte) atomic compare-and-swap at `addr`. */
X86Instr* X86Instr_DACAS ( X86AMode* addr ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_DACAS;
   i->Xin.DACAS.addr = addr;
   return i;
}
    726 
/* --- X86Instr constructors: x87 FP ops. --- */

/* Unary FP op: dst = op(src). */
X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_FpUnary;
   i->Xin.FpUnary.op  = op;
   i->Xin.FpUnary.src = src;
   i->Xin.FpUnary.dst = dst;
   return i;
}
/* Binary FP op: dst = srcL `op` srcR. */
X86Instr* X86Instr_FpBinary ( X86FpOp op, HReg srcL, HReg srcR, HReg dst ) {
   X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
   i->tag               = Xin_FpBinary;
   i->Xin.FpBinary.op   = op;
   i->Xin.FpBinary.srcL = srcL;
   i->Xin.FpBinary.srcR = srcR;
   i->Xin.FpBinary.dst  = dst;
   return i;
}
/* FP load/store of 4, 8 or 10 bytes (10 = 80-bit extended). */
X86Instr* X86Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, X86AMode* addr ) {
   X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
   i->tag               = Xin_FpLdSt;
   i->Xin.FpLdSt.isLoad = isLoad;
   i->Xin.FpLdSt.sz     = sz;
   i->Xin.FpLdSt.reg    = reg;
   i->Xin.FpLdSt.addr   = addr;
   vassert(sz == 4 || sz == 8 || sz == 10);
   return i;
}
/* FP load/store with integer conversion (2, 4 or 8 byte integers). */
X86Instr* X86Instr_FpLdStI ( Bool isLoad, UChar sz,
                             HReg reg, X86AMode* addr ) {
   X86Instr* i           = LibVEX_Alloc(sizeof(X86Instr));
   i->tag                = Xin_FpLdStI;
   i->Xin.FpLdStI.isLoad = isLoad;
   i->Xin.FpLdStI.sz     = sz;
   i->Xin.FpLdStI.reg    = reg;
   i->Xin.FpLdStI.addr   = addr;
   vassert(sz == 2 || sz == 4 || sz == 8);
   return i;
}
/* Narrow an F64 value to F32 precision. */
X86Instr* X86Instr_Fp64to32 ( HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_Fp64to32;
   i->Xin.Fp64to32.src = src;
   i->Xin.Fp64to32.dst = dst;
   return i;
}
/* Conditional FP move; unconditional form is disallowed. */
X86Instr* X86Instr_FpCMov ( X86CondCode cond, HReg src, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_FpCMov;
   i->Xin.FpCMov.cond = cond;
   i->Xin.FpCMov.src  = src;
   i->Xin.FpCMov.dst  = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
/* Load the x87 control word from memory. */
X86Instr* X86Instr_FpLdCW ( X86AMode* addr ) {
   X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
   i->tag               = Xin_FpLdCW;
   i->Xin.FpLdCW.addr   = addr;
   return i;
}
/* Store the x87 status word to %ax (no operands). */
X86Instr* X86Instr_FpStSW_AX ( void ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag      = Xin_FpStSW_AX;
   return i;
}
/* FP compare; the result lands in an integer register `dst`. */
X86Instr* X86Instr_FpCmp ( HReg srcL, HReg srcR, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_FpCmp;
   i->Xin.FpCmp.srcL = srcL;
   i->Xin.FpCmp.srcR = srcR;
   i->Xin.FpCmp.dst  = dst;
   return i;
}
    800 
/* --- X86Instr constructors: SSE ops. --- */

/* Load a constant into an XMM register; dst must be vector-class. */
X86Instr* X86Instr_SseConst ( UShort con, HReg dst ) {
   X86Instr* i            = LibVEX_Alloc(sizeof(X86Instr));
   i->tag                 = Xin_SseConst;
   i->Xin.SseConst.con    = con;
   i->Xin.SseConst.dst    = dst;
   vassert(hregClass(dst) == HRcVec128);
   return i;
}
/* 16-byte XMM load or store. */
X86Instr* X86Instr_SseLdSt ( Bool isLoad, HReg reg, X86AMode* addr ) {
   X86Instr* i           = LibVEX_Alloc(sizeof(X86Instr));
   i->tag                = Xin_SseLdSt;
   i->Xin.SseLdSt.isLoad = isLoad;
   i->Xin.SseLdSt.reg    = reg;
   i->Xin.SseLdSt.addr   = addr;
   return i;
}
/* Load 4 or 8 bytes into the low lane of an XMM register, zeroing
   the rest. */
X86Instr* X86Instr_SseLdzLO  ( Int sz, HReg reg, X86AMode* addr )
{
   X86Instr* i           = LibVEX_Alloc(sizeof(X86Instr));
   i->tag                = Xin_SseLdzLO;
   i->Xin.SseLdzLO.sz    = toUChar(sz);
   i->Xin.SseLdzLO.reg   = reg;
   i->Xin.SseLdzLO.addr  = addr;
   vassert(sz == 4 || sz == 8);
   return i;
}
/* Packed 4xF32 op.  Plain moves are not expressed this way, hence
   the vassert (likewise for the three constructors below). */
X86Instr* X86Instr_Sse32Fx4 ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_Sse32Fx4;
   i->Xin.Sse32Fx4.op  = op;
   i->Xin.Sse32Fx4.src = src;
   i->Xin.Sse32Fx4.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
/* Scalar F32 op on the lowest lane only. */
X86Instr* X86Instr_Sse32FLo ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_Sse32FLo;
   i->Xin.Sse32FLo.op  = op;
   i->Xin.Sse32FLo.src = src;
   i->Xin.Sse32FLo.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
/* Packed 2xF64 op. */
X86Instr* X86Instr_Sse64Fx2 ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_Sse64Fx2;
   i->Xin.Sse64Fx2.op  = op;
   i->Xin.Sse64Fx2.src = src;
   i->Xin.Sse64Fx2.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
/* Scalar F64 op on the lowest lane only. */
X86Instr* X86Instr_Sse64FLo ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_Sse64FLo;
   i->Xin.Sse64FLo.op  = op;
   i->Xin.Sse64FLo.src = src;
   i->Xin.Sse64FLo.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
    863 X86Instr* X86Instr_SseReRg ( X86SseOp op, HReg re, HReg rg ) {
    864    X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
    865    i->tag             = Xin_SseReRg;
    866    i->Xin.SseReRg.op  = op;
    867    i->Xin.SseReRg.src = re;
    868    i->Xin.SseReRg.dst = rg;
    869    return i;
    870 }
    871 X86Instr* X86Instr_SseCMov ( X86CondCode cond, HReg src, HReg dst ) {
    872    X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
    873    i->tag              = Xin_SseCMov;
    874    i->Xin.SseCMov.cond = cond;
    875    i->Xin.SseCMov.src  = src;
    876    i->Xin.SseCMov.dst  = dst;
    877    vassert(cond != Xcc_ALWAYS);
    878    return i;
    879 }
    880 X86Instr* X86Instr_SseShuf ( Int order, HReg src, HReg dst ) {
    881    X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
    882    i->tag               = Xin_SseShuf;
    883    i->Xin.SseShuf.order = order;
    884    i->Xin.SseShuf.src   = src;
    885    i->Xin.SseShuf.dst   = dst;
    886    vassert(order >= 0 && order <= 0xFF);
    887    return i;
    888 }
    889 
    890 void ppX86Instr ( X86Instr* i, Bool mode64 ) {
    891    vassert(mode64 == False);
    892    switch (i->tag) {
    893       case Xin_Alu32R:
    894          vex_printf("%sl ", showX86AluOp(i->Xin.Alu32R.op));
    895          ppX86RMI(i->Xin.Alu32R.src);
    896          vex_printf(",");
    897          ppHRegX86(i->Xin.Alu32R.dst);
    898          return;
    899       case Xin_Alu32M:
    900          vex_printf("%sl ", showX86AluOp(i->Xin.Alu32M.op));
    901          ppX86RI(i->Xin.Alu32M.src);
    902          vex_printf(",");
    903          ppX86AMode(i->Xin.Alu32M.dst);
    904          return;
    905       case Xin_Sh32:
    906          vex_printf("%sl ", showX86ShiftOp(i->Xin.Sh32.op));
    907          if (i->Xin.Sh32.src == 0)
    908            vex_printf("%%cl,");
    909          else
    910             vex_printf("$%d,", (Int)i->Xin.Sh32.src);
    911          ppHRegX86(i->Xin.Sh32.dst);
    912          return;
    913       case Xin_Test32:
    914          vex_printf("testl $%d,", (Int)i->Xin.Test32.imm32);
    915          ppX86RM(i->Xin.Test32.dst);
    916          return;
    917       case Xin_Unary32:
    918          vex_printf("%sl ", showX86UnaryOp(i->Xin.Unary32.op));
    919          ppHRegX86(i->Xin.Unary32.dst);
    920          return;
    921       case Xin_Lea32:
    922          vex_printf("leal ");
    923          ppX86AMode(i->Xin.Lea32.am);
    924          vex_printf(",");
    925          ppHRegX86(i->Xin.Lea32.dst);
    926          return;
    927       case Xin_MulL:
    928          vex_printf("%cmull ", i->Xin.MulL.syned ? 's' : 'u');
    929          ppX86RM(i->Xin.MulL.src);
    930          return;
    931       case Xin_Div:
    932          vex_printf("%cdivl ", i->Xin.Div.syned ? 's' : 'u');
    933          ppX86RM(i->Xin.Div.src);
    934          return;
    935       case Xin_Sh3232:
    936          vex_printf("%sdl ", showX86ShiftOp(i->Xin.Sh3232.op));
    937          if (i->Xin.Sh3232.amt == 0)
    938            vex_printf(" %%cl,");
    939          else
    940             vex_printf(" $%d,", (Int)i->Xin.Sh3232.amt);
    941          ppHRegX86(i->Xin.Sh3232.src);
    942          vex_printf(",");
    943          ppHRegX86(i->Xin.Sh3232.dst);
    944          return;
    945       case Xin_Push:
    946          vex_printf("pushl ");
    947          ppX86RMI(i->Xin.Push.src);
    948          return;
    949       case Xin_Call:
    950          vex_printf("call%s[%d] ",
    951                     i->Xin.Call.cond==Xcc_ALWAYS
    952                        ? "" : showX86CondCode(i->Xin.Call.cond),
    953                     i->Xin.Call.regparms);
    954          vex_printf("0x%x", i->Xin.Call.target);
    955          break;
    956       case Xin_Goto:
    957          if (i->Xin.Goto.cond != Xcc_ALWAYS) {
    958             vex_printf("if (%%eflags.%s) { ",
    959                        showX86CondCode(i->Xin.Goto.cond));
    960 	 }
    961          if (i->Xin.Goto.jk != Ijk_Boring
    962              && i->Xin.Goto.jk != Ijk_Call
    963              && i->Xin.Goto.jk != Ijk_Ret) {
    964             vex_printf("movl $");
    965             ppIRJumpKind(i->Xin.Goto.jk);
    966             vex_printf(",%%ebp ; ");
    967          }
    968          vex_printf("movl ");
    969          ppX86RI(i->Xin.Goto.dst);
    970          vex_printf(",%%eax ; movl $dispatcher_addr,%%edx ; jmp *%%edx");
    971          if (i->Xin.Goto.cond != Xcc_ALWAYS) {
    972             vex_printf(" }");
    973 	 }
    974          return;
    975       case Xin_CMov32:
    976          vex_printf("cmov%s ", showX86CondCode(i->Xin.CMov32.cond));
    977          ppX86RM(i->Xin.CMov32.src);
    978          vex_printf(",");
    979          ppHRegX86(i->Xin.CMov32.dst);
    980          return;
    981       case Xin_LoadEX:
    982          vex_printf("mov%c%cl ",
    983                     i->Xin.LoadEX.syned ? 's' : 'z',
    984                     i->Xin.LoadEX.szSmall==1 ? 'b' : 'w');
    985          ppX86AMode(i->Xin.LoadEX.src);
    986          vex_printf(",");
    987          ppHRegX86(i->Xin.LoadEX.dst);
    988          return;
    989       case Xin_Store:
    990          vex_printf("mov%c ", i->Xin.Store.sz==1 ? 'b' : 'w');
    991          ppHRegX86(i->Xin.Store.src);
    992          vex_printf(",");
    993          ppX86AMode(i->Xin.Store.dst);
    994          return;
    995       case Xin_Set32:
    996          vex_printf("setl%s ", showX86CondCode(i->Xin.Set32.cond));
    997          ppHRegX86(i->Xin.Set32.dst);
    998          return;
    999       case Xin_Bsfr32:
   1000          vex_printf("bs%cl ", i->Xin.Bsfr32.isFwds ? 'f' : 'r');
   1001          ppHRegX86(i->Xin.Bsfr32.src);
   1002          vex_printf(",");
   1003          ppHRegX86(i->Xin.Bsfr32.dst);
   1004          return;
   1005       case Xin_MFence:
   1006          vex_printf("mfence(%s)",
   1007                     LibVEX_ppVexHwCaps(VexArchX86,i->Xin.MFence.hwcaps));
   1008          return;
   1009       case Xin_ACAS:
   1010          vex_printf("lock cmpxchg%c ",
   1011                      i->Xin.ACAS.sz==1 ? 'b'
   1012                                        : i->Xin.ACAS.sz==2 ? 'w' : 'l');
   1013          vex_printf("{%%eax->%%ebx},");
   1014          ppX86AMode(i->Xin.ACAS.addr);
   1015          return;
   1016       case Xin_DACAS:
   1017          vex_printf("lock cmpxchg8b {%%edx:%%eax->%%ecx:%%ebx},");
   1018          ppX86AMode(i->Xin.DACAS.addr);
   1019          return;
   1020       case Xin_FpUnary:
   1021          vex_printf("g%sD ", showX86FpOp(i->Xin.FpUnary.op));
   1022          ppHRegX86(i->Xin.FpUnary.src);
   1023          vex_printf(",");
   1024          ppHRegX86(i->Xin.FpUnary.dst);
   1025          break;
   1026       case Xin_FpBinary:
   1027          vex_printf("g%sD ", showX86FpOp(i->Xin.FpBinary.op));
   1028          ppHRegX86(i->Xin.FpBinary.srcL);
   1029          vex_printf(",");
   1030          ppHRegX86(i->Xin.FpBinary.srcR);
   1031          vex_printf(",");
   1032          ppHRegX86(i->Xin.FpBinary.dst);
   1033          break;
   1034       case Xin_FpLdSt:
   1035          if (i->Xin.FpLdSt.isLoad) {
   1036             vex_printf("gld%c " ,  i->Xin.FpLdSt.sz==10 ? 'T'
   1037                                    : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
   1038             ppX86AMode(i->Xin.FpLdSt.addr);
   1039             vex_printf(", ");
   1040             ppHRegX86(i->Xin.FpLdSt.reg);
   1041          } else {
   1042             vex_printf("gst%c " , i->Xin.FpLdSt.sz==10 ? 'T'
   1043                                   : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
   1044             ppHRegX86(i->Xin.FpLdSt.reg);
   1045             vex_printf(", ");
   1046             ppX86AMode(i->Xin.FpLdSt.addr);
   1047          }
   1048          return;
   1049       case Xin_FpLdStI:
   1050          if (i->Xin.FpLdStI.isLoad) {
   1051             vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
   1052                                   i->Xin.FpLdStI.sz==4 ? "l" : "w");
   1053             ppX86AMode(i->Xin.FpLdStI.addr);
   1054             vex_printf(", ");
   1055             ppHRegX86(i->Xin.FpLdStI.reg);
   1056          } else {
   1057             vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
   1058                                   i->Xin.FpLdStI.sz==4 ? "l" : "w");
   1059             ppHRegX86(i->Xin.FpLdStI.reg);
   1060             vex_printf(", ");
   1061             ppX86AMode(i->Xin.FpLdStI.addr);
   1062          }
   1063          return;
   1064       case Xin_Fp64to32:
   1065          vex_printf("gdtof ");
   1066          ppHRegX86(i->Xin.Fp64to32.src);
   1067          vex_printf(",");
   1068          ppHRegX86(i->Xin.Fp64to32.dst);
   1069          return;
   1070       case Xin_FpCMov:
   1071          vex_printf("gcmov%s ", showX86CondCode(i->Xin.FpCMov.cond));
   1072          ppHRegX86(i->Xin.FpCMov.src);
   1073          vex_printf(",");
   1074          ppHRegX86(i->Xin.FpCMov.dst);
   1075          return;
   1076       case Xin_FpLdCW:
   1077          vex_printf("fldcw ");
   1078          ppX86AMode(i->Xin.FpLdCW.addr);
   1079          return;
   1080       case Xin_FpStSW_AX:
   1081          vex_printf("fstsw %%ax");
   1082          return;
   1083       case Xin_FpCmp:
   1084          vex_printf("gcmp ");
   1085          ppHRegX86(i->Xin.FpCmp.srcL);
   1086          vex_printf(",");
   1087          ppHRegX86(i->Xin.FpCmp.srcR);
   1088          vex_printf(",");
   1089          ppHRegX86(i->Xin.FpCmp.dst);
   1090          break;
   1091       case Xin_SseConst:
   1092          vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con);
   1093          ppHRegX86(i->Xin.SseConst.dst);
   1094          break;
   1095       case Xin_SseLdSt:
   1096          vex_printf("movups ");
   1097          if (i->Xin.SseLdSt.isLoad) {
   1098             ppX86AMode(i->Xin.SseLdSt.addr);
   1099             vex_printf(",");
   1100             ppHRegX86(i->Xin.SseLdSt.reg);
   1101          } else {
   1102             ppHRegX86(i->Xin.SseLdSt.reg);
   1103             vex_printf(",");
   1104             ppX86AMode(i->Xin.SseLdSt.addr);
   1105          }
   1106          return;
   1107       case Xin_SseLdzLO:
   1108          vex_printf("movs%s ", i->Xin.SseLdzLO.sz==4 ? "s" : "d");
   1109          ppX86AMode(i->Xin.SseLdzLO.addr);
   1110          vex_printf(",");
   1111          ppHRegX86(i->Xin.SseLdzLO.reg);
   1112          return;
   1113       case Xin_Sse32Fx4:
   1114          vex_printf("%sps ", showX86SseOp(i->Xin.Sse32Fx4.op));
   1115          ppHRegX86(i->Xin.Sse32Fx4.src);
   1116          vex_printf(",");
   1117          ppHRegX86(i->Xin.Sse32Fx4.dst);
   1118          return;
   1119       case Xin_Sse32FLo:
   1120          vex_printf("%sss ", showX86SseOp(i->Xin.Sse32FLo.op));
   1121          ppHRegX86(i->Xin.Sse32FLo.src);
   1122          vex_printf(",");
   1123          ppHRegX86(i->Xin.Sse32FLo.dst);
   1124          return;
   1125       case Xin_Sse64Fx2:
   1126          vex_printf("%spd ", showX86SseOp(i->Xin.Sse64Fx2.op));
   1127          ppHRegX86(i->Xin.Sse64Fx2.src);
   1128          vex_printf(",");
   1129          ppHRegX86(i->Xin.Sse64Fx2.dst);
   1130          return;
   1131       case Xin_Sse64FLo:
   1132          vex_printf("%ssd ", showX86SseOp(i->Xin.Sse64FLo.op));
   1133          ppHRegX86(i->Xin.Sse64FLo.src);
   1134          vex_printf(",");
   1135          ppHRegX86(i->Xin.Sse64FLo.dst);
   1136          return;
   1137       case Xin_SseReRg:
   1138          vex_printf("%s ", showX86SseOp(i->Xin.SseReRg.op));
   1139          ppHRegX86(i->Xin.SseReRg.src);
   1140          vex_printf(",");
   1141          ppHRegX86(i->Xin.SseReRg.dst);
   1142          return;
   1143       case Xin_SseCMov:
   1144          vex_printf("cmov%s ", showX86CondCode(i->Xin.SseCMov.cond));
   1145          ppHRegX86(i->Xin.SseCMov.src);
   1146          vex_printf(",");
   1147          ppHRegX86(i->Xin.SseCMov.dst);
   1148          return;
   1149       case Xin_SseShuf:
   1150          vex_printf("pshufd $0x%x,", i->Xin.SseShuf.order);
   1151          ppHRegX86(i->Xin.SseShuf.src);
   1152          vex_printf(",");
   1153          ppHRegX86(i->Xin.SseShuf.dst);
   1154          return;
   1155 
   1156       default:
   1157          vpanic("ppX86Instr");
   1158    }
   1159 }
   1160 
   1161 /* --------- Helpers for register allocation. --------- */
   1162 
/* Record in *u how instruction 'i' uses host registers (read,
   write, or read-modify-write), for the register allocator.
   mode64 must be False: this file handles only the 32-bit backend. */
void getRegUsage_X86Instr (HRegUsage* u, X86Instr* i, Bool mode64)
{
   Bool unary;
   vassert(mode64 == False);
   initHRegUsage(u);
   switch (i->tag) {
      case Xin_Alu32R:
         addRegUsage_X86RMI(u, i->Xin.Alu32R.src);
         /* MOV overwrites dst; CMP only reads it; every other ALU
            op reads and writes it. */
         if (i->Xin.Alu32R.op == Xalu_MOV) {
            addHRegUse(u, HRmWrite, i->Xin.Alu32R.dst);
            return;
         }
         if (i->Xin.Alu32R.op == Xalu_CMP) {
            addHRegUse(u, HRmRead, i->Xin.Alu32R.dst);
            return;
         }
         addHRegUse(u, HRmModify, i->Xin.Alu32R.dst);
         return;
      case Xin_Alu32M:
         addRegUsage_X86RI(u, i->Xin.Alu32M.src);
         addRegUsage_X86AMode(u, i->Xin.Alu32M.dst);
         return;
      case Xin_Sh32:
         addHRegUse(u, HRmModify, i->Xin.Sh32.dst);
         /* a shift amount of 0 denotes "count is in %cl" */
         if (i->Xin.Sh32.src == 0)
            addHRegUse(u, HRmRead, hregX86_ECX());
         return;
      case Xin_Test32:
         addRegUsage_X86RM(u, i->Xin.Test32.dst, HRmRead);
         return;
      case Xin_Unary32:
         addHRegUse(u, HRmModify, i->Xin.Unary32.dst);
         return;
      case Xin_Lea32:
         addRegUsage_X86AMode(u, i->Xin.Lea32.am);
         addHRegUse(u, HRmWrite, i->Xin.Lea32.dst);
         return;
      case Xin_MulL:
         /* one-operand widening multiply: EAX is an implicit source
            and is overwritten; EDX receives the high half. */
         addRegUsage_X86RM(u, i->Xin.MulL.src, HRmRead);
         addHRegUse(u, HRmModify, hregX86_EAX());
         addHRegUse(u, HRmWrite, hregX86_EDX());
         return;
      case Xin_Div:
         /* one-operand divide: EDX:EAX is the implicit dividend and
            receives quotient/remainder. */
         addRegUsage_X86RM(u, i->Xin.Div.src, HRmRead);
         addHRegUse(u, HRmModify, hregX86_EAX());
         addHRegUse(u, HRmModify, hregX86_EDX());
         return;
      case Xin_Sh3232:
         addHRegUse(u, HRmRead, i->Xin.Sh3232.src);
         addHRegUse(u, HRmModify, i->Xin.Sh3232.dst);
         /* as with Sh32, amount 0 denotes "count is in %cl" */
         if (i->Xin.Sh3232.amt == 0)
            addHRegUse(u, HRmRead, hregX86_ECX());
         return;
      case Xin_Push:
         addRegUsage_X86RMI(u, i->Xin.Push.src);
         addHRegUse(u, HRmModify, hregX86_ESP());
         return;
      case Xin_Call:
         /* This is a bit subtle. */
         /* First off, claim it trashes all the caller-saved regs
            which fall within the register allocator's jurisdiction.
            These I believe to be %eax %ecx %edx and all the xmm
            registers. */
         addHRegUse(u, HRmWrite, hregX86_EAX());
         addHRegUse(u, HRmWrite, hregX86_ECX());
         addHRegUse(u, HRmWrite, hregX86_EDX());
         addHRegUse(u, HRmWrite, hregX86_XMM0());
         addHRegUse(u, HRmWrite, hregX86_XMM1());
         addHRegUse(u, HRmWrite, hregX86_XMM2());
         addHRegUse(u, HRmWrite, hregX86_XMM3());
         addHRegUse(u, HRmWrite, hregX86_XMM4());
         addHRegUse(u, HRmWrite, hregX86_XMM5());
         addHRegUse(u, HRmWrite, hregX86_XMM6());
         addHRegUse(u, HRmWrite, hregX86_XMM7());
         /* Now we have to state any parameter-carrying registers
            which might be read.  This depends on the regparmness. */
         switch (i->Xin.Call.regparms) {
            case 3: addHRegUse(u, HRmRead, hregX86_ECX()); /*fallthru*/
            case 2: addHRegUse(u, HRmRead, hregX86_EDX()); /*fallthru*/
            case 1: addHRegUse(u, HRmRead, hregX86_EAX()); break;
            case 0: break;
            default: vpanic("getRegUsage_X86Instr:Call:regparms");
         }
         /* Finally, there is the issue that the insn trashes a
            register because the literal target address has to be
            loaded into a register.  Fortunately, for the 0/1/2
            regparm case, we can use EAX, EDX and ECX respectively, so
            this does not cause any further damage.  For the 3-regparm
            case, we'll have to choose another register arbitrarily --
            since A, D and C are used for parameters -- and so we might
            as well choose EDI. */
         if (i->Xin.Call.regparms == 3)
            addHRegUse(u, HRmWrite, hregX86_EDI());
         /* Upshot of this is that the assembler really must observe
            the here-stated convention of which register to use as an
            address temporary, depending on the regparmness: 0==EAX,
            1==EDX, 2==ECX, 3==EDI. */
         return;
      case Xin_Goto:
         addRegUsage_X86RI(u, i->Xin.Goto.dst);
         addHRegUse(u, HRmWrite, hregX86_EAX()); /* used for next guest addr */
         addHRegUse(u, HRmWrite, hregX86_EDX()); /* used for dispatcher addr */
         if (i->Xin.Goto.jk != Ijk_Boring
             && i->Xin.Goto.jk != Ijk_Call
             && i->Xin.Goto.jk != Ijk_Ret)
            /* note, this is irrelevant since ebp is not actually
               available to the allocator.  But still .. */
            addHRegUse(u, HRmWrite, hregX86_EBP());
         return;
      case Xin_CMov32:
         addRegUsage_X86RM(u, i->Xin.CMov32.src, HRmRead);
         addHRegUse(u, HRmModify, i->Xin.CMov32.dst);
         return;
      case Xin_LoadEX:
         addRegUsage_X86AMode(u, i->Xin.LoadEX.src);
         addHRegUse(u, HRmWrite, i->Xin.LoadEX.dst);
         return;
      case Xin_Store:
         addHRegUse(u, HRmRead, i->Xin.Store.src);
         addRegUsage_X86AMode(u, i->Xin.Store.dst);
         return;
      case Xin_Set32:
         addHRegUse(u, HRmWrite, i->Xin.Set32.dst);
         return;
      case Xin_Bsfr32:
         addHRegUse(u, HRmRead, i->Xin.Bsfr32.src);
         addHRegUse(u, HRmWrite, i->Xin.Bsfr32.dst);
         return;
      case Xin_MFence:
         /* no register operands */
         return;
      case Xin_ACAS:
         /* cmpxchg: EBX carries the new value, EAX the expected old
            value (and receives the observed one). */
         addRegUsage_X86AMode(u, i->Xin.ACAS.addr);
         addHRegUse(u, HRmRead, hregX86_EBX());
         addHRegUse(u, HRmModify, hregX86_EAX());
         return;
      case Xin_DACAS:
         /* cmpxchg8b: ECX:EBX carry the new value, EDX:EAX the
            expected old value (and receive the observed one). */
         addRegUsage_X86AMode(u, i->Xin.DACAS.addr);
         addHRegUse(u, HRmRead, hregX86_ECX());
         addHRegUse(u, HRmRead, hregX86_EBX());
         addHRegUse(u, HRmModify, hregX86_EDX());
         addHRegUse(u, HRmModify, hregX86_EAX());
         return;
      case Xin_FpUnary:
         addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
         addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
         return;
      case Xin_FpBinary:
         addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL);
         addHRegUse(u, HRmRead, i->Xin.FpBinary.srcR);
         addHRegUse(u, HRmWrite, i->Xin.FpBinary.dst);
         return;
      case Xin_FpLdSt:
         addRegUsage_X86AMode(u, i->Xin.FpLdSt.addr);
         addHRegUse(u, i->Xin.FpLdSt.isLoad ? HRmWrite : HRmRead,
                       i->Xin.FpLdSt.reg);
         return;
      case Xin_FpLdStI:
         addRegUsage_X86AMode(u, i->Xin.FpLdStI.addr);
         addHRegUse(u, i->Xin.FpLdStI.isLoad ? HRmWrite : HRmRead,
                       i->Xin.FpLdStI.reg);
         return;
      case Xin_Fp64to32:
         addHRegUse(u, HRmRead,  i->Xin.Fp64to32.src);
         addHRegUse(u, HRmWrite, i->Xin.Fp64to32.dst);
         return;
      case Xin_FpCMov:
         addHRegUse(u, HRmRead,   i->Xin.FpCMov.src);
         addHRegUse(u, HRmModify, i->Xin.FpCMov.dst);
         return;
      case Xin_FpLdCW:
         addRegUsage_X86AMode(u, i->Xin.FpLdCW.addr);
         return;
      case Xin_FpStSW_AX:
         addHRegUse(u, HRmWrite, hregX86_EAX());
         return;
      case Xin_FpCmp:
         addHRegUse(u, HRmRead, i->Xin.FpCmp.srcL);
         addHRegUse(u, HRmRead, i->Xin.FpCmp.srcR);
         addHRegUse(u, HRmWrite, i->Xin.FpCmp.dst);
         addHRegUse(u, HRmWrite, hregX86_EAX());
         return;
      case Xin_SseLdSt:
         addRegUsage_X86AMode(u, i->Xin.SseLdSt.addr);
         addHRegUse(u, i->Xin.SseLdSt.isLoad ? HRmWrite : HRmRead,
                       i->Xin.SseLdSt.reg);
         return;
      case Xin_SseLdzLO:
         addRegUsage_X86AMode(u, i->Xin.SseLdzLO.addr);
         addHRegUse(u, HRmWrite, i->Xin.SseLdzLO.reg);
         return;
      case Xin_SseConst:
         addHRegUse(u, HRmWrite, i->Xin.SseConst.dst);
         return;
      case Xin_Sse32Fx4:
         /* RCPF/RSQRTF/SQRTF only read src, so dst is a pure write;
            all other ops also read dst. */
         vassert(i->Xin.Sse32Fx4.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse32Fx4.op == Xsse_RCPF
                         || i->Xin.Sse32Fx4.op == Xsse_RSQRTF
                         || i->Xin.Sse32Fx4.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse32Fx4.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Xin.Sse32Fx4.dst);
         return;
      case Xin_Sse32FLo:
         vassert(i->Xin.Sse32FLo.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse32FLo.op == Xsse_RCPF
                         || i->Xin.Sse32FLo.op == Xsse_RSQRTF
                         || i->Xin.Sse32FLo.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse32FLo.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Xin.Sse32FLo.dst);
         return;
      case Xin_Sse64Fx2:
         vassert(i->Xin.Sse64Fx2.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse64Fx2.op == Xsse_RCPF
                         || i->Xin.Sse64Fx2.op == Xsse_RSQRTF
                         || i->Xin.Sse64Fx2.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse64Fx2.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Xin.Sse64Fx2.dst);
         return;
      case Xin_Sse64FLo:
         vassert(i->Xin.Sse64FLo.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse64FLo.op == Xsse_RCPF
                         || i->Xin.Sse64FLo.op == Xsse_RSQRTF
                         || i->Xin.Sse64FLo.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse64FLo.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Xin.Sse64FLo.dst);
         return;
      case Xin_SseReRg:
         if (i->Xin.SseReRg.op == Xsse_XOR
             && i->Xin.SseReRg.src == i->Xin.SseReRg.dst) {
            /* reg-alloc needs to understand 'xor r,r' as a write of r */
            /* (as opposed to a rite of passage :-) */
            addHRegUse(u, HRmWrite, i->Xin.SseReRg.dst);
         } else {
            addHRegUse(u, HRmRead, i->Xin.SseReRg.src);
            addHRegUse(u, i->Xin.SseReRg.op == Xsse_MOV
                             ? HRmWrite : HRmModify,
                          i->Xin.SseReRg.dst);
         }
         return;
      case Xin_SseCMov:
         addHRegUse(u, HRmRead,   i->Xin.SseCMov.src);
         addHRegUse(u, HRmModify, i->Xin.SseCMov.dst);
         return;
      case Xin_SseShuf:
         addHRegUse(u, HRmRead,  i->Xin.SseShuf.src);
         addHRegUse(u, HRmWrite, i->Xin.SseShuf.dst);
         return;
      default:
         ppX86Instr(i, False);
         vpanic("getRegUsage_X86Instr");
   }
}
   1418 
   1419 /* local helper */
   1420 static void mapReg( HRegRemap* m, HReg* r )
   1421 {
   1422    *r = lookupHRegRemap(m, *r);
   1423 }
   1424 
   1425 void mapRegs_X86Instr ( HRegRemap* m, X86Instr* i, Bool mode64 )
   1426 {
   1427    vassert(mode64 == False);
   1428    switch (i->tag) {
   1429       case Xin_Alu32R:
   1430          mapRegs_X86RMI(m, i->Xin.Alu32R.src);
   1431          mapReg(m, &i->Xin.Alu32R.dst);
   1432          return;
   1433       case Xin_Alu32M:
   1434          mapRegs_X86RI(m, i->Xin.Alu32M.src);
   1435          mapRegs_X86AMode(m, i->Xin.Alu32M.dst);
   1436          return;
   1437       case Xin_Sh32:
   1438          mapReg(m, &i->Xin.Sh32.dst);
   1439          return;
   1440       case Xin_Test32:
   1441          mapRegs_X86RM(m, i->Xin.Test32.dst);
   1442          return;
   1443       case Xin_Unary32:
   1444          mapReg(m, &i->Xin.Unary32.dst);
   1445          return;
   1446       case Xin_Lea32:
   1447          mapRegs_X86AMode(m, i->Xin.Lea32.am);
   1448          mapReg(m, &i->Xin.Lea32.dst);
   1449          return;
   1450       case Xin_MulL:
   1451          mapRegs_X86RM(m, i->Xin.MulL.src);
   1452          return;
   1453       case Xin_Div:
   1454          mapRegs_X86RM(m, i->Xin.Div.src);
   1455          return;
   1456       case Xin_Sh3232:
   1457          mapReg(m, &i->Xin.Sh3232.src);
   1458          mapReg(m, &i->Xin.Sh3232.dst);
   1459          return;
   1460       case Xin_Push:
   1461          mapRegs_X86RMI(m, i->Xin.Push.src);
   1462          return;
   1463       case Xin_Call:
   1464          return;
   1465       case Xin_Goto:
   1466          mapRegs_X86RI(m, i->Xin.Goto.dst);
   1467          return;
   1468       case Xin_CMov32:
   1469          mapRegs_X86RM(m, i->Xin.CMov32.src);
   1470          mapReg(m, &i->Xin.CMov32.dst);
   1471          return;
   1472       case Xin_LoadEX:
   1473          mapRegs_X86AMode(m, i->Xin.LoadEX.src);
   1474          mapReg(m, &i->Xin.LoadEX.dst);
   1475          return;
   1476       case Xin_Store:
   1477          mapReg(m, &i->Xin.Store.src);
   1478          mapRegs_X86AMode(m, i->Xin.Store.dst);
   1479          return;
   1480       case Xin_Set32:
   1481          mapReg(m, &i->Xin.Set32.dst);
   1482          return;
   1483       case Xin_Bsfr32:
   1484          mapReg(m, &i->Xin.Bsfr32.src);
   1485          mapReg(m, &i->Xin.Bsfr32.dst);
   1486          return;
   1487       case Xin_MFence:
   1488          return;
   1489       case Xin_ACAS:
   1490          mapRegs_X86AMode(m, i->Xin.ACAS.addr);
   1491          return;
   1492       case Xin_DACAS:
   1493          mapRegs_X86AMode(m, i->Xin.DACAS.addr);
   1494          return;
   1495       case Xin_FpUnary:
   1496          mapReg(m, &i->Xin.FpUnary.src);
   1497          mapReg(m, &i->Xin.FpUnary.dst);
   1498          return;
   1499       case Xin_FpBinary:
   1500          mapReg(m, &i->Xin.FpBinary.srcL);
   1501          mapReg(m, &i->Xin.FpBinary.srcR);
   1502          mapReg(m, &i->Xin.FpBinary.dst);
   1503          return;
   1504       case Xin_FpLdSt:
   1505          mapRegs_X86AMode(m, i->Xin.FpLdSt.addr);
   1506          mapReg(m, &i->Xin.FpLdSt.reg);
   1507          return;
   1508       case Xin_FpLdStI:
   1509          mapRegs_X86AMode(m, i->Xin.FpLdStI.addr);
   1510          mapReg(m, &i->Xin.FpLdStI.reg);
   1511          return;
   1512       case Xin_Fp64to32:
   1513          mapReg(m, &i->Xin.Fp64to32.src);
   1514          mapReg(m, &i->Xin.Fp64to32.dst);
   1515          return;
   1516       case Xin_FpCMov:
   1517          mapReg(m, &i->Xin.FpCMov.src);
   1518          mapReg(m, &i->Xin.FpCMov.dst);
   1519          return;
   1520       case Xin_FpLdCW:
   1521          mapRegs_X86AMode(m, i->Xin.FpLdCW.addr);
   1522          return;
   1523       case Xin_FpStSW_AX:
   1524          return;
   1525       case Xin_FpCmp:
   1526          mapReg(m, &i->Xin.FpCmp.srcL);
   1527          mapReg(m, &i->Xin.FpCmp.srcR);
   1528          mapReg(m, &i->Xin.FpCmp.dst);
   1529          return;
   1530       case Xin_SseConst:
   1531          mapReg(m, &i->Xin.SseConst.dst);
   1532          return;
   1533       case Xin_SseLdSt:
   1534          mapReg(m, &i->Xin.SseLdSt.reg);
   1535          mapRegs_X86AMode(m, i->Xin.SseLdSt.addr);
   1536          break;
   1537       case Xin_SseLdzLO:
   1538          mapReg(m, &i->Xin.SseLdzLO.reg);
   1539          mapRegs_X86AMode(m, i->Xin.SseLdzLO.addr);
   1540          break;
   1541       case Xin_Sse32Fx4:
   1542          mapReg(m, &i->Xin.Sse32Fx4.src);
   1543          mapReg(m, &i->Xin.Sse32Fx4.dst);
   1544          return;
   1545       case Xin_Sse32FLo:
   1546          mapReg(m, &i->Xin.Sse32FLo.src);
   1547          mapReg(m, &i->Xin.Sse32FLo.dst);
   1548          return;
   1549       case Xin_Sse64Fx2:
   1550          mapReg(m, &i->Xin.Sse64Fx2.src);
   1551          mapReg(m, &i->Xin.Sse64Fx2.dst);
   1552          return;
   1553       case Xin_Sse64FLo:
   1554          mapReg(m, &i->Xin.Sse64FLo.src);
   1555          mapReg(m, &i->Xin.Sse64FLo.dst);
   1556          return;
   1557       case Xin_SseReRg:
   1558          mapReg(m, &i->Xin.SseReRg.src);
   1559          mapReg(m, &i->Xin.SseReRg.dst);
   1560          return;
   1561       case Xin_SseCMov:
   1562          mapReg(m, &i->Xin.SseCMov.src);
   1563          mapReg(m, &i->Xin.SseCMov.dst);
   1564          return;
   1565       case Xin_SseShuf:
   1566          mapReg(m, &i->Xin.SseShuf.src);
   1567          mapReg(m, &i->Xin.SseShuf.dst);
   1568          return;
   1569       default:
   1570          ppX86Instr(i, mode64);
   1571          vpanic("mapRegs_X86Instr");
   1572    }
   1573 }
   1574 
   1575 /* Figure out if i represents a reg-reg move, and if so assign the
   1576    source and destination to *src and *dst.  If in doubt say No.  Used
   1577    by the register allocator to do move coalescing.
   1578 */
   1579 Bool isMove_X86Instr ( X86Instr* i, HReg* src, HReg* dst )
   1580 {
   1581    /* Moves between integer regs */
   1582    if (i->tag == Xin_Alu32R) {
   1583       if (i->Xin.Alu32R.op != Xalu_MOV)
   1584          return False;
   1585       if (i->Xin.Alu32R.src->tag != Xrmi_Reg)
   1586          return False;
   1587       *src = i->Xin.Alu32R.src->Xrmi.Reg.reg;
   1588       *dst = i->Xin.Alu32R.dst;
   1589       return True;
   1590    }
   1591    /* Moves between FP regs */
   1592    if (i->tag == Xin_FpUnary) {
   1593       if (i->Xin.FpUnary.op != Xfp_MOV)
   1594          return False;
   1595       *src = i->Xin.FpUnary.src;
   1596       *dst = i->Xin.FpUnary.dst;
   1597       return True;
   1598    }
   1599    if (i->tag == Xin_SseReRg) {
   1600       if (i->Xin.SseReRg.op != Xsse_MOV)
   1601          return False;
   1602       *src = i->Xin.SseReRg.src;
   1603       *dst = i->Xin.SseReRg.dst;
   1604       return True;
   1605    }
   1606    return False;
   1607 }
   1608 
   1609 
   1610 /* Generate x86 spill/reload instructions under the direction of the
   1611    register allocator.  Note it's critical these don't write the
   1612    condition codes. */
   1613 
   1614 void genSpill_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
   1615                     HReg rreg, Int offsetB, Bool mode64 )
   1616 {
   1617    X86AMode* am;
   1618    vassert(offsetB >= 0);
   1619    vassert(!hregIsVirtual(rreg));
   1620    vassert(mode64 == False);
   1621    *i1 = *i2 = NULL;
   1622    am = X86AMode_IR(offsetB, hregX86_EBP());
   1623    switch (hregClass(rreg)) {
   1624       case HRcInt32:
   1625          *i1 = X86Instr_Alu32M ( Xalu_MOV, X86RI_Reg(rreg), am );
   1626          return;
   1627       case HRcFlt64:
   1628          *i1 = X86Instr_FpLdSt ( False/*store*/, 10, rreg, am );
   1629          return;
   1630       case HRcVec128:
   1631          *i1 = X86Instr_SseLdSt ( False/*store*/, rreg, am );
   1632          return;
   1633       default:
   1634          ppHRegClass(hregClass(rreg));
   1635          vpanic("genSpill_X86: unimplemented regclass");
   1636    }
   1637 }
   1638 
   1639 void genReload_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
   1640                      HReg rreg, Int offsetB, Bool mode64 )
   1641 {
   1642    X86AMode* am;
   1643    vassert(offsetB >= 0);
   1644    vassert(!hregIsVirtual(rreg));
   1645    vassert(mode64 == False);
   1646    *i1 = *i2 = NULL;
   1647    am = X86AMode_IR(offsetB, hregX86_EBP());
   1648    switch (hregClass(rreg)) {
   1649       case HRcInt32:
   1650          *i1 = X86Instr_Alu32R ( Xalu_MOV, X86RMI_Mem(am), rreg );
   1651          return;
   1652       case HRcFlt64:
   1653          *i1 = X86Instr_FpLdSt ( True/*load*/, 10, rreg, am );
   1654          return;
   1655       case HRcVec128:
   1656          *i1 = X86Instr_SseLdSt ( True/*load*/, rreg, am );
   1657          return;
   1658       default:
   1659          ppHRegClass(hregClass(rreg));
   1660          vpanic("genReload_X86: unimplemented regclass");
   1661    }
   1662 }
   1663 
   1664 /* The given instruction reads the specified vreg exactly once, and
   1665    that vreg is currently located at the given spill offset.  If
   1666    possible, return a variant of the instruction to one which instead
   1667    references the spill slot directly. */
   1668 
X86Instr* directReload_X86( X86Instr* i, HReg vreg, Short spill_off )
{
   vassert(spill_off >= 0 && spill_off < 10000); /* let's say */

   /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg
      Convert to: src=RMI_Mem, dst=Reg
   */
   if (i->tag == Xin_Alu32R
       && (i->Xin.Alu32R.op == Xalu_MOV || i->Xin.Alu32R.op == Xalu_OR
           || i->Xin.Alu32R.op == Xalu_XOR)
       && i->Xin.Alu32R.src->tag == Xrmi_Reg
       && i->Xin.Alu32R.src->Xrmi.Reg.reg == vreg) {
      /* Caller guarantees vreg is read exactly once; these ops also
         write dst, so dst cannot be the spilled vreg. */
      vassert(i->Xin.Alu32R.dst != vreg);
      return X86Instr_Alu32R(
                i->Xin.Alu32R.op,
                X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP())),
                i->Xin.Alu32R.dst
             );
   }

   /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg
      Convert to: src=RI_Imm, dst=Mem
   */
   if (i->tag == Xin_Alu32R
       && (i->Xin.Alu32R.op == Xalu_CMP)
       && i->Xin.Alu32R.src->tag == Xrmi_Imm
       && i->Xin.Alu32R.dst == vreg) {
      /* Only CMP is safe here: it merely reads its "dst" operand,
         so moving that operand into memory preserves semantics. */
      return X86Instr_Alu32M(
                i->Xin.Alu32R.op,
                X86RI_Imm( i->Xin.Alu32R.src->Xrmi.Imm.imm32 ),
                X86AMode_IR( spill_off, hregX86_EBP())
             );
   }

   /* Deal with form: Push(RMI_Reg)
      Convert to: Push(RMI_Mem)
   */
   if (i->tag == Xin_Push
       && i->Xin.Push.src->tag == Xrmi_Reg
       && i->Xin.Push.src->Xrmi.Reg.reg == vreg) {
      return X86Instr_Push(
                X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP()))
             );
   }

   /* Deal with form: CMov32(src=RM_Reg, dst) where vreg == src
      Convert to CMov32(RM_Mem, dst) */
   if (i->tag == Xin_CMov32
       && i->Xin.CMov32.src->tag == Xrm_Reg
       && i->Xin.CMov32.src->Xrm.Reg.reg == vreg) {
      /* As above: the (conditionally written) destination cannot be
         the once-read spilled vreg. */
      vassert(i->Xin.CMov32.dst != vreg);
      return X86Instr_CMov32(
                i->Xin.CMov32.cond,
                X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() )),
                i->Xin.CMov32.dst
             );
   }

   /* Deal with form: Test32(imm,RM_Reg vreg) -> Test32(imm,amode) */
   if (i->tag == Xin_Test32
       && i->Xin.Test32.dst->tag == Xrm_Reg
       && i->Xin.Test32.dst->Xrm.Reg.reg == vreg) {
      return X86Instr_Test32(
                i->Xin.Test32.imm32,
                X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() ) )
             );
   }

   /* No folding opportunity; the register allocator will generate a
      separate reload instruction instead. */
   return NULL;
}
   1739 
   1740 
   1741 /* --------- The x86 assembler (bleh.) --------- */
   1742 
   1743 static UChar iregNo ( HReg r )
   1744 {
   1745    UInt n;
   1746    vassert(hregClass(r) == HRcInt32);
   1747    vassert(!hregIsVirtual(r));
   1748    n = hregNumber(r);
   1749    vassert(n <= 7);
   1750    return toUChar(n);
   1751 }
   1752 
   1753 static UInt fregNo ( HReg r )
   1754 {
   1755    UInt n;
   1756    vassert(hregClass(r) == HRcFlt64);
   1757    vassert(!hregIsVirtual(r));
   1758    n = hregNumber(r);
   1759    vassert(n <= 5);
   1760    return n;
   1761 }
   1762 
   1763 static UInt vregNo ( HReg r )
   1764 {
   1765    UInt n;
   1766    vassert(hregClass(r) == HRcVec128);
   1767    vassert(!hregIsVirtual(r));
   1768    n = hregNumber(r);
   1769    vassert(n <= 7);
   1770    return n;
   1771 }
   1772 
   1773 static UChar mkModRegRM ( UChar mod, UChar reg, UChar regmem )
   1774 {
   1775    return toUChar( ((mod & 3) << 6)
   1776                    | ((reg & 7) << 3)
   1777                    | (regmem & 7) );
   1778 }
   1779 
   1780 static UChar mkSIB ( Int shift, Int regindex, Int regbase )
   1781 {
   1782    return toUChar( ((shift & 3) << 6)
   1783                    | ((regindex & 7) << 3)
   1784                    | (regbase & 7) );
   1785 }
   1786 
   1787 static UChar* emit32 ( UChar* p, UInt w32 )
   1788 {
   1789    *p++ = toUChar( w32        & 0x000000FF);
   1790    *p++ = toUChar((w32 >>  8) & 0x000000FF);
   1791    *p++ = toUChar((w32 >> 16) & 0x000000FF);
   1792    *p++ = toUChar((w32 >> 24) & 0x000000FF);
   1793    return p;
   1794 }
   1795 
   1796 /* Does a sign-extend of the lowest 8 bits give
   1797    the original number? */
   1798 static Bool fits8bits ( UInt w32 )
   1799 {
   1800    Int i32 = (Int)w32;
   1801    return toBool(i32 == ((i32 << 24) >> 24));
   1802 }
   1803 
   1804 
   1805 /* Forming mod-reg-rm bytes and scale-index-base bytes.
   1806 
   1807      greg,  0(ereg)    |  ereg != ESP && ereg != EBP
   1808                        =  00 greg ereg
   1809 
   1810      greg,  d8(ereg)   |  ereg != ESP
   1811                        =  01 greg ereg, d8
   1812 
   1813      greg,  d32(ereg)  |  ereg != ESP
   1814                        =  10 greg ereg, d32
   1815 
   1816      greg,  d8(%esp)   =  01 greg 100, 0x24, d8
   1817 
   1818      -----------------------------------------------
   1819 
   1820      greg,  d8(base,index,scale)
   1821                |  index != ESP
   1822                =  01 greg 100, scale index base, d8
   1823 
   1824      greg,  d32(base,index,scale)
   1825                |  index != ESP
   1826                =  10 greg 100, scale index base, d32
   1827 */
static UChar* doAMode_M ( UChar* p, HReg greg, X86AMode* am )
{
   if (am->tag == Xam_IR) {
      /* 0(ereg): mod=00, no displacement.  ESP and EBP are excluded
         because mod=00 with those rm values means "SIB follows" and
         "disp32 only" respectively. */
      if (am->Xam.IR.imm == 0
          && am->Xam.IR.reg != hregX86_ESP()
          && am->Xam.IR.reg != hregX86_EBP() ) {
         *p++ = mkModRegRM(0, iregNo(greg), iregNo(am->Xam.IR.reg));
         return p;
      }
      /* d8(ereg), ereg != ESP: mod=01 plus a 1-byte displacement. */
      if (fits8bits(am->Xam.IR.imm)
          && am->Xam.IR.reg != hregX86_ESP()) {
         *p++ = mkModRegRM(1, iregNo(greg), iregNo(am->Xam.IR.reg));
         *p++ = toUChar(am->Xam.IR.imm & 0xFF);
         return p;
      }
      /* d32(ereg), ereg != ESP: mod=10 plus a 4-byte displacement. */
      if (am->Xam.IR.reg != hregX86_ESP()) {
         *p++ = mkModRegRM(2, iregNo(greg), iregNo(am->Xam.IR.reg));
         p = emit32(p, am->Xam.IR.imm);
         return p;
      }
      /* d8(%esp): rm=100 forces a SIB byte; 0x24 encodes
         "no index, base = ESP". */
      if (am->Xam.IR.reg == hregX86_ESP()
          && fits8bits(am->Xam.IR.imm)) {
 	 *p++ = mkModRegRM(1, iregNo(greg), 4);
         *p++ = 0x24;
         *p++ = toUChar(am->Xam.IR.imm & 0xFF);
         return p;
      }
      /* d32(%esp) is not handled. */
      ppX86AMode(am);
      vpanic("doAMode_M: can't emit amode IR");
      /*NOTREACHED*/
   }
   if (am->tag == Xam_IRRS) {
      /* d8(base,index,scale): mod=01, rm=100, SIB byte, disp8. */
      if (fits8bits(am->Xam.IRRS.imm)
          && am->Xam.IRRS.index != hregX86_ESP()) {
         *p++ = mkModRegRM(1, iregNo(greg), 4);
         *p++ = mkSIB(am->Xam.IRRS.shift, am->Xam.IRRS.index,
                                          am->Xam.IRRS.base);
         *p++ = toUChar(am->Xam.IRRS.imm & 0xFF);
         return p;
      }
      /* d32(base,index,scale): mod=10, rm=100, SIB byte, disp32. */
      if (am->Xam.IRRS.index != hregX86_ESP()) {
         *p++ = mkModRegRM(2, iregNo(greg), 4);
         *p++ = mkSIB(am->Xam.IRRS.shift, am->Xam.IRRS.index,
                                          am->Xam.IRRS.base);
         p = emit32(p, am->Xam.IRRS.imm);
         return p;
      }
      /* ESP cannot be used as an index register. */
      ppX86AMode(am);
      vpanic("doAMode_M: can't emit amode IRRS");
      /*NOTREACHED*/
   }
   vpanic("doAMode_M: unknown amode");
   /*NOTREACHED*/
}
   1882 
   1883 
   1884 /* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
   1885 static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
   1886 {
   1887    *p++ = mkModRegRM(3, iregNo(greg), iregNo(ereg));
   1888    return p;
   1889 }
   1890 
   1891 
   1892 /* Emit ffree %st(7) */
   1893 static UChar* do_ffree_st7 ( UChar* p )
   1894 {
   1895    *p++ = 0xDD;
   1896    *p++ = 0xC7;
   1897    return p;
   1898 }
   1899 
   1900 /* Emit fstp %st(i), 1 <= i <= 7 */
   1901 static UChar* do_fstp_st ( UChar* p, Int i )
   1902 {
   1903    vassert(1 <= i && i <= 7);
   1904    *p++ = 0xDD;
   1905    *p++ = toUChar(0xD8+i);
   1906    return p;
   1907 }
   1908 
   1909 /* Emit fld %st(i), 0 <= i <= 6 */
   1910 static UChar* do_fld_st ( UChar* p, Int i )
   1911 {
   1912    vassert(0 <= i && i <= 6);
   1913    *p++ = 0xD9;
   1914    *p++ = toUChar(0xC0+i);
   1915    return p;
   1916 }
   1917 
   1918 /* Emit f<op> %st(0) */
static UChar* do_fop1_st ( UChar* p, X86FpOp op )
{
   /* Each unary op is a two-byte x87 instruction operating in place
      on %st(0). */
   switch (op) {
      case Xfp_NEG:    *p++ = 0xD9; *p++ = 0xE0; break; /* fchs */
      case Xfp_ABS:    *p++ = 0xD9; *p++ = 0xE1; break; /* fabs */
      case Xfp_SQRT:   *p++ = 0xD9; *p++ = 0xFA; break; /* fsqrt */
      case Xfp_ROUND:  *p++ = 0xD9; *p++ = 0xFC; break; /* frndint */
      case Xfp_SIN:    *p++ = 0xD9; *p++ = 0xFE; break; /* fsin */
      case Xfp_COS:    *p++ = 0xD9; *p++ = 0xFF; break; /* fcos */
      case Xfp_2XM1:   *p++ = 0xD9; *p++ = 0xF0; break; /* f2xm1 */
      case Xfp_MOV:    break; /* value already in %st(0); emit nothing */
      case Xfp_TAN:    p = do_ffree_st7(p); /* since fptan pushes 1.0 */
                       *p++ = 0xD9; *p++ = 0xF2; /* fptan */
                       *p++ = 0xD9; *p++ = 0xF7; /* fincstp */
                       break;
      default: vpanic("do_fop1_st: unknown op");
   }
   return p;
}
   1938 
   1939 /* Emit f<op> %st(i), 1 <= i <= 5 */
static UChar* do_fop2_st ( UChar* p, X86FpOp op, Int i )
{
#  define fake(_n) mkHReg((_n), HRcInt32, False)
   Int subopc;
   /* With opcode byte D8, the ModRM reg field selects the operation
      and the rm field (mod=3) selects %st(i). */
   switch (op) {
      case Xfp_ADD: subopc = 0; break; /* fadd */
      case Xfp_SUB: subopc = 4; break; /* fsub */
      case Xfp_MUL: subopc = 1; break; /* fmul */
      case Xfp_DIV: subopc = 6; break; /* fdiv */
      default: vpanic("do_fop2_st: unknown op");
   }
   *p++ = 0xD8;
   /* Reuse doAMode_R by wrapping the raw field values as "fake"
      integer registers; only their low 3 bits matter. */
   p    = doAMode_R(p, fake(subopc), fake(i));
   return p;
#  undef fake
}
   1956 
   1957 /* Push a 32-bit word on the stack.  The word depends on tags[3:0];
   1958 each byte is either 0x00 or 0xFF depending on the corresponding bit in tags[].
   1959 */
   1960 static UChar* push_word_from_tags ( UChar* p, UShort tags )
   1961 {
   1962    UInt w;
   1963    vassert(0 == (tags & ~0xF));
   1964    if (tags == 0) {
   1965       /* pushl $0x00000000 */
   1966       *p++ = 0x6A;
   1967       *p++ = 0x00;
   1968    }
   1969    else
   1970    /* pushl $0xFFFFFFFF */
   1971    if (tags == 0xF) {
   1972       *p++ = 0x6A;
   1973       *p++ = 0xFF;
   1974    } else {
   1975       vassert(0); /* awaiting test case */
   1976       w = 0;
   1977       if (tags & 1) w |= 0x000000FF;
   1978       if (tags & 2) w |= 0x0000FF00;
   1979       if (tags & 4) w |= 0x00FF0000;
   1980       if (tags & 8) w |= 0xFF000000;
   1981       *p++ = 0x68;
   1982       p = emit32(p, w);
   1983    }
   1984    return p;
   1985 }
   1986 
   1987 /* Emit an instruction into buf and return the number of bytes used.
   1988    Note that buf is not the insn's final place, and therefore it is
   1989    imperative to emit position-independent code. */
   1990 
   1991 Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr* i,
   1992                     Bool mode64,
   1993                     void* dispatch_unassisted,
   1994                     void* dispatch_assisted )
   1995 {
   1996    UInt irno, opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
   1997 
   1998    UInt   xtra;
   1999    UChar* p = &buf[0];
   2000    UChar* ptmp;
   2001    vassert(nbuf >= 32);
   2002    vassert(mode64 == False);
   2003 
   2004    /* Wrap an integer as a int register, for use assembling
   2005       GrpN insns, in which the greg field is used as a sub-opcode
   2006       and does not really contain a register. */
   2007 #  define fake(_n) mkHReg((_n), HRcInt32, False)
   2008 
   2009    /* vex_printf("asm  ");ppX86Instr(i, mode64); vex_printf("\n"); */
   2010 
   2011    switch (i->tag) {
   2012 
   2013    case Xin_Alu32R:
   2014       /* Deal specially with MOV */
   2015       if (i->Xin.Alu32R.op == Xalu_MOV) {
   2016          switch (i->Xin.Alu32R.src->tag) {
   2017             case Xrmi_Imm:
   2018                *p++ = toUChar(0xB8 + iregNo(i->Xin.Alu32R.dst));
   2019                p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
   2020                goto done;
   2021             case Xrmi_Reg:
   2022                *p++ = 0x89;
   2023                p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
   2024                                 i->Xin.Alu32R.dst);
   2025                goto done;
   2026             case Xrmi_Mem:
   2027                *p++ = 0x8B;
   2028                p = doAMode_M(p, i->Xin.Alu32R.dst,
   2029                                 i->Xin.Alu32R.src->Xrmi.Mem.am);
   2030                goto done;
   2031             default:
   2032                goto bad;
   2033          }
   2034       }
   2035       /* MUL */
   2036       if (i->Xin.Alu32R.op == Xalu_MUL) {
   2037          switch (i->Xin.Alu32R.src->tag) {
   2038             case Xrmi_Reg:
   2039                *p++ = 0x0F;
   2040                *p++ = 0xAF;
   2041                p = doAMode_R(p, i->Xin.Alu32R.dst,
   2042                                 i->Xin.Alu32R.src->Xrmi.Reg.reg);
   2043                goto done;
   2044             case Xrmi_Mem:
   2045                *p++ = 0x0F;
   2046                *p++ = 0xAF;
   2047                p = doAMode_M(p, i->Xin.Alu32R.dst,
   2048                                 i->Xin.Alu32R.src->Xrmi.Mem.am);
   2049                goto done;
   2050             case Xrmi_Imm:
   2051                if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
   2052                   *p++ = 0x6B;
   2053                   p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
   2054                   *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
   2055                } else {
   2056                   *p++ = 0x69;
   2057                   p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
   2058                   p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
   2059                }
   2060                goto done;
   2061             default:
   2062                goto bad;
   2063          }
   2064       }
   2065       /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
   2066       opc = opc_rr = subopc_imm = opc_imma = 0;
   2067       switch (i->Xin.Alu32R.op) {
   2068          case Xalu_ADC: opc = 0x13; opc_rr = 0x11;
   2069                         subopc_imm = 2; opc_imma = 0x15; break;
   2070          case Xalu_ADD: opc = 0x03; opc_rr = 0x01;
   2071                         subopc_imm = 0; opc_imma = 0x05; break;
   2072          case Xalu_SUB: opc = 0x2B; opc_rr = 0x29;
   2073                         subopc_imm = 5; opc_imma = 0x2D; break;
   2074          case Xalu_SBB: opc = 0x1B; opc_rr = 0x19;
   2075                         subopc_imm = 3; opc_imma = 0x1D; break;
   2076          case Xalu_AND: opc = 0x23; opc_rr = 0x21;
   2077                         subopc_imm = 4; opc_imma = 0x25; break;
   2078          case Xalu_XOR: opc = 0x33; opc_rr = 0x31;
   2079                         subopc_imm = 6; opc_imma = 0x35; break;
   2080          case Xalu_OR:  opc = 0x0B; opc_rr = 0x09;
   2081                         subopc_imm = 1; opc_imma = 0x0D; break;
   2082          case Xalu_CMP: opc = 0x3B; opc_rr = 0x39;
   2083                         subopc_imm = 7; opc_imma = 0x3D; break;
   2084          default: goto bad;
   2085       }
   2086       switch (i->Xin.Alu32R.src->tag) {
   2087          case Xrmi_Imm:
   2088             if (i->Xin.Alu32R.dst == hregX86_EAX()
   2089                 && !fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
   2090                *p++ = toUChar(opc_imma);
   2091                p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
   2092             } else
   2093             if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
   2094                *p++ = 0x83;
   2095                p    = doAMode_R(p, fake(subopc_imm), i->Xin.Alu32R.dst);
   2096                *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
   2097             } else {
   2098                *p++ = 0x81;
   2099                p    = doAMode_R(p, fake(subopc_imm), i->Xin.Alu32R.dst);
   2100                p    = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
   2101             }
   2102             goto done;
   2103          case Xrmi_Reg:
   2104             *p++ = toUChar(opc_rr);
   2105             p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
   2106                              i->Xin.Alu32R.dst);
   2107             goto done;
   2108          case Xrmi_Mem:
   2109             *p++ = toUChar(opc);
   2110             p = doAMode_M(p, i->Xin.Alu32R.dst,
   2111                              i->Xin.Alu32R.src->Xrmi.Mem.am);
   2112             goto done;
   2113          default:
   2114             goto bad;
   2115       }
   2116       break;
   2117 
   2118    case Xin_Alu32M:
   2119       /* Deal specially with MOV */
   2120       if (i->Xin.Alu32M.op == Xalu_MOV) {
   2121          switch (i->Xin.Alu32M.src->tag) {
   2122             case Xri_Reg:
   2123                *p++ = 0x89;
   2124                p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
   2125                                 i->Xin.Alu32M.dst);
   2126                goto done;
   2127             case Xri_Imm:
   2128                *p++ = 0xC7;
   2129                p = doAMode_M(p, fake(0), i->Xin.Alu32M.dst);
   2130                p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
   2131                goto done;
   2132             default:
   2133                goto bad;
   2134          }
   2135       }
   2136       /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP.  MUL is not
   2137          allowed here. */
   2138       opc = subopc_imm = opc_imma = 0;
   2139       switch (i->Xin.Alu32M.op) {
   2140          case Xalu_ADD: opc = 0x01; subopc_imm = 0; break;
   2141          case Xalu_SUB: opc = 0x29; subopc_imm = 5; break;
   2142          case Xalu_CMP: opc = 0x39; subopc_imm = 7; break;
   2143          default: goto bad;
   2144       }
   2145       switch (i->Xin.Alu32M.src->tag) {
   2146          case Xri_Reg:
   2147             *p++ = toUChar(opc);
   2148             p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
   2149                              i->Xin.Alu32M.dst);
   2150             goto done;
   2151          case Xri_Imm:
   2152             if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) {
   2153                *p++ = 0x83;
   2154                p    = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
   2155                *p++ = toUChar(0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32);
   2156                goto done;
   2157             } else {
   2158                *p++ = 0x81;
   2159                p    = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
   2160                p    = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
   2161                goto done;
   2162             }
   2163          default:
   2164             goto bad;
   2165       }
   2166       break;
   2167 
   2168    case Xin_Sh32:
   2169       opc_cl = opc_imm = subopc = 0;
   2170       switch (i->Xin.Sh32.op) {
   2171          case Xsh_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
   2172          case Xsh_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
   2173          case Xsh_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
   2174          default: goto bad;
   2175       }
   2176       if (i->Xin.Sh32.src == 0) {
   2177          *p++ = toUChar(opc_cl);
   2178          p = doAMode_R(p, fake(subopc), i->Xin.Sh32.dst);
   2179       } else {
   2180          *p++ = toUChar(opc_imm);
   2181          p = doAMode_R(p, fake(subopc), i->Xin.Sh32.dst);
   2182          *p++ = (UChar)(i->Xin.Sh32.src);
   2183       }
   2184       goto done;
   2185 
   2186    case Xin_Test32:
   2187       if (i->Xin.Test32.dst->tag == Xrm_Reg) {
   2188          /* testl $imm32, %reg */
   2189          *p++ = 0xF7;
   2190          p = doAMode_R(p, fake(0), i->Xin.Test32.dst->Xrm.Reg.reg);
   2191          p = emit32(p, i->Xin.Test32.imm32);
   2192          goto done;
   2193       } else {
   2194          /* testl $imm32, amode */
   2195          *p++ = 0xF7;
   2196          p = doAMode_M(p, fake(0), i->Xin.Test32.dst->Xrm.Mem.am);
   2197          p = emit32(p, i->Xin.Test32.imm32);
   2198          goto done;
   2199       }
   2200 
   2201    case Xin_Unary32:
   2202       if (i->Xin.Unary32.op == Xun_NOT) {
   2203          *p++ = 0xF7;
   2204          p = doAMode_R(p, fake(2), i->Xin.Unary32.dst);
   2205          goto done;
   2206       }
   2207       if (i->Xin.Unary32.op == Xun_NEG) {
   2208          *p++ = 0xF7;
   2209          p = doAMode_R(p, fake(3), i->Xin.Unary32.dst);
   2210          goto done;
   2211       }
   2212       break;
   2213 
   2214    case Xin_Lea32:
   2215       *p++ = 0x8D;
   2216       p = doAMode_M(p, i->Xin.Lea32.dst, i->Xin.Lea32.am);
   2217       goto done;
   2218 
   2219    case Xin_MulL:
   2220       subopc = i->Xin.MulL.syned ? 5 : 4;
   2221       *p++ = 0xF7;
   2222       switch (i->Xin.MulL.src->tag)  {
   2223          case Xrm_Mem:
   2224             p = doAMode_M(p, fake(subopc),
   2225                              i->Xin.MulL.src->Xrm.Mem.am);
   2226             goto done;
   2227          case Xrm_Reg:
   2228             p = doAMode_R(p, fake(subopc),
   2229                              i->Xin.MulL.src->Xrm.Reg.reg);
   2230             goto done;
   2231          default:
   2232             goto bad;
   2233       }
   2234       break;
   2235 
   2236    case Xin_Div:
   2237       subopc = i->Xin.Div.syned ? 7 : 6;
   2238       *p++ = 0xF7;
   2239       switch (i->Xin.Div.src->tag)  {
   2240          case Xrm_Mem:
   2241             p = doAMode_M(p, fake(subopc),
   2242                              i->Xin.Div.src->Xrm.Mem.am);
   2243             goto done;
   2244          case Xrm_Reg:
   2245             p = doAMode_R(p, fake(subopc),
   2246                              i->Xin.Div.src->Xrm.Reg.reg);
   2247             goto done;
   2248          default:
   2249             goto bad;
   2250       }
   2251       break;
   2252 
   2253    case Xin_Sh3232:
   2254       vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR);
   2255       if (i->Xin.Sh3232.amt == 0) {
   2256          /* shldl/shrdl by %cl */
   2257          *p++ = 0x0F;
   2258          if (i->Xin.Sh3232.op == Xsh_SHL) {
   2259             *p++ = 0xA5;
   2260          } else {
   2261             *p++ = 0xAD;
   2262          }
   2263          p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst);
   2264          goto done;
   2265       }
   2266       break;
   2267 
   2268    case Xin_Push:
   2269       switch (i->Xin.Push.src->tag) {
   2270          case Xrmi_Mem:
   2271             *p++ = 0xFF;
   2272             p = doAMode_M(p, fake(6), i->Xin.Push.src->Xrmi.Mem.am);
   2273             goto done;
   2274          case Xrmi_Imm:
   2275             *p++ = 0x68;
   2276             p = emit32(p, i->Xin.Push.src->Xrmi.Imm.imm32);
   2277             goto done;
   2278          case Xrmi_Reg:
   2279             *p++ = toUChar(0x50 + iregNo(i->Xin.Push.src->Xrmi.Reg.reg));
   2280             goto done;
   2281         default:
   2282             goto bad;
   2283       }
   2284 
   2285    case Xin_Call:
   2286       /* See detailed comment for Xin_Call in getRegUsage_X86Instr above
   2287          for explanation of this. */
   2288       switch (i->Xin.Call.regparms) {
   2289          case 0: irno = iregNo(hregX86_EAX()); break;
   2290          case 1: irno = iregNo(hregX86_EDX()); break;
   2291          case 2: irno = iregNo(hregX86_ECX()); break;
   2292          case 3: irno = iregNo(hregX86_EDI()); break;
   2293          default: vpanic(" emit_X86Instr:call:regparms");
   2294       }
   2295       /* jump over the following two insns if the condition does not
   2296          hold */
   2297       if (i->Xin.Call.cond != Xcc_ALWAYS) {
   2298          *p++ = toUChar(0x70 + (0xF & (i->Xin.Call.cond ^ 1)));
   2299          *p++ = 0x07; /* 7 bytes in the next two insns */
   2300       }
   2301       /* movl $target, %tmp */
   2302       *p++ = toUChar(0xB8 + irno);
   2303       p = emit32(p, i->Xin.Call.target);
   2304       /* call *%tmp */
   2305       *p++ = 0xFF;
   2306       *p++ = toUChar(0xD0 + irno);
   2307       goto done;
   2308 
   2309    case Xin_Goto: {
   2310       void* dispatch_to_use = NULL;
   2311       vassert(dispatch_unassisted != NULL);
   2312       vassert(dispatch_assisted != NULL);
   2313 
   2314       /* Use ptmp for backpatching conditional jumps. */
   2315       ptmp = NULL;
   2316 
      /* First off, if this is conditional, create a conditional
         jump over the rest of it. */
      if (i->Xin.Goto.cond != Xcc_ALWAYS) {
         /* jmp fwds if !condition */
         *p++ = toUChar(0x70 + (0xF & (i->Xin.Goto.cond ^ 1)));
         ptmp = p; /* fill in this bit later */
         *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
      }

      /* If a non-boring, set %ebp (the guest state pointer)
         appropriately.  Also, decide which dispatcher we need to
         use. */
      dispatch_to_use = dispatch_assisted;

      /* movl $magic_number, %ebp.  0xBD is mov-imm32-to-%ebp; the
         immediate is the VEX_TRC_JMP_* code telling the (assisted)
         dispatcher why the translation exited. */
      switch (i->Xin.Goto.jk) {
         case Ijk_ClientReq:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_CLIENTREQ); break;
         case Ijk_Sys_int128:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_SYS_INT128); break;
         case Ijk_Sys_int129:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_SYS_INT129); break;
         case Ijk_Sys_int130:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_SYS_INT130); break;
         case Ijk_Yield:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_YIELD); break;
         case Ijk_YieldNoRedir:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_YIELD_NOREDIR); break;
         case Ijk_EmWarn:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_EMWARN); break;
         case Ijk_MapFail:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_MAPFAIL); break;
         case Ijk_NoDecode:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_NODECODE); break;
         case Ijk_TInval:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_TINVAL); break;
         case Ijk_NoRedir:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_NOREDIR); break;
         case Ijk_Sys_sysenter:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_SYS_SYSENTER); break;
         case Ijk_SigTRAP:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_SIGTRAP); break;
         case Ijk_SigSEGV:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_SIGSEGV); break;
         case Ijk_Ret:
         case Ijk_Call:
         case Ijk_Boring:
            /* Boring jump kinds need no %ebp magic and can use the
               fast (unassisted) dispatcher. */
            dispatch_to_use = dispatch_unassisted;
            break;
         default:
            ppIRJumpKind(i->Xin.Goto.jk);
            vpanic("emit_X86Instr.Xin_Goto: unknown jump kind");
      }

      /* Get the destination address into %eax */
      if (i->Xin.Goto.dst->tag == Xri_Imm) {
         /* movl $immediate, %eax */
         *p++ = 0xB8;
         p = emit32(p, i->Xin.Goto.dst->Xri.Imm.imm32);
      } else {
         vassert(i->Xin.Goto.dst->tag == Xri_Reg);
         /* movl %reg, %eax -- omitted if the value is already in %eax */
         if (i->Xin.Goto.dst->Xri.Reg.reg != hregX86_EAX()) {
            *p++ = 0x89;
            p = doAMode_R(p, i->Xin.Goto.dst->Xri.Reg.reg, hregX86_EAX());
         }
      }

      /* Get the dispatcher address into %edx.  This has to happen
         after the load of %eax since %edx might be carrying the value
         destined for %eax immediately prior to this Xin_Goto. */
      vassert(sizeof(UInt) == sizeof(void*));
      vassert(dispatch_to_use != NULL);
      /* movl $imm32, %edx */
      *p++ = 0xBA;
      p = emit32(p, (UInt)Ptr_to_ULong(dispatch_to_use));

      /* jmp *%edx */
      *p++ = 0xFF;
      *p++ = 0xE2;

      /* Fix up the conditional jump, if there was one.  delta-1
         because the 8-bit displacement is relative to the end of the
         displacement byte itself. */
      if (i->Xin.Goto.cond != Xcc_ALWAYS) {
         Int delta = p - ptmp;
         vassert(delta > 0 && delta < 20);
         *ptmp = toUChar(delta-1);
      }
      goto done;
   }
   2420 
   2421    case Xin_CMov32:
   2422       vassert(i->Xin.CMov32.cond != Xcc_ALWAYS);
   2423 
   2424       /* This generates cmov, which is illegal on P54/P55. */
   2425       /*
   2426       *p++ = 0x0F;
   2427       *p++ = toUChar(0x40 + (0xF & i->Xin.CMov32.cond));
   2428       if (i->Xin.CMov32.src->tag == Xrm_Reg) {
   2429          p = doAMode_R(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Reg.reg);
   2430          goto done;
   2431       }
   2432       if (i->Xin.CMov32.src->tag == Xrm_Mem) {
   2433          p = doAMode_M(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Mem.am);
   2434          goto done;
   2435       }
   2436       */
   2437 
   2438       /* Alternative version which works on any x86 variant. */
   2439       /* jmp fwds if !condition */
   2440       *p++ = toUChar(0x70 + (i->Xin.CMov32.cond ^ 1));
   2441       *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
   2442       ptmp = p;
   2443 
   2444       switch (i->Xin.CMov32.src->tag) {
   2445          case Xrm_Reg:
   2446             /* Big sigh.  This is movl E -> G ... */
   2447             *p++ = 0x89;
   2448             p = doAMode_R(p, i->Xin.CMov32.src->Xrm.Reg.reg,
   2449                              i->Xin.CMov32.dst);
   2450 
   2451             break;
   2452          case Xrm_Mem:
   2453             /* ... whereas this is movl G -> E.  That's why the args
   2454                to doAMode_R appear to be the wrong way round in the
   2455                Xrm_Reg case. */
   2456             *p++ = 0x8B;
   2457             p = doAMode_M(p, i->Xin.CMov32.dst,
   2458                              i->Xin.CMov32.src->Xrm.Mem.am);
   2459             break;
   2460          default:
   2461             goto bad;
   2462       }
   2463       /* Fill in the jump offset. */
   2464       *(ptmp-1) = toUChar(p - ptmp);
   2465       goto done;
   2466 
   2467       break;
   2468 
   case Xin_LoadEX:
      /* Widening load of 1 or 2 bytes into a 32-bit register, with
         zero- or sign-extension. */
      if (i->Xin.LoadEX.szSmall == 1 && !i->Xin.LoadEX.syned) {
         /* movzbl (0F B6): zero-extend byte */
         *p++ = 0x0F;
         *p++ = 0xB6;
         p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
         goto done;
      }
      if (i->Xin.LoadEX.szSmall == 2 && !i->Xin.LoadEX.syned) {
         /* movzwl (0F B7): zero-extend word */
         *p++ = 0x0F;
         *p++ = 0xB7;
         p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
         goto done;
      }
      if (i->Xin.LoadEX.szSmall == 1 && i->Xin.LoadEX.syned) {
         /* movsbl (0F BE): sign-extend byte */
         *p++ = 0x0F;
         *p++ = 0xBE;
         p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
         goto done;
      }
      /* Other combinations (e.g. signed 16-bit) are not handled;
         fall through to "bad". */
      break;
   2492 
   case Xin_Set32:
      /* Make the destination register be 1 or 0, depending on whether
         the relevant condition holds.  We have to dodge and weave
         when the destination is %esi or %edi as we cannot directly
         emit the native 'setb %reg' for those.  Further complication:
         the top 24 bits of the destination should be forced to zero,
         but doing 'xor %r,%r' kills the flag(s) we are about to read.
         Sigh.  So start off by moving $0 into the dest. */

      /* Do we need to swap in %eax?  Registers numbered >= 4 are
         %esp/%ebp/%esi/%edi, which have no addressable low byte. */
      if (iregNo(i->Xin.Set32.dst) >= 4) {
         /* xchg %eax, %dst (0x90 + regno, the one-byte xchg-with-eax
            form) */
         *p++ = toUChar(0x90 + iregNo(i->Xin.Set32.dst));
         /* movl $0, %eax */
         *p++ =toUChar(0xB8 + iregNo(hregX86_EAX()));
         p = emit32(p, 0);
         /* setb lo8(%eax) -- setcc is 0F 9x */
         *p++ = 0x0F;
         *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
         p = doAMode_R(p, fake(0), hregX86_EAX());
         /* xchg %eax, %dst -- restore the original %eax */
         *p++ = toUChar(0x90 + iregNo(i->Xin.Set32.dst));
      } else {
         /* movl $0, %dst */
         *p++ = toUChar(0xB8 + iregNo(i->Xin.Set32.dst));
         p = emit32(p, 0);
         /* setb lo8(%dst) */
         *p++ = 0x0F;
         *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
         p = doAMode_R(p, fake(0), i->Xin.Set32.dst);
      }
      goto done;
   2525 
   2526    case Xin_Bsfr32:
   2527       *p++ = 0x0F;
   2528       if (i->Xin.Bsfr32.isFwds) {
   2529          *p++ = 0xBC;
   2530       } else {
   2531          *p++ = 0xBD;
   2532       }
   2533       p = doAMode_R(p, i->Xin.Bsfr32.dst, i->Xin.Bsfr32.src);
   2534       goto done;
   2535 
   case Xin_MFence:
      /* Memory fence; the encoding chosen depends on the host's SSE
         capability level.  See comment in hdefs.h re this insn. */
      if (0) vex_printf("EMIT FENCE\n");
      if (i->Xin.MFence.hwcaps & (VEX_HWCAPS_X86_SSE3
                                  |VEX_HWCAPS_X86_SSE2)) {
         /* mfence (0F AE F0) -- available from SSE2 onwards */
         *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
         goto done;
      }
      if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_SSE1) {
         /* SSE1 has only sfence, so follow it with a locked add to
            get full-fence behaviour. */
         /* sfence */
         *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8;
         /* lock addl $0,0(%esp) */
         *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
         *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
         goto done;
      }
      if (i->Xin.MFence.hwcaps == 0/*baseline, no SSE*/) {
         /* lock addl $0,0(%esp) -- locked RMW acts as a fence */
         *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
         *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
         goto done;
      }
      vpanic("emit_X86Instr:mfence:hwcaps");
      /*NOTREACHED*/
      break;
   2562 
   case Xin_ACAS:
      /* Atomic compare-and-swap of 1, 2 or 4 bytes. */
      /* lock */
      *p++ = 0xF0;
      /* cmpxchg{b,w,l} %ebx,mem.  Expected-value in %eax, new value
         in %ebx.  The new-value register is hardwired to be %ebx
         since letting it be any integer register gives the problem
         that %sil and %dil are unaddressible on x86 and hence we
         would have to resort to the same kind of trickery as with
         byte-sized Xin.Store, just below.  Given that this isn't
         performance critical, it is simpler just to force the
         register operand to %ebx (could equally be %ecx or %edx).
         (Although %ebx is more consistent with cmpxchg8b.) */
      if (i->Xin.ACAS.sz == 2) *p++ = 0x66;  /* operand-size prefix for 16-bit */
      *p++ = 0x0F;
      if (i->Xin.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
      p = doAMode_M(p, hregX86_EBX(), i->Xin.ACAS.addr);
      goto done;
   2580 
   case Xin_DACAS:
      /* Atomic 64-bit (double-word) compare-and-swap. */
      /* lock */
      *p++ = 0xF0;
      /* cmpxchg8b m64.  Expected-value in %edx:%eax, new value
         in %ecx:%ebx.  All 4 regs are hardwired in the ISA, so
         aren't encoded in the insn.  fake(1) supplies the /1
         subopcode in the modrm reg field. */
      *p++ = 0x0F;
      *p++ = 0xC7;
      p = doAMode_M(p, fake(1), i->Xin.DACAS.addr);
      goto done;
   2591 
   case Xin_Store:
      /* Narrow (8- or 16-bit) store of the low bits of an integer
         register. */
      if (i->Xin.Store.sz == 2) {
         /* This case, at least, is simple, given that we can
            reference the low 16 bits of any integer register.
            0x66 = operand-size prefix, 0x89 = mov G->E. */
         *p++ = 0x66;
         *p++ = 0x89;
         p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
         goto done;
      }

      if (i->Xin.Store.sz == 1) {
         /* We have to do complex dodging and weaving if src is not
            the low 8 bits of %eax/%ebx/%ecx/%edx. */
         if (iregNo(i->Xin.Store.src) < 4) {
            /* we're OK, can do it directly -- movb is 0x88 */
            *p++ = 0x88;
            p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
            goto done;
         } else {
            /* Bleh.  This means the source is %edi or %esi.  Since
               the address mode can only mention three registers, at
               least one of %eax/%ebx/%ecx/%edx must be available to
               temporarily swap the source into, so the store can
               happen.  So we have to look at the regs mentioned
               in the amode. */
            HReg swap = INVALID_HREG;
            HReg  eax = hregX86_EAX(), ebx = hregX86_EBX(),
                  ecx = hregX86_ECX(), edx = hregX86_EDX();
            Bool a_ok = True, b_ok = True, c_ok = True, d_ok = True;
            HRegUsage u;
            Int j;
            initHRegUsage(&u);
            addRegUsage_X86AMode(&u,  i->Xin.Store.dst);
            /* Mark as unusable any of a/b/c/d that the amode reads. */
            for (j = 0; j < u.n_used; j++) {
               HReg r = u.hreg[j];
               if (r == eax) a_ok = False;
               if (r == ebx) b_ok = False;
               if (r == ecx) c_ok = False;
               if (r == edx) d_ok = False;
            }
            if (a_ok) swap = eax;
            if (b_ok) swap = ebx;
            if (c_ok) swap = ecx;
            if (d_ok) swap = edx;
            vassert(swap != INVALID_HREG);
            /* xchgl %source, %swap. Could do better if swap is %eax. */
            *p++ = 0x87;
            p = doAMode_R(p, i->Xin.Store.src, swap);
            /* movb lo8{%swap}, (dst) */
            *p++ = 0x88;
            p = doAMode_M(p, swap, i->Xin.Store.dst);
            /* xchgl %source, %swap. Could do better if swap is %eax. */
            *p++ = 0x87;
            p = doAMode_R(p, i->Xin.Store.src, swap);
            goto done;
         }
      } /* if (i->Xin.Store.sz == 1) */
      break;
   2650 
   case Xin_FpUnary:
      /* gop %src, %dst
         --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst)
         The +1 offsets account for the push done by the fld. */
      p = do_ffree_st7(p);
      p = do_fld_st(p, 0+hregNumber(i->Xin.FpUnary.src));
      p = do_fop1_st(p, i->Xin.FpUnary.op);
      p = do_fstp_st(p, 1+hregNumber(i->Xin.FpUnary.dst));
      goto done;
   2660 
   case Xin_FpBinary:
      /* Binary x87 op.  Three special-case groups need bespoke
         sequences because the underlying insns have fixed operand
         conventions; everything else goes through the general path
         at the bottom. */
      if (i->Xin.FpBinary.op == Xfp_YL2X
          || i->Xin.FpBinary.op == Xfp_YL2XP1) {
         /* Have to do this specially. */
         /* ffree %st7 ; fld %st(srcL) ;
            ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
         p = do_ffree_st7(p);
         p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
         /* D9 F1 = fyl2x, D9 F9 = fyl2xp1; both pop once, so a
            single fstp completes the sequence. */
         *p++ = 0xD9;
         *p++ = toUChar(i->Xin.FpBinary.op==Xfp_YL2X ? 0xF1 : 0xF9);
         p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
         goto done;
      }
      if (i->Xin.FpBinary.op == Xfp_ATAN) {
         /* Have to do this specially. */
         /* ffree %st7 ; fld %st(srcL) ;
            ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
         p = do_ffree_st7(p);
         p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
         /* D9 F3 = fpatan */
         *p++ = 0xD9; *p++ = 0xF3;
         p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
         goto done;
      }
      if (i->Xin.FpBinary.op == Xfp_PREM
          || i->Xin.FpBinary.op == Xfp_PREM1
          || i->Xin.FpBinary.op == Xfp_SCALE) {
         /* Have to do this specially. */
         /* ffree %st7 ; fld %st(srcR) ;
            ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ;
            fincstp ; ffree %st7
            Note srcR is loaded first: these insns compute
            st(0) op st(1) and do not pop, hence the 2+dst fstp and
            the fincstp/ffree cleanup of the leftover stack slot. */
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcR));
         p = do_ffree_st7(p);
         p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcL));
         *p++ = 0xD9;
         switch (i->Xin.FpBinary.op) {
            case Xfp_PREM: *p++ = 0xF8; break;
            case Xfp_PREM1: *p++ = 0xF5; break;
            case Xfp_SCALE: *p++ = 0xFD; break;
            default: vpanic("emitX86Instr(FpBinary,PREM/PREM1/SCALE)");
         }
         p = do_fstp_st(p, 2+hregNumber(i->Xin.FpBinary.dst));
         /* D9 F7 = fincstp */
         *p++ = 0xD9; *p++ = 0xF7;
         p = do_ffree_st7(p);
         goto done;
      }
      /* General case */
      /* gop %srcL, %srcR, %dst
         --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst)
      */
      p = do_ffree_st7(p);
      p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
      p = do_fop2_st(p, i->Xin.FpBinary.op,
                        1+hregNumber(i->Xin.FpBinary.srcR));
      p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
      goto done;
   2721 
   case Xin_FpLdSt:
      /* FP load/store of 4, 8 or 10 bytes between memory and a
         virtual FP register (%fakeN). */
      if (i->Xin.FpLdSt.isLoad) {
         /* Load from memory into %fakeN.
            --> ffree %st(7) ; fld{s/l/t} amode ; fstp st(N+1)
         */
         p = do_ffree_st7(p);
         switch (i->Xin.FpLdSt.sz) {
            case 4:
               /* flds: D9 /0 */
               *p++ = 0xD9;
               p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            case 8:
               /* fldl: DD /0 */
               *p++ = 0xDD;
               p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            case 10:
               /* fldt (80-bit extended): DB /5 */
               *p++ = 0xDB;
               p = doAMode_M(p, fake(5)/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            default:
               vpanic("emitX86Instr(FpLdSt,load)");
         }
         p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdSt.reg));
         goto done;
      } else {
         /* Store from %fakeN into memory.
            --> ffree %st(7) ; fld st(N) ; fstp{l|s} amode
         */
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdSt.reg));
         switch (i->Xin.FpLdSt.sz) {
            case 4:
               /* fstps: D9 /3 */
               *p++ = 0xD9;
               p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            case 8:
               /* fstpl: DD /3 */
               *p++ = 0xDD;
               p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            case 10:
               /* fstpt (80-bit extended): DB /7 */
               *p++ = 0xDB;
               p = doAMode_M(p, fake(7)/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            default:
               vpanic("emitX86Instr(FpLdSt,store)");
         }
         goto done;
      }
      break;
   2771 
   case Xin_FpLdStI:
      /* Integer<->FP converting load/store (fild/fistp family). */
      if (i->Xin.FpLdStI.isLoad) {
         /* Load from memory into %fakeN, converting from an int.
            --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1)
         */
         switch (i->Xin.FpLdStI.sz) {
            case 8:  opc = 0xDF; subopc_imm = 5; break;  /* fildll */
            case 4:  opc = 0xDB; subopc_imm = 0; break;  /* fildl */
            /* 16-bit int load is apparently never generated, hence
               the vassert(0). */
            case 2:  vassert(0); opc = 0xDF; subopc_imm = 0; break;
            default: vpanic("emitX86Instr(Xin_FpLdStI-load)");
         }
         p = do_ffree_st7(p);
         *p++ = toUChar(opc);
         p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
         p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdStI.reg));
         goto done;
      } else {
         /* Store from %fakeN into memory, converting to an int.
            --> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode
         */
         switch (i->Xin.FpLdStI.sz) {
            case 8:  opc = 0xDF; subopc_imm = 7; break;  /* fistpll */
            case 4:  opc = 0xDB; subopc_imm = 3; break;  /* fistpl */
            case 2:  opc = 0xDF; subopc_imm = 3; break;  /* fistp */
            default: vpanic("emitX86Instr(Xin_FpLdStI-store)");
         }
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdStI.reg));
         *p++ = toUChar(opc);
         p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
         goto done;
      }
      break;
   2805 
   case Xin_Fp64to32:
      /* Round a double to single precision by bouncing it through a
         4-byte slot on the stack (fstps forces the narrowing). */
      /* ffree %st7 ; fld %st(src) */
      p = do_ffree_st7(p);
      p = do_fld_st(p, 0+fregNo(i->Xin.Fp64to32.src));
      /* subl $4, %esp */
      *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04;
      /* fstps (%esp) */
      *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24;
      /* flds (%esp) */
      *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24;
      /* addl $4, %esp */
      *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04;
      /* fstp %st(1+dst) */
      p = do_fstp_st(p, 1+fregNo(i->Xin.Fp64to32.dst));
      goto done;
   2821 
   2822    case Xin_FpCMov:
   2823       /* jmp fwds if !condition */
   2824       *p++ = toUChar(0x70 + (i->Xin.FpCMov.cond ^ 1));
   2825       *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
   2826       ptmp = p;
   2827 
   2828       /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */
   2829       p = do_ffree_st7(p);
   2830       p = do_fld_st(p, 0+fregNo(i->Xin.FpCMov.src));
   2831       p = do_fstp_st(p, 1+fregNo(i->Xin.FpCMov.dst));
   2832 
   2833       /* Fill in the jump offset. */
   2834       *(ptmp-1) = toUChar(p - ptmp);
   2835       goto done;
   2836 
   case Xin_FpLdCW:
      /* fldcw amode (D9 /5): load the x87 control word (rounding
         mode, precision) from memory. */
      *p++ = 0xD9;
      p = doAMode_M(p, fake(5)/*subopcode*/, i->Xin.FpLdCW.addr);
      goto done;
   2841 
   case Xin_FpStSW_AX:
      /* Store the x87 status word in %ax.  Note, this emits
         fnstsw %ax (DF E0), not fstsw %ax -- i.e. without the wait
         prefix. */
      *p++ = 0xDF;
      *p++ = 0xE0;
      goto done;
   2847 
   case Xin_FpCmp:
      /* gcmp %fL, %fR, %dst
         -> ffree %st7; fpush %fL ; fucomp %(fR+1) ;
            fnstsw %ax ; movl %eax, %dst
         Result: the x87 status word (C0/C2/C3 comparison bits) lands
         in the integer register %dst. */
      /* ffree %st7 */
      p = do_ffree_st7(p);
      /* fpush %fL */
      p = do_fld_st(p, 0+fregNo(i->Xin.FpCmp.srcL));
      /* fucomp %(fR+1) -- DD E8+i; pops the pushed copy of fL */
      *p++ = 0xDD;
      *p++ = toUChar(0xE8 + (7 & (1+fregNo(i->Xin.FpCmp.srcR))));
      /* fnstsw %ax */
      *p++ = 0xDF;
      *p++ = 0xE0;
      /*  movl %eax, %dst */
      *p++ = 0x89;
      p = doAMode_R(p, hregX86_EAX(), i->Xin.FpCmp.dst);
      goto done;
   2867 
   case Xin_SseConst: {
      /* Materialise a 16x8 constant into an xmm register: push the
         16 bytes described by the 16-bit tag word 'con' onto the
         stack (4 bits per 32-bit word), load, then pop. */
      UShort con = i->Xin.SseConst.con;
      p = push_word_from_tags(p, toUShort((con >> 12) & 0xF));
      p = push_word_from_tags(p, toUShort((con >> 8) & 0xF));
      p = push_word_from_tags(p, toUShort((con >> 4) & 0xF));
      p = push_word_from_tags(p, toUShort(con & 0xF));
      /* movups (%esp), %xmm-dst -- 0F 10 with (%esp) amode */
      *p++ = 0x0F;
      *p++ = 0x10;
      *p++ = toUChar(0x04 + 8 * (7 & vregNo(i->Xin.SseConst.dst)));
      *p++ = 0x24;
      /* addl $16, %esp */
      *p++ = 0x83;
      *p++ = 0xC4;
      *p++ = 0x10;
      goto done;
   }
   2885 
   2886    case Xin_SseLdSt:
   2887       *p++ = 0x0F;
   2888       *p++ = toUChar(i->Xin.SseLdSt.isLoad ? 0x10 : 0x11);
   2889       p = doAMode_M(p, fake(vregNo(i->Xin.SseLdSt.reg)), i->Xin.SseLdSt.addr);
   2890       goto done;
   2891 
   case Xin_SseLdzLO:
      /* Load 4 or 8 bytes into the low lane of an xmm register,
         zeroing the rest: movss (F3 0F 10) / movsd (F2 0F 10). */
      vassert(i->Xin.SseLdzLO.sz == 4 || i->Xin.SseLdzLO.sz == 8);
      /* movs[sd] amode, %xmm-dst */
      *p++ = toUChar(i->Xin.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
      *p++ = 0x0F;
      *p++ = 0x10;
      p = doAMode_M(p, fake(vregNo(i->Xin.SseLdzLO.reg)),
                       i->Xin.SseLdzLO.addr);
      goto done;
   2901 
   case Xin_Sse32Fx4:
      /* Packed single-precision SSE op, reg-reg (0F <op>).  The
         CMP* variants are cmpps with a trailing imm8 predicate;
         xtra = 0x100|imm8 flags that the extra byte is needed. */
      xtra = 0;
      *p++ = 0x0F;
      switch (i->Xin.Sse32Fx4.op) {
         case Xsse_ADDF:   *p++ = 0x58; break;
         case Xsse_DIVF:   *p++ = 0x5E; break;
         case Xsse_MAXF:   *p++ = 0x5F; break;
         case Xsse_MINF:   *p++ = 0x5D; break;
         case Xsse_MULF:   *p++ = 0x59; break;
         case Xsse_RCPF:   *p++ = 0x53; break;
         case Xsse_RSQRTF: *p++ = 0x52; break;
         case Xsse_SQRTF:  *p++ = 0x51; break;
         case Xsse_SUBF:   *p++ = 0x5C; break;
         case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
         default: goto bad;
      }
      p = doAMode_R(p, fake(vregNo(i->Xin.Sse32Fx4.dst)),
                       fake(vregNo(i->Xin.Sse32Fx4.src)) );
      if (xtra & 0x100)
         *p++ = toUChar(xtra & 0xFF);
      goto done;
   2926 
   case Xin_Sse64Fx2:
      /* Packed double-precision SSE op, reg-reg (66 0F <op>); same
         scheme as Xin_Sse32Fx4 but with the 0x66 prefix.  The CMP*
         variants take a trailing imm8 predicate via xtra. */
      xtra = 0;
      *p++ = 0x66;
      *p++ = 0x0F;
      switch (i->Xin.Sse64Fx2.op) {
         case Xsse_ADDF:   *p++ = 0x58; break;
         case Xsse_DIVF:   *p++ = 0x5E; break;
         case Xsse_MAXF:   *p++ = 0x5F; break;
         case Xsse_MINF:   *p++ = 0x5D; break;
         case Xsse_MULF:   *p++ = 0x59; break;
         case Xsse_RCPF:   *p++ = 0x53; break;
         case Xsse_RSQRTF: *p++ = 0x52; break;
         case Xsse_SQRTF:  *p++ = 0x51; break;
         case Xsse_SUBF:   *p++ = 0x5C; break;
         case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
         default: goto bad;
      }
      p = doAMode_R(p, fake(vregNo(i->Xin.Sse64Fx2.dst)),
                       fake(vregNo(i->Xin.Sse64Fx2.src)) );
      if (xtra & 0x100)
         *p++ = toUChar(xtra & 0xFF);
      goto done;
   2952 
   case Xin_Sse32FLo:
      /* Scalar single-precision SSE op on the low lane, reg-reg
         (F3 0F <op>); same scheme as the packed cases above. */
      xtra = 0;
      *p++ = 0xF3;
      *p++ = 0x0F;
      switch (i->Xin.Sse32FLo.op) {
         case Xsse_ADDF:   *p++ = 0x58; break;
         case Xsse_DIVF:   *p++ = 0x5E; break;
         case Xsse_MAXF:   *p++ = 0x5F; break;
         case Xsse_MINF:   *p++ = 0x5D; break;
         case Xsse_MULF:   *p++ = 0x59; break;
         case Xsse_RCPF:   *p++ = 0x53; break;
         case Xsse_RSQRTF: *p++ = 0x52; break;
         case Xsse_SQRTF:  *p++ = 0x51; break;
         case Xsse_SUBF:   *p++ = 0x5C; break;
         case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
         default: goto bad;
      }
      p = doAMode_R(p, fake(vregNo(i->Xin.Sse32FLo.dst)),
                       fake(vregNo(i->Xin.Sse32FLo.src)) );
      if (xtra & 0x100)
         *p++ = toUChar(xtra & 0xFF);
      goto done;
   2978 
   case Xin_Sse64FLo:
      /* Scalar double-precision SSE op on the low lane, reg-reg
         (F2 0F <op>); same scheme as the packed cases above. */
      xtra = 0;
      *p++ = 0xF2;
      *p++ = 0x0F;
      switch (i->Xin.Sse64FLo.op) {
         case Xsse_ADDF:   *p++ = 0x58; break;
         case Xsse_DIVF:   *p++ = 0x5E; break;
         case Xsse_MAXF:   *p++ = 0x5F; break;
         case Xsse_MINF:   *p++ = 0x5D; break;
         case Xsse_MULF:   *p++ = 0x59; break;
         case Xsse_RCPF:   *p++ = 0x53; break;
         case Xsse_RSQRTF: *p++ = 0x52; break;
         case Xsse_SQRTF:  *p++ = 0x51; break;
         case Xsse_SUBF:   *p++ = 0x5C; break;
         case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
         default: goto bad;
      }
      p = doAMode_R(p, fake(vregNo(i->Xin.Sse64FLo.dst)),
                       fake(vregNo(i->Xin.Sse64FLo.src)) );
      if (xtra & 0x100)
         *p++ = toUChar(xtra & 0xFF);
      goto done;
   3004 
   case Xin_SseReRg:
      /* Reg-reg SSE/SSE2 op: a straight opcode-byte lookup table.
         The 0x66-prefixed entries are the SSE2 integer ops. */
#     define XX(_n) *p++ = (_n)
      switch (i->Xin.SseReRg.op) {
         case Xsse_MOV:     /*movups*/ XX(0x0F); XX(0x10); break;
         case Xsse_OR:                 XX(0x0F); XX(0x56); break;
         case Xsse_XOR:                XX(0x0F); XX(0x57); break;
         case Xsse_AND:                XX(0x0F); XX(0x54); break;
         case Xsse_PACKSSD:  XX(0x66); XX(0x0F); XX(0x6B); break;
         case Xsse_PACKSSW:  XX(0x66); XX(0x0F); XX(0x63); break;
         case Xsse_PACKUSW:  XX(0x66); XX(0x0F); XX(0x67); break;
         case Xsse_ADD8:     XX(0x66); XX(0x0F); XX(0xFC); break;
         case Xsse_ADD16:    XX(0x66); XX(0x0F); XX(0xFD); break;
         case Xsse_ADD32:    XX(0x66); XX(0x0F); XX(0xFE); break;
         case Xsse_ADD64:    XX(0x66); XX(0x0F); XX(0xD4); break;
         case Xsse_QADD8S:   XX(0x66); XX(0x0F); XX(0xEC); break;
         case Xsse_QADD16S:  XX(0x66); XX(0x0F); XX(0xED); break;
         case Xsse_QADD8U:   XX(0x66); XX(0x0F); XX(0xDC); break;
         case Xsse_QADD16U:  XX(0x66); XX(0x0F); XX(0xDD); break;
         case Xsse_AVG8U:    XX(0x66); XX(0x0F); XX(0xE0); break;
         case Xsse_AVG16U:   XX(0x66); XX(0x0F); XX(0xE3); break;
         case Xsse_CMPEQ8:   XX(0x66); XX(0x0F); XX(0x74); break;
         case Xsse_CMPEQ16:  XX(0x66); XX(0x0F); XX(0x75); break;
         case Xsse_CMPEQ32:  XX(0x66); XX(0x0F); XX(0x76); break;
         case Xsse_CMPGT8S:  XX(0x66); XX(0x0F); XX(0x64); break;
         case Xsse_CMPGT16S: XX(0x66); XX(0x0F); XX(0x65); break;
         case Xsse_CMPGT32S: XX(0x66); XX(0x0F); XX(0x66); break;
         case Xsse_MAX16S:   XX(0x66); XX(0x0F); XX(0xEE); break;
         case Xsse_MAX8U:    XX(0x66); XX(0x0F); XX(0xDE); break;
         case Xsse_MIN16S:   XX(0x66); XX(0x0F); XX(0xEA); break;
         case Xsse_MIN8U:    XX(0x66); XX(0x0F); XX(0xDA); break;
         case Xsse_MULHI16U: XX(0x66); XX(0x0F); XX(0xE4); break;
         case Xsse_MULHI16S: XX(0x66); XX(0x0F); XX(0xE5); break;
         case Xsse_MUL16:    XX(0x66); XX(0x0F); XX(0xD5); break;
         case Xsse_SHL16:    XX(0x66); XX(0x0F); XX(0xF1); break;
         case Xsse_SHL32:    XX(0x66); XX(0x0F); XX(0xF2); break;
         case Xsse_SHL64:    XX(0x66); XX(0x0F); XX(0xF3); break;
         case Xsse_SAR16:    XX(0x66); XX(0x0F); XX(0xE1); break;
         case Xsse_SAR32:    XX(0x66); XX(0x0F); XX(0xE2); break;
         case Xsse_SHR16:    XX(0x66); XX(0x0F); XX(0xD1); break;
         case Xsse_SHR32:    XX(0x66); XX(0x0F); XX(0xD2); break;
         case Xsse_SHR64:    XX(0x66); XX(0x0F); XX(0xD3); break;
         case Xsse_SUB8:     XX(0x66); XX(0x0F); XX(0xF8); break;
         case Xsse_SUB16:    XX(0x66); XX(0x0F); XX(0xF9); break;
         case Xsse_SUB32:    XX(0x66); XX(0x0F); XX(0xFA); break;
         case Xsse_SUB64:    XX(0x66); XX(0x0F); XX(0xFB); break;
         case Xsse_QSUB8S:   XX(0x66); XX(0x0F); XX(0xE8); break;
         case Xsse_QSUB16S:  XX(0x66); XX(0x0F); XX(0xE9); break;
         case Xsse_QSUB8U:   XX(0x66); XX(0x0F); XX(0xD8); break;
         case Xsse_QSUB16U:  XX(0x66); XX(0x0F); XX(0xD9); break;
         case Xsse_UNPCKHB:  XX(0x66); XX(0x0F); XX(0x68); break;
         case Xsse_UNPCKHW:  XX(0x66); XX(0x0F); XX(0x69); break;
         case Xsse_UNPCKHD:  XX(0x66); XX(0x0F); XX(0x6A); break;
         case Xsse_UNPCKHQ:  XX(0x66); XX(0x0F); XX(0x6D); break;
         case Xsse_UNPCKLB:  XX(0x66); XX(0x0F); XX(0x60); break;
         case Xsse_UNPCKLW:  XX(0x66); XX(0x0F); XX(0x61); break;
         case Xsse_UNPCKLD:  XX(0x66); XX(0x0F); XX(0x62); break;
         case Xsse_UNPCKLQ:  XX(0x66); XX(0x0F); XX(0x6C); break;
         default: goto bad;
      }
      p = doAMode_R(p, fake(vregNo(i->Xin.SseReRg.dst)),
                       fake(vregNo(i->Xin.SseReRg.src)) );
#     undef XX
      goto done;
   3068 
   3069    case Xin_SseCMov:
   3070       /* jmp fwds if !condition */
   3071       *p++ = toUChar(0x70 + (i->Xin.SseCMov.cond ^ 1));
   3072       *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
   3073       ptmp = p;
   3074 
   3075       /* movaps %src, %dst */
   3076       *p++ = 0x0F;
   3077       *p++ = 0x28;
   3078       p = doAMode_R(p, fake(vregNo(i->Xin.SseCMov.dst)),
   3079                        fake(vregNo(i->Xin.SseCMov.src)) );
   3080 
   3081       /* Fill in the jump offset. */
   3082       *(ptmp-1) = toUChar(p - ptmp);
   3083       goto done;
   3084 
   3085    case Xin_SseShuf:
   3086       *p++ = 0x66;
   3087       *p++ = 0x0F;
   3088       *p++ = 0x70;
   3089       p = doAMode_R(p, fake(vregNo(i->Xin.SseShuf.dst)),
   3090                        fake(vregNo(i->Xin.SseShuf.src)) );
   3091       *p++ = (UChar)(i->Xin.SseShuf.order);
   3092       goto done;
   3093 
   3094    default:
   3095       goto bad;
   3096    }
   3097 
   3098   bad:
   3099    ppX86Instr(i, mode64);
   3100    vpanic("emit_X86Instr");
   3101    /*NOTREACHED*/
   3102 
   3103   done:
   3104    vassert(p - &buf[0] <= 32);
   3105    return p - &buf[0];
   3106 
   3107 #  undef fake
   3108 }
   3109 
   3110 /*---------------------------------------------------------------*/
   3111 /*--- end                                     host_x86_defs.c ---*/
   3112 /*---------------------------------------------------------------*/
   3113