
/*---------------------------------------------------------------*/
/*--- begin                                   host_x86_defs.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2015 OpenWorks LLP
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex.h"
#include "libvex_trc_values.h"

#include "main_util.h"
#include "host_generic_regs.h"
#include "host_x86_defs.h"


/* --------- Registers. --------- */

const RRegUniverse* getRRegUniverse_X86 ( void )
{
   /* The real-register universe is a big constant, so we just want to
      initialise it once. */
   static RRegUniverse rRegUniverse_X86;
   static Bool         rRegUniverse_X86_initted = False;

   /* Handy shorthand, nothing more */
   RRegUniverse* ru = &rRegUniverse_X86;

   /* This isn't thread-safe.  Sigh. */
   if (LIKELY(rRegUniverse_X86_initted))
      return ru;

   RRegUniverse__init(ru);

   /* Add the registers.  The initial segment of this array must be
      those available for allocation by reg-alloc, and those that
      follow are not available for allocation. */
   ru->regs[ru->size++] = hregX86_EAX();
   ru->regs[ru->size++] = hregX86_EBX();
   ru->regs[ru->size++] = hregX86_ECX();
   ru->regs[ru->size++] = hregX86_EDX();
   ru->regs[ru->size++] = hregX86_ESI();
   ru->regs[ru->size++] = hregX86_EDI();
   ru->regs[ru->size++] = hregX86_FAKE0();
   ru->regs[ru->size++] = hregX86_FAKE1();
   ru->regs[ru->size++] = hregX86_FAKE2();
   ru->regs[ru->size++] = hregX86_FAKE3();
   ru->regs[ru->size++] = hregX86_FAKE4();
   ru->regs[ru->size++] = hregX86_FAKE5();
   ru->regs[ru->size++] = hregX86_XMM0();
   ru->regs[ru->size++] = hregX86_XMM1();
   ru->regs[ru->size++] = hregX86_XMM2();
   ru->regs[ru->size++] = hregX86_XMM3();
   ru->regs[ru->size++] = hregX86_XMM4();
   ru->regs[ru->size++] = hregX86_XMM5();
   ru->regs[ru->size++] = hregX86_XMM6();
   ru->regs[ru->size++] = hregX86_XMM7();
   ru->allocable = ru->size;
   /* And other regs, not available to the allocator. */
   ru->regs[ru->size++] = hregX86_ESP();
   ru->regs[ru->size++] = hregX86_EBP();

   rRegUniverse_X86_initted = True;

   RRegUniverse__check_is_sane(ru);
   return ru;
}
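
/* Note (explanatory, not in the original): the six FAKE registers
   (class HRcFlt64) are in effect flat stand-ins for the x87
   floating-point stack; ppHRegX86 below prints them as
   %fake0..%fake5.  ESP and EBP are added only after ru->allocable is
   set, so the register allocator never hands them out. */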


void ppHRegX86 ( HReg reg )
{
   Int r;
   static const HChar* ireg32_names[8]
     = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi" };
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      ppHReg(reg);
      return;
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt32:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 8);
         vex_printf("%s", ireg32_names[r]);
         return;
      case HRcFlt64:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 6);
         vex_printf("%%fake%d", r);
         return;
      case HRcVec128:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 8);
         vex_printf("%%xmm%d", r);
         return;
      default:
         vpanic("ppHRegX86");
   }
}


/* --------- Condition codes, Intel encoding. --------- */

const HChar* showX86CondCode ( X86CondCode cond )
{
   switch (cond) {
      case Xcc_O:      return "o";
      case Xcc_NO:     return "no";
      case Xcc_B:      return "b";
      case Xcc_NB:     return "nb";
      case Xcc_Z:      return "z";
      case Xcc_NZ:     return "nz";
      case Xcc_BE:     return "be";
      case Xcc_NBE:    return "nbe";
      case Xcc_S:      return "s";
      case Xcc_NS:     return "ns";
      case Xcc_P:      return "p";
      case Xcc_NP:     return "np";
      case Xcc_L:      return "l";
      case Xcc_NL:     return "nl";
      case Xcc_LE:     return "le";
      case Xcc_NLE:    return "nle";
      case Xcc_ALWAYS: return "ALWAYS";
      default: vpanic("showX86CondCode");
   }
}


/* --------- X86AMode: memory address expressions. --------- */

X86AMode* X86AMode_IR ( UInt imm32, HReg reg ) {
   X86AMode* am = LibVEX_Alloc_inline(sizeof(X86AMode));
   am->tag = Xam_IR;
   am->Xam.IR.imm = imm32;
   am->Xam.IR.reg = reg;
   return am;
}
X86AMode* X86AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
   X86AMode* am = LibVEX_Alloc_inline(sizeof(X86AMode));
   am->tag = Xam_IRRS;
   am->Xam.IRRS.imm = imm32;
   am->Xam.IRRS.base = base;
   am->Xam.IRRS.index = indEx;
   am->Xam.IRRS.shift = shift;
   vassert(shift >= 0 && shift <= 3);
   return am;
}
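
/* Illustrative example: the amode 0x8(%ebp,%esi,4), denoting
   EBP + ESI*4 + 8, would be constructed as
      X86AMode_IRRS(0x8, hregX86_EBP(), hregX86_ESI(), 2);
   since the scale shown by ppX86AMode below is 1 << shift. */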

X86AMode* dopyX86AMode ( X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         return X86AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg );
      case Xam_IRRS:
         return X86AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base,
                               am->Xam.IRRS.index, am->Xam.IRRS.shift );
      default:
         vpanic("dopyX86AMode");
   }
}

void ppX86AMode ( X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         if (am->Xam.IR.imm == 0)
            vex_printf("(");
         else
            vex_printf("0x%x(", am->Xam.IR.imm);
         ppHRegX86(am->Xam.IR.reg);
         vex_printf(")");
         return;
      case Xam_IRRS:
         vex_printf("0x%x(", am->Xam.IRRS.imm);
         ppHRegX86(am->Xam.IRRS.base);
         vex_printf(",");
         ppHRegX86(am->Xam.IRRS.index);
         vex_printf(",%d)", 1 << am->Xam.IRRS.shift);
         return;
      default:
         vpanic("ppX86AMode");
   }
}

static void addRegUsage_X86AMode ( HRegUsage* u, X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         addHRegUse(u, HRmRead, am->Xam.IR.reg);
         return;
      case Xam_IRRS:
         addHRegUse(u, HRmRead, am->Xam.IRRS.base);
         addHRegUse(u, HRmRead, am->Xam.IRRS.index);
         return;
      default:
         vpanic("addRegUsage_X86AMode");
   }
}

static void mapRegs_X86AMode ( HRegRemap* m, X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         am->Xam.IR.reg = lookupHRegRemap(m, am->Xam.IR.reg);
         return;
      case Xam_IRRS:
         am->Xam.IRRS.base = lookupHRegRemap(m, am->Xam.IRRS.base);
         am->Xam.IRRS.index = lookupHRegRemap(m, am->Xam.IRRS.index);
         return;
      default:
         vpanic("mapRegs_X86AMode");
   }
}

/* --------- Operand, which can be reg, immediate or memory. --------- */

X86RMI* X86RMI_Imm ( UInt imm32 ) {
   X86RMI* op         = LibVEX_Alloc_inline(sizeof(X86RMI));
   op->tag            = Xrmi_Imm;
   op->Xrmi.Imm.imm32 = imm32;
   return op;
}
X86RMI* X86RMI_Reg ( HReg reg ) {
   X86RMI* op       = LibVEX_Alloc_inline(sizeof(X86RMI));
   op->tag          = Xrmi_Reg;
   op->Xrmi.Reg.reg = reg;
   return op;
}
X86RMI* X86RMI_Mem ( X86AMode* am ) {
   X86RMI* op      = LibVEX_Alloc_inline(sizeof(X86RMI));
   op->tag         = Xrmi_Mem;
   op->Xrmi.Mem.am = am;
   return op;
}

void ppX86RMI ( X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         vex_printf("$0x%x", op->Xrmi.Imm.imm32);
         return;
      case Xrmi_Reg:
         ppHRegX86(op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         ppX86AMode(op->Xrmi.Mem.am);
         return;
      default:
         vpanic("ppX86RMI");
   }
}

/* An X86RMI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_X86RMI ( HRegUsage* u, X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         return;
      case Xrmi_Reg:
         addHRegUse(u, HRmRead, op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         addRegUsage_X86AMode(u, op->Xrmi.Mem.am);
         return;
      default:
         vpanic("addRegUsage_X86RMI");
   }
}

static void mapRegs_X86RMI ( HRegRemap* m, X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         return;
      case Xrmi_Reg:
         op->Xrmi.Reg.reg = lookupHRegRemap(m, op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         mapRegs_X86AMode(m, op->Xrmi.Mem.am);
         return;
      default:
         vpanic("mapRegs_X86RMI");
   }
}
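
/* Illustrative example: as the source of an Alu32R instruction, the
   three X86RMI forms print (via ppX86Instr below) as, e.g.
      addl $0x42,%eax      -- X86RMI_Imm(0x42)
      addl %ebx,%eax       -- X86RMI_Reg(hregX86_EBX())
      addl 0x4(%esp),%eax  -- X86RMI_Mem(X86AMode_IR(4, hregX86_ESP()))
*/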


/* --------- Operand, which can be reg or immediate only. --------- */

X86RI* X86RI_Imm ( UInt imm32 ) {
   X86RI* op         = LibVEX_Alloc_inline(sizeof(X86RI));
   op->tag           = Xri_Imm;
   op->Xri.Imm.imm32 = imm32;
   return op;
}
X86RI* X86RI_Reg ( HReg reg ) {
   X86RI* op       = LibVEX_Alloc_inline(sizeof(X86RI));
   op->tag         = Xri_Reg;
   op->Xri.Reg.reg = reg;
   return op;
}

void ppX86RI ( X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         vex_printf("$0x%x", op->Xri.Imm.imm32);
         return;
      case Xri_Reg:
         ppHRegX86(op->Xri.Reg.reg);
         return;
      default:
         vpanic("ppX86RI");
   }
}

/* An X86RI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_X86RI ( HRegUsage* u, X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         return;
      case Xri_Reg:
         addHRegUse(u, HRmRead, op->Xri.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_X86RI");
   }
}

static void mapRegs_X86RI ( HRegRemap* m, X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         return;
      case Xri_Reg:
         op->Xri.Reg.reg = lookupHRegRemap(m, op->Xri.Reg.reg);
         return;
      default:
         vpanic("mapRegs_X86RI");
   }
}
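
/* Illustrative example: as the source of an Alu32M instruction, the
   two X86RI forms print as, e.g.
      movl $0x0,0x4(%ebp)   -- X86RI_Imm(0)
      movl %eax,0x4(%ebp)   -- X86RI_Reg(hregX86_EAX())
*/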


/* --------- Operand, which can be reg or memory only. --------- */

X86RM* X86RM_Reg ( HReg reg ) {
   X86RM* op       = LibVEX_Alloc_inline(sizeof(X86RM));
   op->tag         = Xrm_Reg;
   op->Xrm.Reg.reg = reg;
   return op;
}
X86RM* X86RM_Mem ( X86AMode* am ) {
   X86RM* op      = LibVEX_Alloc_inline(sizeof(X86RM));
   op->tag        = Xrm_Mem;
   op->Xrm.Mem.am = am;
   return op;
}

void ppX86RM ( X86RM* op ) {
   switch (op->tag) {
      case Xrm_Mem:
         ppX86AMode(op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         ppHRegX86(op->Xrm.Reg.reg);
         return;
      default:
         vpanic("ppX86RM");
   }
}

/* Because an X86RM can be both a source or destination operand, we
   have to supply a mode -- pertaining to the operand as a whole --
   indicating how it's being used. */
static void addRegUsage_X86RM ( HRegUsage* u, X86RM* op, HRegMode mode ) {
   switch (op->tag) {
      case Xrm_Mem:
         /* Memory is read, written or modified.  So we just want to
            know the regs read by the amode. */
         addRegUsage_X86AMode(u, op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         /* reg is read, written or modified.  Add it in the
            appropriate way. */
         addHRegUse(u, mode, op->Xrm.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_X86RM");
   }
}
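
/* For instance, getRegUsage_X86Instr below passes HRmRead here for
   the operand of Xin_Test32, since 'test' only reads it, whereas an
   Xin_CMov32 source is also passed with HRmRead while its destination
   register is separately marked HRmModify. */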

static void mapRegs_X86RM ( HRegRemap* m, X86RM* op )
{
   switch (op->tag) {
      case Xrm_Mem:
         mapRegs_X86AMode(m, op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         op->Xrm.Reg.reg = lookupHRegRemap(m, op->Xrm.Reg.reg);
         return;
      default:
         vpanic("mapRegs_X86RM");
   }
}


/* --------- Instructions. --------- */

const HChar* showX86UnaryOp ( X86UnaryOp op ) {
   switch (op) {
      case Xun_NOT: return "not";
      case Xun_NEG: return "neg";
      default: vpanic("showX86UnaryOp");
   }
}

const HChar* showX86AluOp ( X86AluOp op ) {
   switch (op) {
      case Xalu_MOV:  return "mov";
      case Xalu_CMP:  return "cmp";
      case Xalu_ADD:  return "add";
      case Xalu_SUB:  return "sub";
      case Xalu_ADC:  return "adc";
      case Xalu_SBB:  return "sbb";
      case Xalu_AND:  return "and";
      case Xalu_OR:   return "or";
      case Xalu_XOR:  return "xor";
      case Xalu_MUL:  return "mul";
      default: vpanic("showX86AluOp");
   }
}

const HChar* showX86ShiftOp ( X86ShiftOp op ) {
   switch (op) {
      case Xsh_SHL: return "shl";
      case Xsh_SHR: return "shr";
      case Xsh_SAR: return "sar";
      default: vpanic("showX86ShiftOp");
   }
}

const HChar* showX86FpOp ( X86FpOp op ) {
   switch (op) {
      case Xfp_ADD:    return "add";
      case Xfp_SUB:    return "sub";
      case Xfp_MUL:    return "mul";
      case Xfp_DIV:    return "div";
      case Xfp_SCALE:  return "scale";
      case Xfp_ATAN:   return "atan";
      case Xfp_YL2X:   return "yl2x";
      case Xfp_YL2XP1: return "yl2xp1";
      case Xfp_PREM:   return "prem";
      case Xfp_PREM1:  return "prem1";
      case Xfp_SQRT:   return "sqrt";
      case Xfp_ABS:    return "abs";
      case Xfp_NEG:    return "chs";
      case Xfp_MOV:    return "mov";
      case Xfp_SIN:    return "sin";
      case Xfp_COS:    return "cos";
      case Xfp_TAN:    return "tan";
      case Xfp_ROUND:  return "round";
      case Xfp_2XM1:   return "2xm1";
      default: vpanic("showX86FpOp");
   }
}

const HChar* showX86SseOp ( X86SseOp op ) {
   switch (op) {
      case Xsse_MOV:      return "mov(?!)";
      case Xsse_ADDF:     return "add";
      case Xsse_SUBF:     return "sub";
      case Xsse_MULF:     return "mul";
      case Xsse_DIVF:     return "div";
      case Xsse_MAXF:     return "max";
      case Xsse_MINF:     return "min";
      case Xsse_CMPEQF:   return "cmpFeq";
      case Xsse_CMPLTF:   return "cmpFlt";
      case Xsse_CMPLEF:   return "cmpFle";
      case Xsse_CMPUNF:   return "cmpFun";
      case Xsse_RCPF:     return "rcp";
      case Xsse_RSQRTF:   return "rsqrt";
      case Xsse_SQRTF:    return "sqrt";
      case Xsse_AND:      return "and";
      case Xsse_OR:       return "or";
      case Xsse_XOR:      return "xor";
      case Xsse_ANDN:     return "andn";
      case Xsse_ADD8:     return "paddb";
      case Xsse_ADD16:    return "paddw";
      case Xsse_ADD32:    return "paddd";
      case Xsse_ADD64:    return "paddq";
      case Xsse_QADD8U:   return "paddusb";
      case Xsse_QADD16U:  return "paddusw";
      case Xsse_QADD8S:   return "paddsb";
      case Xsse_QADD16S:  return "paddsw";
      case Xsse_SUB8:     return "psubb";
      case Xsse_SUB16:    return "psubw";
      case Xsse_SUB32:    return "psubd";
      case Xsse_SUB64:    return "psubq";
      case Xsse_QSUB8U:   return "psubusb";
      case Xsse_QSUB16U:  return "psubusw";
      case Xsse_QSUB8S:   return "psubsb";
      case Xsse_QSUB16S:  return "psubsw";
      case Xsse_MUL16:    return "pmullw";
      case Xsse_MULHI16U: return "pmulhuw";
      case Xsse_MULHI16S: return "pmulhw";
      case Xsse_AVG8U:    return "pavgb";
      case Xsse_AVG16U:   return "pavgw";
      case Xsse_MAX16S:   return "pmaxw";
      case Xsse_MAX8U:    return "pmaxub";
      case Xsse_MIN16S:   return "pminw";
      case Xsse_MIN8U:    return "pminub";
      case Xsse_CMPEQ8:   return "pcmpeqb";
      case Xsse_CMPEQ16:  return "pcmpeqw";
      case Xsse_CMPEQ32:  return "pcmpeqd";
      case Xsse_CMPGT8S:  return "pcmpgtb";
      case Xsse_CMPGT16S: return "pcmpgtw";
      case Xsse_CMPGT32S: return "pcmpgtd";
      case Xsse_SHL16:    return "psllw";
      case Xsse_SHL32:    return "pslld";
      case Xsse_SHL64:    return "psllq";
      case Xsse_SHR16:    return "psrlw";
      case Xsse_SHR32:    return "psrld";
      case Xsse_SHR64:    return "psrlq";
      case Xsse_SAR16:    return "psraw";
      case Xsse_SAR32:    return "psrad";
      case Xsse_PACKSSD:  return "packssdw";
      case Xsse_PACKSSW:  return "packsswb";
      case Xsse_PACKUSW:  return "packuswb";
      case Xsse_UNPCKHB:  return "punpckhb";
      case Xsse_UNPCKHW:  return "punpckhw";
      case Xsse_UNPCKHD:  return "punpckhd";
      case Xsse_UNPCKHQ:  return "punpckhq";
      case Xsse_UNPCKLB:  return "punpcklb";
      case Xsse_UNPCKLW:  return "punpcklw";
      case Xsse_UNPCKLD:  return "punpckld";
      case Xsse_UNPCKLQ:  return "punpcklq";
      default: vpanic("showX86SseOp");
   }
}

X86Instr* X86Instr_Alu32R ( X86AluOp op, X86RMI* src, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag            = Xin_Alu32R;
   i->Xin.Alu32R.op  = op;
   i->Xin.Alu32R.src = src;
   i->Xin.Alu32R.dst = dst;
   return i;
}
X86Instr* X86Instr_Alu32M ( X86AluOp op, X86RI* src, X86AMode* dst ) {
   X86Instr* i       = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag            = Xin_Alu32M;
   i->Xin.Alu32M.op  = op;
   i->Xin.Alu32M.src = src;
   i->Xin.Alu32M.dst = dst;
   vassert(op != Xalu_MUL);
   return i;
}
X86Instr* X86Instr_Sh32 ( X86ShiftOp op, UInt src, HReg dst ) {
   X86Instr* i     = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag          = Xin_Sh32;
   i->Xin.Sh32.op  = op;
   i->Xin.Sh32.src = src;
   i->Xin.Sh32.dst = dst;
   return i;
}
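
/* Note: in Xin_Sh32 a 'src' of 0 means the shift amount is supplied
   in %cl; any other value is an immediate shift count (see ppX86Instr
   and getRegUsage_X86Instr, which adds a read of %ecx in the 0
   case). */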
X86Instr* X86Instr_Test32 ( UInt imm32, X86RM* dst ) {
   X86Instr* i         = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag              = Xin_Test32;
   i->Xin.Test32.imm32 = imm32;
   i->Xin.Test32.dst   = dst;
   return i;
}
X86Instr* X86Instr_Unary32 ( X86UnaryOp op, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag             = Xin_Unary32;
   i->Xin.Unary32.op  = op;
   i->Xin.Unary32.dst = dst;
   return i;
}
X86Instr* X86Instr_Lea32 ( X86AMode* am, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag             = Xin_Lea32;
   i->Xin.Lea32.am    = am;
   i->Xin.Lea32.dst   = dst;
   return i;
}
X86Instr* X86Instr_MulL ( Bool syned, X86RM* src ) {
   X86Instr* i        = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag             = Xin_MulL;
   i->Xin.MulL.syned  = syned;
   i->Xin.MulL.src    = src;
   return i;
}
X86Instr* X86Instr_Div ( Bool syned, X86RM* src ) {
   X86Instr* i      = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag           = Xin_Div;
   i->Xin.Div.syned = syned;
   i->Xin.Div.src   = src;
   return i;
}
X86Instr* X86Instr_Sh3232  ( X86ShiftOp op, UInt amt, HReg src, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag            = Xin_Sh3232;
   i->Xin.Sh3232.op  = op;
   i->Xin.Sh3232.amt = amt;
   i->Xin.Sh3232.src = src;
   i->Xin.Sh3232.dst = dst;
   vassert(op == Xsh_SHL || op == Xsh_SHR);
   return i;
}
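
/* As with Sh32, an 'amt' of 0 means the shift amount comes from %cl.
   Sh3232 prints as shldl/shrdl, the double-register shift forms. */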
X86Instr* X86Instr_Push( X86RMI* src ) {
   X86Instr* i     = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag          = Xin_Push;
   i->Xin.Push.src = src;
   return i;
}
X86Instr* X86Instr_Call ( X86CondCode cond, Addr32 target, Int regparms,
                          RetLoc rloc ) {
   X86Instr* i          = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag               = Xin_Call;
   i->Xin.Call.cond     = cond;
   i->Xin.Call.target   = target;
   i->Xin.Call.regparms = regparms;
   i->Xin.Call.rloc     = rloc;
   vassert(regparms >= 0 && regparms <= 3);
   vassert(is_sane_RetLoc(rloc));
   return i;
}
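
/* 'regparms' (0..3) is the number of arguments passed in registers,
   in the order %eax, %edx, %ecx; the Xin_Call case of
   getRegUsage_X86Instr below spells out the full convention,
   including which register carries the call target address. */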
X86Instr* X86Instr_XDirect ( Addr32 dstGA, X86AMode* amEIP,
                             X86CondCode cond, Bool toFastEP ) {
   X86Instr* i             = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag                  = Xin_XDirect;
   i->Xin.XDirect.dstGA    = dstGA;
   i->Xin.XDirect.amEIP    = amEIP;
   i->Xin.XDirect.cond     = cond;
   i->Xin.XDirect.toFastEP = toFastEP;
   return i;
}
X86Instr* X86Instr_XIndir ( HReg dstGA, X86AMode* amEIP,
                            X86CondCode cond ) {
   X86Instr* i         = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag              = Xin_XIndir;
   i->Xin.XIndir.dstGA = dstGA;
   i->Xin.XIndir.amEIP = amEIP;
   i->Xin.XIndir.cond  = cond;
   return i;
}
X86Instr* X86Instr_XAssisted ( HReg dstGA, X86AMode* amEIP,
                               X86CondCode cond, IRJumpKind jk ) {
   X86Instr* i            = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag                 = Xin_XAssisted;
   i->Xin.XAssisted.dstGA = dstGA;
   i->Xin.XAssisted.amEIP = amEIP;
   i->Xin.XAssisted.cond  = cond;
   i->Xin.XAssisted.jk    = jk;
   return i;
}
X86Instr* X86Instr_CMov32  ( X86CondCode cond, X86RM* src, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag             = Xin_CMov32;
   i->Xin.CMov32.cond = cond;
   i->Xin.CMov32.src  = src;
   i->Xin.CMov32.dst  = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned,
                            X86AMode* src, HReg dst ) {
   X86Instr* i           = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag                = Xin_LoadEX;
   i->Xin.LoadEX.szSmall = szSmall;
   i->Xin.LoadEX.syned   = syned;
   i->Xin.LoadEX.src     = src;
   i->Xin.LoadEX.dst     = dst;
   vassert(szSmall == 1 || szSmall == 2);
   return i;
}
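
/* LoadEX prints as movzbl/movsbl/movzwl/movswl: an 8- or 16-bit load
   ('szSmall' is 1 or 2), zero- or sign-extended to 32 bits. */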
X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst ) {
   X86Instr* i      = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag           = Xin_Store;
   i->Xin.Store.sz  = sz;
   i->Xin.Store.src = src;
   i->Xin.Store.dst = dst;
   vassert(sz == 1 || sz == 2);
   return i;
}
X86Instr* X86Instr_Set32 ( X86CondCode cond, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag            = Xin_Set32;
   i->Xin.Set32.cond = cond;
   i->Xin.Set32.dst  = dst;
   return i;
}
X86Instr* X86Instr_Bsfr32 ( Bool isFwds, HReg src, HReg dst ) {
   X86Instr* i          = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag               = Xin_Bsfr32;
   i->Xin.Bsfr32.isFwds = isFwds;
   i->Xin.Bsfr32.src    = src;
   i->Xin.Bsfr32.dst    = dst;
   return i;
}
X86Instr* X86Instr_MFence ( UInt hwcaps ) {
   X86Instr* i          = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag               = Xin_MFence;
   i->Xin.MFence.hwcaps = hwcaps;
   vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_MMXEXT
                            |VEX_HWCAPS_X86_SSE1
                            |VEX_HWCAPS_X86_SSE2
                            |VEX_HWCAPS_X86_SSE3
                            |VEX_HWCAPS_X86_LZCNT)));
   return i;
}
X86Instr* X86Instr_ACAS ( X86AMode* addr, UChar sz ) {
   X86Instr* i      = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag           = Xin_ACAS;
   i->Xin.ACAS.addr = addr;
   i->Xin.ACAS.sz   = sz;
   vassert(sz == 4 || sz == 2 || sz == 1);
   return i;
}
X86Instr* X86Instr_DACAS ( X86AMode* addr ) {
   X86Instr* i       = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag            = Xin_DACAS;
   i->Xin.DACAS.addr = addr;
   return i;
}
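
/* ACAS prints as 'lock cmpxchg{b,w,l} {%eax->%ebx},addr': the
   expected old value lives in %eax and the candidate new value in
   %ebx.  DACAS is the 64-bit analogue, 'lock cmpxchg8b', using
   %edx:%eax and %ecx:%ebx (see ppX86Instr and getRegUsage_X86Instr
   below). */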

X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag             = Xin_FpUnary;
   i->Xin.FpUnary.op  = op;
   i->Xin.FpUnary.src = src;
   i->Xin.FpUnary.dst = dst;
   return i;
}
X86Instr* X86Instr_FpBinary ( X86FpOp op, HReg srcL, HReg srcR, HReg dst ) {
   X86Instr* i          = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag               = Xin_FpBinary;
   i->Xin.FpBinary.op   = op;
   i->Xin.FpBinary.srcL = srcL;
   i->Xin.FpBinary.srcR = srcR;
   i->Xin.FpBinary.dst  = dst;
   return i;
}
X86Instr* X86Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, X86AMode* addr ) {
   X86Instr* i          = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag               = Xin_FpLdSt;
   i->Xin.FpLdSt.isLoad = isLoad;
   i->Xin.FpLdSt.sz     = sz;
   i->Xin.FpLdSt.reg    = reg;
   i->Xin.FpLdSt.addr   = addr;
   vassert(sz == 4 || sz == 8 || sz == 10);
   return i;
}
X86Instr* X86Instr_FpLdStI ( Bool isLoad, UChar sz,
                             HReg reg, X86AMode* addr ) {
   X86Instr* i           = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag                = Xin_FpLdStI;
   i->Xin.FpLdStI.isLoad = isLoad;
   i->Xin.FpLdStI.sz     = sz;
   i->Xin.FpLdStI.reg    = reg;
   i->Xin.FpLdStI.addr   = addr;
   vassert(sz == 2 || sz == 4 || sz == 8);
   return i;
}
X86Instr* X86Instr_Fp64to32 ( HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag              = Xin_Fp64to32;
   i->Xin.Fp64to32.src = src;
   i->Xin.Fp64to32.dst = dst;
   return i;
}
X86Instr* X86Instr_FpCMov ( X86CondCode cond, HReg src, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag             = Xin_FpCMov;
   i->Xin.FpCMov.cond = cond;
   i->Xin.FpCMov.src  = src;
   i->Xin.FpCMov.dst  = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
X86Instr* X86Instr_FpLdCW ( X86AMode* addr ) {
   X86Instr* i          = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag               = Xin_FpLdCW;
   i->Xin.FpLdCW.addr   = addr;
   return i;
}
X86Instr* X86Instr_FpStSW_AX ( void ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag      = Xin_FpStSW_AX;
   return i;
}
X86Instr* X86Instr_FpCmp ( HReg srcL, HReg srcR, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag            = Xin_FpCmp;
   i->Xin.FpCmp.srcL = srcL;
   i->Xin.FpCmp.srcR = srcR;
   i->Xin.FpCmp.dst  = dst;
   return i;
}
X86Instr* X86Instr_SseConst ( UShort con, HReg dst ) {
   X86Instr* i            = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag                 = Xin_SseConst;
   i->Xin.SseConst.con    = con;
   i->Xin.SseConst.dst    = dst;
   vassert(hregClass(dst) == HRcVec128);
   return i;
}
X86Instr* X86Instr_SseLdSt ( Bool isLoad, HReg reg, X86AMode* addr ) {
   X86Instr* i           = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag                = Xin_SseLdSt;
   i->Xin.SseLdSt.isLoad = isLoad;
   i->Xin.SseLdSt.reg    = reg;
   i->Xin.SseLdSt.addr   = addr;
   return i;
}
X86Instr* X86Instr_SseLdzLO  ( Int sz, HReg reg, X86AMode* addr )
{
   X86Instr* i           = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag                = Xin_SseLdzLO;
   i->Xin.SseLdzLO.sz    = toUChar(sz);
   i->Xin.SseLdzLO.reg   = reg;
   i->Xin.SseLdzLO.addr  = addr;
   vassert(sz == 4 || sz == 8);
   return i;
}
X86Instr* X86Instr_Sse32Fx4 ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag              = Xin_Sse32Fx4;
   i->Xin.Sse32Fx4.op  = op;
   i->Xin.Sse32Fx4.src = src;
   i->Xin.Sse32Fx4.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_Sse32FLo ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag              = Xin_Sse32FLo;
   i->Xin.Sse32FLo.op  = op;
   i->Xin.Sse32FLo.src = src;
   i->Xin.Sse32FLo.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_Sse64Fx2 ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag              = Xin_Sse64Fx2;
   i->Xin.Sse64Fx2.op  = op;
   i->Xin.Sse64Fx2.src = src;
   i->Xin.Sse64Fx2.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_Sse64FLo ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag              = Xin_Sse64FLo;
   i->Xin.Sse64FLo.op  = op;
   i->Xin.Sse64FLo.src = src;
   i->Xin.Sse64FLo.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_SseReRg ( X86SseOp op, HReg re, HReg rg ) {
   X86Instr* i        = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag             = Xin_SseReRg;
   i->Xin.SseReRg.op  = op;
   i->Xin.SseReRg.src = re;
   i->Xin.SseReRg.dst = rg;
   return i;
}
X86Instr* X86Instr_SseCMov ( X86CondCode cond, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag              = Xin_SseCMov;
   i->Xin.SseCMov.cond = cond;
   i->Xin.SseCMov.src  = src;
   i->Xin.SseCMov.dst  = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
X86Instr* X86Instr_SseShuf ( Int order, HReg src, HReg dst ) {
   X86Instr* i          = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag               = Xin_SseShuf;
   i->Xin.SseShuf.order = order;
   i->Xin.SseShuf.src   = src;
   i->Xin.SseShuf.dst   = dst;
   vassert(order >= 0 && order <= 0xFF);
   return i;
}
X86Instr* X86Instr_EvCheck ( X86AMode* amCounter,
                             X86AMode* amFailAddr ) {
   X86Instr* i               = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag                    = Xin_EvCheck;
   i->Xin.EvCheck.amCounter  = amCounter;
   i->Xin.EvCheck.amFailAddr = amFailAddr;
   return i;
}
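
/* An EvCheck decrements the counter at amCounter and, if it goes
   negative, jumps to the address stored at amFailAddr; see the
   Xin_EvCheck case of ppX86Instr below, which renders it as
   'decl ...; jns nofail; jmp *...; nofail:'. */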
X86Instr* X86Instr_ProfInc ( void ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag      = Xin_ProfInc;
   return i;
}

void ppX86Instr ( const X86Instr* i, Bool mode64 ) {
   vassert(mode64 == False);
   switch (i->tag) {
      case Xin_Alu32R:
         vex_printf("%sl ", showX86AluOp(i->Xin.Alu32R.op));
         ppX86RMI(i->Xin.Alu32R.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Alu32R.dst);
         return;
      case Xin_Alu32M:
         vex_printf("%sl ", showX86AluOp(i->Xin.Alu32M.op));
         ppX86RI(i->Xin.Alu32M.src);
         vex_printf(",");
         ppX86AMode(i->Xin.Alu32M.dst);
         return;
      case Xin_Sh32:
         vex_printf("%sl ", showX86ShiftOp(i->Xin.Sh32.op));
         if (i->Xin.Sh32.src == 0)
            vex_printf("%%cl,");
         else
            vex_printf("$%d,", (Int)i->Xin.Sh32.src);
         ppHRegX86(i->Xin.Sh32.dst);
         return;
      case Xin_Test32:
         vex_printf("testl $%d,", (Int)i->Xin.Test32.imm32);
         ppX86RM(i->Xin.Test32.dst);
         return;
      case Xin_Unary32:
         vex_printf("%sl ", showX86UnaryOp(i->Xin.Unary32.op));
         ppHRegX86(i->Xin.Unary32.dst);
         return;
      case Xin_Lea32:
         vex_printf("leal ");
         ppX86AMode(i->Xin.Lea32.am);
         vex_printf(",");
         ppHRegX86(i->Xin.Lea32.dst);
         return;
      case Xin_MulL:
         vex_printf("%cmull ", i->Xin.MulL.syned ? 's' : 'u');
         ppX86RM(i->Xin.MulL.src);
         return;
      case Xin_Div:
         vex_printf("%cdivl ", i->Xin.Div.syned ? 's' : 'u');
         ppX86RM(i->Xin.Div.src);
         return;
      case Xin_Sh3232:
         vex_printf("%sdl ", showX86ShiftOp(i->Xin.Sh3232.op));
         if (i->Xin.Sh3232.amt == 0)
            vex_printf(" %%cl,");
         else
            vex_printf(" $%d,", (Int)i->Xin.Sh3232.amt);
         ppHRegX86(i->Xin.Sh3232.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sh3232.dst);
         return;
      case Xin_Push:
         vex_printf("pushl ");
         ppX86RMI(i->Xin.Push.src);
         return;
      case Xin_Call:
         vex_printf("call%s[%d,",
                    i->Xin.Call.cond==Xcc_ALWAYS
                       ? "" : showX86CondCode(i->Xin.Call.cond),
                    i->Xin.Call.regparms);
         ppRetLoc(i->Xin.Call.rloc);
         vex_printf("] 0x%x", i->Xin.Call.target);
         break;
      case Xin_XDirect:
         vex_printf("(xDirect) ");
         vex_printf("if (%%eflags.%s) { ",
                    showX86CondCode(i->Xin.XDirect.cond));
         vex_printf("movl $0x%x,", i->Xin.XDirect.dstGA);
         ppX86AMode(i->Xin.XDirect.amEIP);
         vex_printf("; ");
         vex_printf("movl $disp_cp_chain_me_to_%sEP,%%edx; call *%%edx }",
                    i->Xin.XDirect.toFastEP ? "fast" : "slow");
         return;
      case Xin_XIndir:
         vex_printf("(xIndir) ");
         vex_printf("if (%%eflags.%s) { movl ",
                    showX86CondCode(i->Xin.XIndir.cond));
         ppHRegX86(i->Xin.XIndir.dstGA);
         vex_printf(",");
         ppX86AMode(i->Xin.XIndir.amEIP);
         vex_printf("; movl $disp_indir,%%edx; jmp *%%edx }");
         return;
      case Xin_XAssisted:
         vex_printf("(xAssisted) ");
         vex_printf("if (%%eflags.%s) { ",
                    showX86CondCode(i->Xin.XAssisted.cond));
         vex_printf("movl ");
         ppHRegX86(i->Xin.XAssisted.dstGA);
         vex_printf(",");
         ppX86AMode(i->Xin.XAssisted.amEIP);
         vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%ebp",
                    (Int)i->Xin.XAssisted.jk);
         vex_printf("; movl $disp_assisted,%%edx; jmp *%%edx }");
         return;
      case Xin_CMov32:
         vex_printf("cmov%s ", showX86CondCode(i->Xin.CMov32.cond));
         ppX86RM(i->Xin.CMov32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.CMov32.dst);
         return;
      case Xin_LoadEX:
         vex_printf("mov%c%cl ",
                    i->Xin.LoadEX.syned ? 's' : 'z',
                    i->Xin.LoadEX.szSmall==1 ? 'b' : 'w');
         ppX86AMode(i->Xin.LoadEX.src);
         vex_printf(",");
         ppHRegX86(i->Xin.LoadEX.dst);
         return;
      case Xin_Store:
         vex_printf("mov%c ", i->Xin.Store.sz==1 ? 'b' : 'w');
         ppHRegX86(i->Xin.Store.src);
         vex_printf(",");
         ppX86AMode(i->Xin.Store.dst);
         return;
      case Xin_Set32:
         vex_printf("setl%s ", showX86CondCode(i->Xin.Set32.cond));
         ppHRegX86(i->Xin.Set32.dst);
         return;
      case Xin_Bsfr32:
         vex_printf("bs%cl ", i->Xin.Bsfr32.isFwds ? 'f' : 'r');
         ppHRegX86(i->Xin.Bsfr32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Bsfr32.dst);
         return;
      case Xin_MFence:
         vex_printf("mfence(%s)",
                    LibVEX_ppVexHwCaps(VexArchX86,i->Xin.MFence.hwcaps));
         return;
      case Xin_ACAS:
         vex_printf("lock cmpxchg%c ",
                     i->Xin.ACAS.sz==1 ? 'b'
                                       : i->Xin.ACAS.sz==2 ? 'w' : 'l');
         vex_printf("{%%eax->%%ebx},");
         ppX86AMode(i->Xin.ACAS.addr);
         return;
      case Xin_DACAS:
         vex_printf("lock cmpxchg8b {%%edx:%%eax->%%ecx:%%ebx},");
         ppX86AMode(i->Xin.DACAS.addr);
         return;
      case Xin_FpUnary:
         vex_printf("g%sD ", showX86FpOp(i->Xin.FpUnary.op));
         ppHRegX86(i->Xin.FpUnary.src);
         vex_printf(",");
         ppHRegX86(i->Xin.FpUnary.dst);
         break;
      case Xin_FpBinary:
         vex_printf("g%sD ", showX86FpOp(i->Xin.FpBinary.op));
         ppHRegX86(i->Xin.FpBinary.srcL);
         vex_printf(",");
         ppHRegX86(i->Xin.FpBinary.srcR);
         vex_printf(",");
         ppHRegX86(i->Xin.FpBinary.dst);
         break;
      case Xin_FpLdSt:
         if (i->Xin.FpLdSt.isLoad) {
            vex_printf("gld%c " ,  i->Xin.FpLdSt.sz==10 ? 'T'
                                   : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
            ppX86AMode(i->Xin.FpLdSt.addr);
            vex_printf(", ");
            ppHRegX86(i->Xin.FpLdSt.reg);
         } else {
            vex_printf("gst%c " , i->Xin.FpLdSt.sz==10 ? 'T'
                                  : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
            ppHRegX86(i->Xin.FpLdSt.reg);
            vex_printf(", ");
            ppX86AMode(i->Xin.FpLdSt.addr);
         }
         return;
      case Xin_FpLdStI:
         if (i->Xin.FpLdStI.isLoad) {
            vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
                                  i->Xin.FpLdStI.sz==4 ? "l" : "w");
            ppX86AMode(i->Xin.FpLdStI.addr);
            vex_printf(", ");
            ppHRegX86(i->Xin.FpLdStI.reg);
         } else {
            vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
                                  i->Xin.FpLdStI.sz==4 ? "l" : "w");
            ppHRegX86(i->Xin.FpLdStI.reg);
            vex_printf(", ");
            ppX86AMode(i->Xin.FpLdStI.addr);
         }
         return;
      case Xin_Fp64to32:
         vex_printf("gdtof ");
         ppHRegX86(i->Xin.Fp64to32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Fp64to32.dst);
         return;
      case Xin_FpCMov:
         vex_printf("gcmov%s ", showX86CondCode(i->Xin.FpCMov.cond));
         ppHRegX86(i->Xin.FpCMov.src);
         vex_printf(",");
         ppHRegX86(i->Xin.FpCMov.dst);
         return;
      case Xin_FpLdCW:
         vex_printf("fldcw ");
         ppX86AMode(i->Xin.FpLdCW.addr);
         return;
      case Xin_FpStSW_AX:
         vex_printf("fstsw %%ax");
         return;
      case Xin_FpCmp:
         vex_printf("gcmp ");
         ppHRegX86(i->Xin.FpCmp.srcL);
         vex_printf(",");
         ppHRegX86(i->Xin.FpCmp.srcR);
         vex_printf(",");
         ppHRegX86(i->Xin.FpCmp.dst);
         break;
      case Xin_SseConst:
         vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con);
         ppHRegX86(i->Xin.SseConst.dst);
         break;
      case Xin_SseLdSt:
         vex_printf("movups ");
         if (i->Xin.SseLdSt.isLoad) {
            ppX86AMode(i->Xin.SseLdSt.addr);
            vex_printf(",");
            ppHRegX86(i->Xin.SseLdSt.reg);
         } else {
            ppHRegX86(i->Xin.SseLdSt.reg);
            vex_printf(",");
            ppX86AMode(i->Xin.SseLdSt.addr);
         }
         return;
      case Xin_SseLdzLO:
         vex_printf("movs%s ", i->Xin.SseLdzLO.sz==4 ? "s" : "d");
         ppX86AMode(i->Xin.SseLdzLO.addr);
         vex_printf(",");
         ppHRegX86(i->Xin.SseLdzLO.reg);
         return;
      case Xin_Sse32Fx4:
         vex_printf("%sps ", showX86SseOp(i->Xin.Sse32Fx4.op));
         ppHRegX86(i->Xin.Sse32Fx4.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse32Fx4.dst);
         return;
      case Xin_Sse32FLo:
         vex_printf("%sss ", showX86SseOp(i->Xin.Sse32FLo.op));
         ppHRegX86(i->Xin.Sse32FLo.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse32FLo.dst);
         return;
      case Xin_Sse64Fx2:
         vex_printf("%spd ", showX86SseOp(i->Xin.Sse64Fx2.op));
         ppHRegX86(i->Xin.Sse64Fx2.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse64Fx2.dst);
         return;
      case Xin_Sse64FLo:
         vex_printf("%ssd ", showX86SseOp(i->Xin.Sse64FLo.op));
         ppHRegX86(i->Xin.Sse64FLo.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse64FLo.dst);
         return;
      case Xin_SseReRg:
         vex_printf("%s ", showX86SseOp(i->Xin.SseReRg.op));
         ppHRegX86(i->Xin.SseReRg.src);
         vex_printf(",");
         ppHRegX86(i->Xin.SseReRg.dst);
         return;
      case Xin_SseCMov:
         vex_printf("cmov%s ", showX86CondCode(i->Xin.SseCMov.cond));
         ppHRegX86(i->Xin.SseCMov.src);
         vex_printf(",");
         ppHRegX86(i->Xin.SseCMov.dst);
         return;
      case Xin_SseShuf:
         vex_printf("pshufd $0x%x,", (UInt)i->Xin.SseShuf.order);
         ppHRegX86(i->Xin.SseShuf.src);
         vex_printf(",");
         ppHRegX86(i->Xin.SseShuf.dst);
         return;
      case Xin_EvCheck:
         vex_printf("(evCheck) decl ");
         ppX86AMode(i->Xin.EvCheck.amCounter);
         vex_printf("; jns nofail; jmp *");
         ppX86AMode(i->Xin.EvCheck.amFailAddr);
         vex_printf("; nofail:");
         return;
      case Xin_ProfInc:
         vex_printf("(profInc) addl $1,NotKnownYet; "
                    "adcl $0,NotKnownYet+4");
         return;
      default:
         vpanic("ppX86Instr");
   }
}

/* --------- Helpers for register allocation. --------- */

void getRegUsage_X86Instr (HRegUsage* u, const X86Instr* i, Bool mode64)
{
   Bool unary;
   vassert(mode64 == False);
   initHRegUsage(u);
   switch (i->tag) {
      case Xin_Alu32R:
         addRegUsage_X86RMI(u, i->Xin.Alu32R.src);
         if (i->Xin.Alu32R.op == Xalu_MOV) {
            addHRegUse(u, HRmWrite, i->Xin.Alu32R.dst);
            return;
         }
         if (i->Xin.Alu32R.op == Xalu_CMP) {
            addHRegUse(u, HRmRead, i->Xin.Alu32R.dst);
            return;
         }
         addHRegUse(u, HRmModify, i->Xin.Alu32R.dst);
         return;
      case Xin_Alu32M:
         addRegUsage_X86RI(u, i->Xin.Alu32M.src);
         addRegUsage_X86AMode(u, i->Xin.Alu32M.dst);
         return;
      case Xin_Sh32:
         addHRegUse(u, HRmModify, i->Xin.Sh32.dst);
         if (i->Xin.Sh32.src == 0)
            addHRegUse(u, HRmRead, hregX86_ECX());
         return;
      case Xin_Test32:
         addRegUsage_X86RM(u, i->Xin.Test32.dst, HRmRead);
         return;
      case Xin_Unary32:
         addHRegUse(u, HRmModify, i->Xin.Unary32.dst);
         return;
      case Xin_Lea32:
         addRegUsage_X86AMode(u, i->Xin.Lea32.am);
         addHRegUse(u, HRmWrite, i->Xin.Lea32.dst);
         return;
      case Xin_MulL:
         addRegUsage_X86RM(u, i->Xin.MulL.src, HRmRead);
         addHRegUse(u, HRmModify, hregX86_EAX());
         addHRegUse(u, HRmWrite, hregX86_EDX());
         return;
      case Xin_Div:
         addRegUsage_X86RM(u, i->Xin.Div.src, HRmRead);
         addHRegUse(u, HRmModify, hregX86_EAX());
         addHRegUse(u, HRmModify, hregX86_EDX());
         return;
      case Xin_Sh3232:
         addHRegUse(u, HRmRead, i->Xin.Sh3232.src);
         addHRegUse(u, HRmModify, i->Xin.Sh3232.dst);
         if (i->Xin.Sh3232.amt == 0)
            addHRegUse(u, HRmRead, hregX86_ECX());
         return;
      case Xin_Push:
         addRegUsage_X86RMI(u, i->Xin.Push.src);
         addHRegUse(u, HRmModify, hregX86_ESP());
         return;
      case Xin_Call:
         /* This is a bit subtle. */
         /* First off, claim it trashes all the caller-saved regs
            which fall within the register allocator's jurisdiction.
            These I believe to be %eax %ecx %edx and all the xmm
            registers. */
         addHRegUse(u, HRmWrite, hregX86_EAX());
         addHRegUse(u, HRmWrite, hregX86_ECX());
         addHRegUse(u, HRmWrite, hregX86_EDX());
         addHRegUse(u, HRmWrite, hregX86_XMM0());
         addHRegUse(u, HRmWrite, hregX86_XMM1());
         addHRegUse(u, HRmWrite, hregX86_XMM2());
         addHRegUse(u, HRmWrite, hregX86_XMM3());
         addHRegUse(u, HRmWrite, hregX86_XMM4());
         addHRegUse(u, HRmWrite, hregX86_XMM5());
         addHRegUse(u, HRmWrite, hregX86_XMM6());
         addHRegUse(u, HRmWrite, hregX86_XMM7());
         /* Now we have to state any parameter-carrying registers
            which might be read.  This depends on the regparmness. */
         switch (i->Xin.Call.regparms) {
            case 3: addHRegUse(u, HRmRead, hregX86_ECX()); /*fallthru*/
            case 2: addHRegUse(u, HRmRead, hregX86_EDX()); /*fallthru*/
            case 1: addHRegUse(u, HRmRead, hregX86_EAX()); break;
            case 0: break;
            default: vpanic("getRegUsage_X86Instr:Call:regparms");
         }
         /* Finally, there is the issue that the insn trashes a
            register because the literal target address has to be
            loaded into a register.  Fortunately, for the 0/1/2
            regparm case, we can use EAX, EDX and ECX respectively, so
            this does not cause any further damage.  For the 3-regparm
            case, we'll have to choose another register arbitrarily --
            since A, D and C are used for parameters -- and so we might
            as well choose EDI. */
         if (i->Xin.Call.regparms == 3)
            addHRegUse(u, HRmWrite, hregX86_EDI());
         /* Upshot of this is that the assembler really must observe
            the here-stated convention of which register to use as an
            address temporary, depending on the regparmness: 0==EAX,
            1==EDX, 2==ECX, 3==EDI. */
         return;
      /* XDirect/XIndir/XAssisted are also a bit subtle.  They
         conditionally exit the block.  Hence we only need to list (1)
         the registers that they read, and (2) the registers that they
         write in the case where the block is not exited.  (2) is
         empty, hence only (1) is relevant here. */
      case Xin_XDirect:
         addRegUsage_X86AMode(u, i->Xin.XDirect.amEIP);
         return;
      case Xin_XIndir:
         addHRegUse(u, HRmRead, i->Xin.XIndir.dstGA);
         addRegUsage_X86AMode(u, i->Xin.XIndir.amEIP);
         return;
      case Xin_XAssisted:
         addHRegUse(u, HRmRead, i->Xin.XAssisted.dstGA);
         addRegUsage_X86AMode(u, i->Xin.XAssisted.amEIP);
         return;
      case Xin_CMov32:
         addRegUsage_X86RM(u, i->Xin.CMov32.src, HRmRead);
         addHRegUse(u, HRmModify, i->Xin.CMov32.dst);
         return;
      case Xin_LoadEX:
         addRegUsage_X86AMode(u, i->Xin.LoadEX.src);
         addHRegUse(u, HRmWrite, i->Xin.LoadEX.dst);
         return;
      case Xin_Store:
         addHRegUse(u, HRmRead, i->Xin.Store.src);
         addRegUsage_X86AMode(u, i->Xin.Store.dst);
         return;
      case Xin_Set32:
         addHRegUse(u, HRmWrite, i->Xin.Set32.dst);
         return;
      case Xin_Bsfr32:
         addHRegUse(u, HRmRead, i->Xin.Bsfr32.src);
         addHRegUse(u, HRmWrite, i->Xin.Bsfr32.dst);
         return;
      case Xin_MFence:
         return;
      case Xin_ACAS:
         addRegUsage_X86AMode(u, i->Xin.ACAS.addr);
         addHRegUse(u, HRmRead, hregX86_EBX());
         addHRegUse(u, HRmModify, hregX86_EAX());
         return;
      case Xin_DACAS:
         addRegUsage_X86AMode(u, i->Xin.DACAS.addr);
         addHRegUse(u, HRmRead, hregX86_ECX());
         addHRegUse(u, HRmRead, hregX86_EBX());
         addHRegUse(u, HRmModify, hregX86_EDX());
         addHRegUse(u, HRmModify, hregX86_EAX());
         return;
      case Xin_FpUnary:
         addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
         addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
         return;
      case Xin_FpBinary:
         addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL);
         addHRegUse(u, HRmRead, i->Xin.FpBinary.srcR);
         addHRegUse(u, HRmWrite, i->Xin.FpBinary.dst);
         return;
      case Xin_FpLdSt:
         addRegUsage_X86AMode(u, i->Xin.FpLdSt.addr);
         addHRegUse(u, i->Xin.FpLdSt.isLoad ? HRmWrite : HRmRead,
                       i->Xin.FpLdSt.reg);
         return;
      case Xin_FpLdStI:
         addRegUsage_X86AMode(u, i->Xin.FpLdStI.addr);
         addHRegUse(u, i->Xin.FpLdStI.isLoad ? HRmWrite : HRmRead,
                       i->Xin.FpLdStI.reg);
         return;
      case Xin_Fp64to32:
         addHRegUse(u, HRmRead,  i->Xin.Fp64to32.src);
         addHRegUse(u, HRmWrite, i->Xin.Fp64to32.dst);
         return;
      case Xin_FpCMov:
         addHRegUse(u, HRmRead,   i->Xin.FpCMov.src);
         addHRegUse(u, HRmModify, i->Xin.FpCMov.dst);
         return;
      case Xin_FpLdCW:
         addRegUsage_X86AMode(u, i->Xin.FpLdCW.addr);
         return;
      case Xin_FpStSW_AX:
         addHRegUse(u, HRmWrite, hregX86_EAX());
         return;
      case Xin_FpCmp:
         addHRegUse(u, HRmRead, i->Xin.FpCmp.srcL);
         addHRegUse(u, HRmRead, i->Xin.FpCmp.srcR);
         addHRegUse(u, HRmWrite, i->Xin.FpCmp.dst);
         addHRegUse(u, HRmWrite, hregX86_EAX());
         return;
      case Xin_SseLdSt:
         addRegUsage_X86AMode(u, i->Xin.SseLdSt.addr);
         addHRegUse(u, i->Xin.SseLdSt.isLoad ? HRmWrite : HRmRead,
                       i->Xin.SseLdSt.reg);
         return;
      case Xin_SseLdzLO:
         addRegUsage_X86AMode(u, i->Xin.SseLdzLO.addr);
         addHRegUse(u, HRmWrite, i->Xin.SseLdzLO.reg);
         return;
      case Xin_SseConst:
         addHRegUse(u, HRmWrite, i->Xin.SseConst.dst);
         return;
      case Xin_Sse32Fx4:
         vassert(i->Xin.Sse32Fx4.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse32Fx4.op == Xsse_RCPF
                         || i->Xin.Sse32Fx4.op == Xsse_RSQRTF
                         || i->Xin.Sse32Fx4.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse32Fx4.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Xin.Sse32Fx4.dst);
         return;
      case Xin_Sse32FLo:
         vassert(i->Xin.Sse32FLo.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse32FLo.op == Xsse_RCPF
                         || i->Xin.Sse32FLo.op == Xsse_RSQRTF
                         || i->Xin.Sse32FLo.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse32FLo.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Xin.Sse32FLo.dst);
         return;
      case Xin_Sse64Fx2:
         vassert(i->Xin.Sse64Fx2.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse64Fx2.op == Xsse_RCPF
                         || i->Xin.Sse64Fx2.op == Xsse_RSQRTF
                         || i->Xin.Sse64Fx2.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse64Fx2.src);
   1444          addHRegUse(u, unary ? HRmWrite : HRmModify,
   1445                        i->Xin.Sse64Fx2.dst);
   1446          return;
   1447       case Xin_Sse64FLo:
   1448          vassert(i->Xin.Sse64FLo.op != Xsse_MOV);
   1449          unary = toBool( i->Xin.Sse64FLo.op == Xsse_RCPF
   1450                          || i->Xin.Sse64FLo.op == Xsse_RSQRTF
   1451                          || i->Xin.Sse64FLo.op == Xsse_SQRTF );
   1452          addHRegUse(u, HRmRead, i->Xin.Sse64FLo.src);
   1453          addHRegUse(u, unary ? HRmWrite : HRmModify,
   1454                        i->Xin.Sse64FLo.dst);
   1455          return;
   1456       case Xin_SseReRg:
   1457          if (i->Xin.SseReRg.op == Xsse_XOR
   1458              && sameHReg(i->Xin.SseReRg.src, i->Xin.SseReRg.dst)) {
   1459             /* reg-alloc needs to understand 'xor r,r' as a write of r */
   1460             /* (as opposed to a rite of passage :-) */
   1461             addHRegUse(u, HRmWrite, i->Xin.SseReRg.dst);
   1462          } else {
   1463             addHRegUse(u, HRmRead, i->Xin.SseReRg.src);
   1464             addHRegUse(u, i->Xin.SseReRg.op == Xsse_MOV
   1465                              ? HRmWrite : HRmModify,
   1466                           i->Xin.SseReRg.dst);
   1467          }
   1468          return;
   1469       case Xin_SseCMov:
   1470          addHRegUse(u, HRmRead,   i->Xin.SseCMov.src);
   1471          addHRegUse(u, HRmModify, i->Xin.SseCMov.dst);
   1472          return;
   1473       case Xin_SseShuf:
   1474          addHRegUse(u, HRmRead,  i->Xin.SseShuf.src);
   1475          addHRegUse(u, HRmWrite, i->Xin.SseShuf.dst);
   1476          return;
   1477       case Xin_EvCheck:
   1478          /* We expect both amodes only to mention %ebp, so this is in
   1479             fact pointless, since %ebp isn't allocatable, but anyway.. */
   1480          addRegUsage_X86AMode(u, i->Xin.EvCheck.amCounter);
   1481          addRegUsage_X86AMode(u, i->Xin.EvCheck.amFailAddr);
   1482          return;
   1483       case Xin_ProfInc:
   1484          /* does not use any registers. */
   1485          return;
   1486       default:
   1487          ppX86Instr(i, False);
   1488          vpanic("getRegUsage_X86Instr");
   1489    }
   1490 }
   1491 
   1492 /* local helper */
   1493 static void mapReg( HRegRemap* m, HReg* r )
   1494 {
   1495    *r = lookupHRegRemap(m, *r);
   1496 }
   1497 
   1498 void mapRegs_X86Instr ( HRegRemap* m, X86Instr* i, Bool mode64 )
   1499 {
   1500    vassert(mode64 == False);
   1501    switch (i->tag) {
   1502       case Xin_Alu32R:
   1503          mapRegs_X86RMI(m, i->Xin.Alu32R.src);
   1504          mapReg(m, &i->Xin.Alu32R.dst);
   1505          return;
   1506       case Xin_Alu32M:
   1507          mapRegs_X86RI(m, i->Xin.Alu32M.src);
   1508          mapRegs_X86AMode(m, i->Xin.Alu32M.dst);
   1509          return;
   1510       case Xin_Sh32:
   1511          mapReg(m, &i->Xin.Sh32.dst);
   1512          return;
   1513       case Xin_Test32:
   1514          mapRegs_X86RM(m, i->Xin.Test32.dst);
   1515          return;
   1516       case Xin_Unary32:
   1517          mapReg(m, &i->Xin.Unary32.dst);
   1518          return;
   1519       case Xin_Lea32:
   1520          mapRegs_X86AMode(m, i->Xin.Lea32.am);
   1521          mapReg(m, &i->Xin.Lea32.dst);
   1522          return;
   1523       case Xin_MulL:
   1524          mapRegs_X86RM(m, i->Xin.MulL.src);
   1525          return;
   1526       case Xin_Div:
   1527          mapRegs_X86RM(m, i->Xin.Div.src);
   1528          return;
   1529       case Xin_Sh3232:
   1530          mapReg(m, &i->Xin.Sh3232.src);
   1531          mapReg(m, &i->Xin.Sh3232.dst);
   1532          return;
   1533       case Xin_Push:
   1534          mapRegs_X86RMI(m, i->Xin.Push.src);
   1535          return;
   1536       case Xin_Call:
   1537          return;
   1538       case Xin_XDirect:
   1539          mapRegs_X86AMode(m, i->Xin.XDirect.amEIP);
   1540          return;
   1541       case Xin_XIndir:
   1542          mapReg(m, &i->Xin.XIndir.dstGA);
   1543          mapRegs_X86AMode(m, i->Xin.XIndir.amEIP);
   1544          return;
   1545       case Xin_XAssisted:
   1546          mapReg(m, &i->Xin.XAssisted.dstGA);
   1547          mapRegs_X86AMode(m, i->Xin.XAssisted.amEIP);
   1548          return;
   1549       case Xin_CMov32:
   1550          mapRegs_X86RM(m, i->Xin.CMov32.src);
   1551          mapReg(m, &i->Xin.CMov32.dst);
   1552          return;
   1553       case Xin_LoadEX:
   1554          mapRegs_X86AMode(m, i->Xin.LoadEX.src);
   1555          mapReg(m, &i->Xin.LoadEX.dst);
   1556          return;
   1557       case Xin_Store:
   1558          mapReg(m, &i->Xin.Store.src);
   1559          mapRegs_X86AMode(m, i->Xin.Store.dst);
   1560          return;
   1561       case Xin_Set32:
   1562          mapReg(m, &i->Xin.Set32.dst);
   1563          return;
   1564       case Xin_Bsfr32:
   1565          mapReg(m, &i->Xin.Bsfr32.src);
   1566          mapReg(m, &i->Xin.Bsfr32.dst);
   1567          return;
   1568       case Xin_MFence:
   1569          return;
   1570       case Xin_ACAS:
   1571          mapRegs_X86AMode(m, i->Xin.ACAS.addr);
   1572          return;
   1573       case Xin_DACAS:
   1574          mapRegs_X86AMode(m, i->Xin.DACAS.addr);
   1575          return;
   1576       case Xin_FpUnary:
   1577          mapReg(m, &i->Xin.FpUnary.src);
   1578          mapReg(m, &i->Xin.FpUnary.dst);
   1579          return;
   1580       case Xin_FpBinary:
   1581          mapReg(m, &i->Xin.FpBinary.srcL);
   1582          mapReg(m, &i->Xin.FpBinary.srcR);
   1583          mapReg(m, &i->Xin.FpBinary.dst);
   1584          return;
   1585       case Xin_FpLdSt:
   1586          mapRegs_X86AMode(m, i->Xin.FpLdSt.addr);
   1587          mapReg(m, &i->Xin.FpLdSt.reg);
   1588          return;
   1589       case Xin_FpLdStI:
   1590          mapRegs_X86AMode(m, i->Xin.FpLdStI.addr);
   1591          mapReg(m, &i->Xin.FpLdStI.reg);
   1592          return;
   1593       case Xin_Fp64to32:
   1594          mapReg(m, &i->Xin.Fp64to32.src);
   1595          mapReg(m, &i->Xin.Fp64to32.dst);
   1596          return;
   1597       case Xin_FpCMov:
   1598          mapReg(m, &i->Xin.FpCMov.src);
   1599          mapReg(m, &i->Xin.FpCMov.dst);
   1600          return;
   1601       case Xin_FpLdCW:
   1602          mapRegs_X86AMode(m, i->Xin.FpLdCW.addr);
   1603          return;
   1604       case Xin_FpStSW_AX:
   1605          return;
   1606       case Xin_FpCmp:
   1607          mapReg(m, &i->Xin.FpCmp.srcL);
   1608          mapReg(m, &i->Xin.FpCmp.srcR);
   1609          mapReg(m, &i->Xin.FpCmp.dst);
   1610          return;
   1611       case Xin_SseConst:
   1612          mapReg(m, &i->Xin.SseConst.dst);
   1613          return;
   1614       case Xin_SseLdSt:
   1615          mapReg(m, &i->Xin.SseLdSt.reg);
   1616          mapRegs_X86AMode(m, i->Xin.SseLdSt.addr);
    1617          return;
   1618       case Xin_SseLdzLO:
   1619          mapReg(m, &i->Xin.SseLdzLO.reg);
   1620          mapRegs_X86AMode(m, i->Xin.SseLdzLO.addr);
    1621          return;
   1622       case Xin_Sse32Fx4:
   1623          mapReg(m, &i->Xin.Sse32Fx4.src);
   1624          mapReg(m, &i->Xin.Sse32Fx4.dst);
   1625          return;
   1626       case Xin_Sse32FLo:
   1627          mapReg(m, &i->Xin.Sse32FLo.src);
   1628          mapReg(m, &i->Xin.Sse32FLo.dst);
   1629          return;
   1630       case Xin_Sse64Fx2:
   1631          mapReg(m, &i->Xin.Sse64Fx2.src);
   1632          mapReg(m, &i->Xin.Sse64Fx2.dst);
   1633          return;
   1634       case Xin_Sse64FLo:
   1635          mapReg(m, &i->Xin.Sse64FLo.src);
   1636          mapReg(m, &i->Xin.Sse64FLo.dst);
   1637          return;
   1638       case Xin_SseReRg:
   1639          mapReg(m, &i->Xin.SseReRg.src);
   1640          mapReg(m, &i->Xin.SseReRg.dst);
   1641          return;
   1642       case Xin_SseCMov:
   1643          mapReg(m, &i->Xin.SseCMov.src);
   1644          mapReg(m, &i->Xin.SseCMov.dst);
   1645          return;
   1646       case Xin_SseShuf:
   1647          mapReg(m, &i->Xin.SseShuf.src);
   1648          mapReg(m, &i->Xin.SseShuf.dst);
   1649          return;
   1650       case Xin_EvCheck:
   1651          /* We expect both amodes only to mention %ebp, so this is in
   1652             fact pointless, since %ebp isn't allocatable, but anyway.. */
   1653          mapRegs_X86AMode(m, i->Xin.EvCheck.amCounter);
   1654          mapRegs_X86AMode(m, i->Xin.EvCheck.amFailAddr);
   1655          return;
   1656       case Xin_ProfInc:
   1657          /* does not use any registers. */
   1658          return;
   1659 
   1660       default:
   1661          ppX86Instr(i, mode64);
   1662          vpanic("mapRegs_X86Instr");
   1663    }
   1664 }
   1665 
   1666 /* Figure out if i represents a reg-reg move, and if so assign the
   1667    source and destination to *src and *dst.  If in doubt say No.  Used
   1668    by the register allocator to do move coalescing.
   1669 */
   1670 Bool isMove_X86Instr ( const X86Instr* i, HReg* src, HReg* dst )
   1671 {
   1672    /* Moves between integer regs */
   1673    if (i->tag == Xin_Alu32R) {
   1674       if (i->Xin.Alu32R.op != Xalu_MOV)
   1675          return False;
   1676       if (i->Xin.Alu32R.src->tag != Xrmi_Reg)
   1677          return False;
   1678       *src = i->Xin.Alu32R.src->Xrmi.Reg.reg;
   1679       *dst = i->Xin.Alu32R.dst;
   1680       return True;
   1681    }
   1682    /* Moves between FP regs */
   1683    if (i->tag == Xin_FpUnary) {
   1684       if (i->Xin.FpUnary.op != Xfp_MOV)
   1685          return False;
   1686       *src = i->Xin.FpUnary.src;
   1687       *dst = i->Xin.FpUnary.dst;
   1688       return True;
   1689    }
   1690    if (i->tag == Xin_SseReRg) {
   1691       if (i->Xin.SseReRg.op != Xsse_MOV)
   1692          return False;
   1693       *src = i->Xin.SseReRg.src;
   1694       *dst = i->Xin.SseReRg.dst;
   1695       return True;
   1696    }
   1697    return False;
   1698 }
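
         /* A minimal sketch (hypothetical, not part of this file) of
            how a register allocator might use isMove_X86Instr for move
            coalescing:

               HReg src, dst;
               if (isMove_X86Instr(instr, &src, &dst)) {
                  .. try to place 'src' and 'dst' in the same real
                  .. register, after which the move is redundant
               }

            'instr' stands for the instruction under consideration. */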
   1699 
   1700 
   1701 /* Generate x86 spill/reload instructions under the direction of the
   1702    register allocator.  Note it's critical these don't write the
   1703    condition codes. */
   1704 
   1705 void genSpill_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
   1706                     HReg rreg, Int offsetB, Bool mode64 )
   1707 {
   1708    X86AMode* am;
   1709    vassert(offsetB >= 0);
   1710    vassert(!hregIsVirtual(rreg));
   1711    vassert(mode64 == False);
   1712    *i1 = *i2 = NULL;
   1713    am = X86AMode_IR(offsetB, hregX86_EBP());
   1714    switch (hregClass(rreg)) {
   1715       case HRcInt32:
   1716          *i1 = X86Instr_Alu32M ( Xalu_MOV, X86RI_Reg(rreg), am );
   1717          return;
   1718       case HRcFlt64:
   1719          *i1 = X86Instr_FpLdSt ( False/*store*/, 10, rreg, am );
   1720          return;
   1721       case HRcVec128:
   1722          *i1 = X86Instr_SseLdSt ( False/*store*/, rreg, am );
   1723          return;
   1724       default:
   1725          ppHRegClass(hregClass(rreg));
   1726          vpanic("genSpill_X86: unimplemented regclass");
   1727    }
   1728 }
   1729 
   1730 void genReload_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
   1731                      HReg rreg, Int offsetB, Bool mode64 )
   1732 {
   1733    X86AMode* am;
   1734    vassert(offsetB >= 0);
   1735    vassert(!hregIsVirtual(rreg));
   1736    vassert(mode64 == False);
   1737    *i1 = *i2 = NULL;
   1738    am = X86AMode_IR(offsetB, hregX86_EBP());
   1739    switch (hregClass(rreg)) {
   1740       case HRcInt32:
   1741          *i1 = X86Instr_Alu32R ( Xalu_MOV, X86RMI_Mem(am), rreg );
   1742          return;
   1743       case HRcFlt64:
   1744          *i1 = X86Instr_FpLdSt ( True/*load*/, 10, rreg, am );
   1745          return;
   1746       case HRcVec128:
   1747          *i1 = X86Instr_SseLdSt ( True/*load*/, rreg, am );
   1748          return;
   1749       default:
   1750          ppHRegClass(hregClass(rreg));
   1751          vpanic("genReload_X86: unimplemented regclass");
   1752    }
   1753 }
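
         /* As a concrete example of the above (a sketch, with an
            arbitrary offset): spilling an HRcInt32 register %esi at
            offset 0x18 gives
               movl %esi, 0x18(%ebp)
            and the matching reload is
               movl 0x18(%ebp), %esi
            both of which leave %eflags untouched, as required. */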
   1754 
   1755 /* The given instruction reads the specified vreg exactly once, and
   1756    that vreg is currently located at the given spill offset.  If
    1757    possible, return a variant of the instruction that instead
    1758    references the spill slot directly. */
   1759 
   1760 X86Instr* directReload_X86( X86Instr* i, HReg vreg, Short spill_off )
   1761 {
   1762    vassert(spill_off >= 0 && spill_off < 10000); /* let's say */
   1763 
   1764    /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg
   1765       Convert to: src=RMI_Mem, dst=Reg
   1766    */
   1767    if (i->tag == Xin_Alu32R
   1768        && (i->Xin.Alu32R.op == Xalu_MOV || i->Xin.Alu32R.op == Xalu_OR
   1769            || i->Xin.Alu32R.op == Xalu_XOR)
   1770        && i->Xin.Alu32R.src->tag == Xrmi_Reg
   1771        && sameHReg(i->Xin.Alu32R.src->Xrmi.Reg.reg, vreg)) {
   1772       vassert(! sameHReg(i->Xin.Alu32R.dst, vreg));
   1773       return X86Instr_Alu32R(
   1774                 i->Xin.Alu32R.op,
   1775                 X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP())),
   1776                 i->Xin.Alu32R.dst
   1777              );
   1778    }
   1779 
   1780    /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg
   1781       Convert to: src=RI_Imm, dst=Mem
   1782    */
   1783    if (i->tag == Xin_Alu32R
   1784        && (i->Xin.Alu32R.op == Xalu_CMP)
   1785        && i->Xin.Alu32R.src->tag == Xrmi_Imm
   1786        && sameHReg(i->Xin.Alu32R.dst, vreg)) {
   1787       return X86Instr_Alu32M(
   1788                 i->Xin.Alu32R.op,
    1789                 X86RI_Imm( i->Xin.Alu32R.src->Xrmi.Imm.imm32 ),
   1790                 X86AMode_IR( spill_off, hregX86_EBP())
   1791              );
   1792    }
   1793 
   1794    /* Deal with form: Push(RMI_Reg)
   1795       Convert to: Push(RMI_Mem)
   1796    */
   1797    if (i->tag == Xin_Push
   1798        && i->Xin.Push.src->tag == Xrmi_Reg
   1799        && sameHReg(i->Xin.Push.src->Xrmi.Reg.reg, vreg)) {
   1800       return X86Instr_Push(
   1801                 X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP()))
   1802              );
   1803    }
   1804 
   1805    /* Deal with form: CMov32(src=RM_Reg, dst) where vreg == src
   1806       Convert to CMov32(RM_Mem, dst) */
   1807    if (i->tag == Xin_CMov32
   1808        && i->Xin.CMov32.src->tag == Xrm_Reg
   1809        && sameHReg(i->Xin.CMov32.src->Xrm.Reg.reg, vreg)) {
   1810       vassert(! sameHReg(i->Xin.CMov32.dst, vreg));
   1811       return X86Instr_CMov32(
   1812                 i->Xin.CMov32.cond,
   1813                 X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() )),
   1814                 i->Xin.CMov32.dst
   1815              );
   1816    }
   1817 
   1818    /* Deal with form: Test32(imm,RM_Reg vreg) -> Test32(imm,amode) */
   1819    if (i->tag == Xin_Test32
   1820        && i->Xin.Test32.dst->tag == Xrm_Reg
   1821        && sameHReg(i->Xin.Test32.dst->Xrm.Reg.reg, vreg)) {
   1822       return X86Instr_Test32(
   1823                 i->Xin.Test32.imm32,
   1824                 X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() ) )
   1825              );
   1826    }
   1827 
   1828    return NULL;
   1829 }
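
         /* For instance (a sketch): if vreg v is spilled at offset 24,
            the transformation above turns
               orl %v, %edi
            into
               orl 24(%ebp), %edi
            avoiding an explicit reload of v into a scratch register. */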
   1830 
   1831 
   1832 /* --------- The x86 assembler (bleh.) --------- */
   1833 
   1834 inline static UInt iregEnc ( HReg r )
   1835 {
   1836    UInt n;
   1837    vassert(hregClass(r) == HRcInt32);
   1838    vassert(!hregIsVirtual(r));
   1839    n = hregEncoding(r);
   1840    vassert(n <= 7);
   1841    return n;
   1842 }
   1843 
   1844 inline static UInt fregEnc ( HReg r )
   1845 {
   1846    UInt n;
   1847    vassert(hregClass(r) == HRcFlt64);
   1848    vassert(!hregIsVirtual(r));
   1849    n = hregEncoding(r);
   1850    vassert(n <= 5);
   1851    return n;
   1852 }
   1853 
   1854 inline static UInt vregEnc ( HReg r )
   1855 {
   1856    UInt n;
   1857    vassert(hregClass(r) == HRcVec128);
   1858    vassert(!hregIsVirtual(r));
   1859    n = hregEncoding(r);
   1860    vassert(n <= 7);
   1861    return n;
   1862 }
   1863 
   1864 inline static UChar mkModRegRM ( UInt mod, UInt reg, UInt regmem )
   1865 {
   1866    vassert(mod < 4);
   1867    vassert((reg|regmem) < 8);
   1868    return (UChar)( ((mod & 3) << 6) | ((reg & 7) << 3) | (regmem & 7) );
   1869 }
   1870 
   1871 inline static UChar mkSIB ( UInt shift, UInt regindex, UInt regbase )
   1872 {
   1873    vassert(shift < 4);
   1874    vassert((regindex|regbase) < 8);
   1875    return (UChar)( ((shift & 3) << 6) | ((regindex & 7) << 3) | (regbase & 7) );
   1876 }
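
         /* Worked examples: mkModRegRM(3,0,1) is 0xC1, the reg-reg form
            with %eax (0) in the reg field and %ecx (1) in the r/m field;
            mkSIB(2,1,3) is 0x8B, denoting (%ebx,%ecx,4). */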
   1877 
   1878 static UChar* emit32 ( UChar* p, UInt w32 )
   1879 {
   1880    *p++ = toUChar( w32        & 0x000000FF);
   1881    *p++ = toUChar((w32 >>  8) & 0x000000FF);
   1882    *p++ = toUChar((w32 >> 16) & 0x000000FF);
   1883    *p++ = toUChar((w32 >> 24) & 0x000000FF);
   1884    return p;
   1885 }
   1886 
   1887 /* Does a sign-extend of the lowest 8 bits give
   1888    the original number? */
   1889 static Bool fits8bits ( UInt w32 )
   1890 {
   1891    Int i32 = (Int)w32;
   1892    return toBool(i32 == ((Int)(w32 << 24) >> 24));
   1893 }
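
         /* For example, fits8bits(0x7F) and fits8bits(0xFFFFFF80) (that
            is, -128) are True, but fits8bits(0x80) (that is, 128) is
            False, since 128 is not the sign-extension of any 8-bit
            value. */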
   1894 
   1895 
   1896 /* Forming mod-reg-rm bytes and scale-index-base bytes.
   1897 
   1898      greg,  0(ereg)    |  ereg != ESP && ereg != EBP
   1899                        =  00 greg ereg
   1900 
   1901      greg,  d8(ereg)   |  ereg != ESP
   1902                        =  01 greg ereg, d8
   1903 
   1904      greg,  d32(ereg)  |  ereg != ESP
   1905                        =  10 greg ereg, d32
   1906 
   1907      greg,  d8(%esp)   =  01 greg 100, 0x24, d8
   1908 
   1909      -----------------------------------------------
   1910 
   1911      greg,  d8(base,index,scale)
   1912                |  index != ESP
   1913                =  01 greg 100, scale index base, d8
   1914 
   1915      greg,  d32(base,index,scale)
   1916                |  index != ESP
   1917                =  10 greg 100, scale index base, d32
   1918 */
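
         /* A worked instance of the d8(ereg) rule (a sketch): greg = %eax
            (enc 0) with amode 8(%esi) (ereg enc 6) gives
               01 000 110, 0x08
            that is, bytes 0x46 0x08. */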
   1919 static UChar* doAMode_M__wrk ( UChar* p, UInt gregEnc, X86AMode* am )
   1920 {
   1921    if (am->tag == Xam_IR) {
   1922       if (am->Xam.IR.imm == 0
   1923           && ! sameHReg(am->Xam.IR.reg, hregX86_ESP())
   1924           && ! sameHReg(am->Xam.IR.reg, hregX86_EBP()) ) {
   1925          *p++ = mkModRegRM(0, gregEnc, iregEnc(am->Xam.IR.reg));
   1926          return p;
   1927       }
   1928       if (fits8bits(am->Xam.IR.imm)
   1929           && ! sameHReg(am->Xam.IR.reg, hregX86_ESP())) {
   1930          *p++ = mkModRegRM(1, gregEnc, iregEnc(am->Xam.IR.reg));
   1931          *p++ = toUChar(am->Xam.IR.imm & 0xFF);
   1932          return p;
   1933       }
   1934       if (! sameHReg(am->Xam.IR.reg, hregX86_ESP())) {
   1935          *p++ = mkModRegRM(2, gregEnc, iregEnc(am->Xam.IR.reg));
   1936          p = emit32(p, am->Xam.IR.imm);
   1937          return p;
   1938       }
   1939       if (sameHReg(am->Xam.IR.reg, hregX86_ESP())
   1940           && fits8bits(am->Xam.IR.imm)) {
    1941          *p++ = mkModRegRM(1, gregEnc, 4);
   1942          *p++ = 0x24;
   1943          *p++ = toUChar(am->Xam.IR.imm & 0xFF);
   1944          return p;
   1945       }
   1946       ppX86AMode(am);
   1947       vpanic("doAMode_M: can't emit amode IR");
   1948       /*NOTREACHED*/
   1949    }
   1950    if (am->tag == Xam_IRRS) {
   1951       if (fits8bits(am->Xam.IRRS.imm)
   1952           && ! sameHReg(am->Xam.IRRS.index, hregX86_ESP())) {
   1953          *p++ = mkModRegRM(1, gregEnc, 4);
   1954          *p++ = mkSIB(am->Xam.IRRS.shift, iregEnc(am->Xam.IRRS.index),
   1955                                           iregEnc(am->Xam.IRRS.base));
   1956          *p++ = toUChar(am->Xam.IRRS.imm & 0xFF);
   1957          return p;
   1958       }
   1959       if (! sameHReg(am->Xam.IRRS.index, hregX86_ESP())) {
   1960          *p++ = mkModRegRM(2, gregEnc, 4);
   1961          *p++ = mkSIB(am->Xam.IRRS.shift, iregEnc(am->Xam.IRRS.index),
   1962                                           iregEnc(am->Xam.IRRS.base));
   1963          p = emit32(p, am->Xam.IRRS.imm);
   1964          return p;
   1965       }
   1966       ppX86AMode(am);
   1967       vpanic("doAMode_M: can't emit amode IRRS");
   1968       /*NOTREACHED*/
   1969    }
   1970    vpanic("doAMode_M: unknown amode");
   1971    /*NOTREACHED*/
   1972 }
   1973 
   1974 static UChar* doAMode_M ( UChar* p, HReg greg, X86AMode* am )
   1975 {
   1976    return doAMode_M__wrk(p, iregEnc(greg), am);
   1977 }
   1978 
   1979 static UChar* doAMode_M_enc ( UChar* p, UInt gregEnc, X86AMode* am )
   1980 {
   1981    vassert(gregEnc < 8);
   1982    return doAMode_M__wrk(p, gregEnc, am);
   1983 }
   1984 
   1985 
   1986 /* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
   1987 inline static UChar* doAMode_R__wrk ( UChar* p, UInt gregEnc, UInt eregEnc )
   1988 {
   1989    *p++ = mkModRegRM(3, gregEnc, eregEnc);
   1990    return p;
   1991 }
   1992 
   1993 static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
   1994 {
   1995    return doAMode_R__wrk(p, iregEnc(greg), iregEnc(ereg));
   1996 }
   1997 
   1998 static UChar* doAMode_R_enc_reg ( UChar* p, UInt gregEnc, HReg ereg )
   1999 {
   2000    vassert(gregEnc < 8);
   2001    return doAMode_R__wrk(p, gregEnc, iregEnc(ereg));
   2002 }
   2003 
   2004 static UChar* doAMode_R_enc_enc ( UChar* p, UInt gregEnc, UInt eregEnc )
   2005 {
   2006    vassert( (gregEnc|eregEnc) < 8);
   2007    return doAMode_R__wrk(p, gregEnc, eregEnc);
   2008 }
   2009 
   2010 
   2011 /* Emit ffree %st(7) */
   2012 static UChar* do_ffree_st7 ( UChar* p )
   2013 {
   2014    *p++ = 0xDD;
   2015    *p++ = 0xC7;
   2016    return p;
   2017 }
   2018 
   2019 /* Emit fstp %st(i), 1 <= i <= 7 */
   2020 static UChar* do_fstp_st ( UChar* p, Int i )
   2021 {
   2022    vassert(1 <= i && i <= 7);
   2023    *p++ = 0xDD;
   2024    *p++ = toUChar(0xD8+i);
   2025    return p;
   2026 }
   2027 
   2028 /* Emit fld %st(i), 0 <= i <= 6 */
   2029 static UChar* do_fld_st ( UChar* p, Int i )
   2030 {
   2031    vassert(0 <= i && i <= 6);
   2032    *p++ = 0xD9;
   2033    *p++ = toUChar(0xC0+i);
   2034    return p;
   2035 }
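
         /* Example encodings from the three helpers above: ffree %st(7)
            is DD C7, fstp %st(1) is DD D9, and fld %st(0) is D9 C0. */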
   2036 
   2037 /* Emit f<op> %st(0) */
   2038 static UChar* do_fop1_st ( UChar* p, X86FpOp op )
   2039 {
   2040    switch (op) {
   2041       case Xfp_NEG:    *p++ = 0xD9; *p++ = 0xE0; break;
   2042       case Xfp_ABS:    *p++ = 0xD9; *p++ = 0xE1; break;
   2043       case Xfp_SQRT:   *p++ = 0xD9; *p++ = 0xFA; break;
   2044       case Xfp_ROUND:  *p++ = 0xD9; *p++ = 0xFC; break;
   2045       case Xfp_SIN:    *p++ = 0xD9; *p++ = 0xFE; break;
   2046       case Xfp_COS:    *p++ = 0xD9; *p++ = 0xFF; break;
   2047       case Xfp_2XM1:   *p++ = 0xD9; *p++ = 0xF0; break;
   2048       case Xfp_MOV:    break;
   2049       case Xfp_TAN:
   2050          /* fptan pushes 1.0 on the FP stack, except when the argument
   2051             is out of range.  Hence we have to do the instruction,
   2052             then inspect C2 to see if there is an out of range
   2053             condition.  If there is, we skip the fincstp that is used
   2054             by the in-range case to get rid of this extra 1.0
   2055             value. */
   2056          p = do_ffree_st7(p); /* since fptan sometimes pushes 1.0 */
   2057          *p++ = 0xD9; *p++ = 0xF2; // fptan
   2058          *p++ = 0x50;              // pushl %eax
   2059          *p++ = 0xDF; *p++ = 0xE0; // fnstsw %ax
   2060          *p++ = 0x66; *p++ = 0xA9;
   2061          *p++ = 0x00; *p++ = 0x04; // testw $0x400,%ax
   2062          *p++ = 0x75; *p++ = 0x02; // jnz after_fincstp
   2063          *p++ = 0xD9; *p++ = 0xF7; // fincstp
   2064          *p++ = 0x58;              // after_fincstp: popl %eax
   2065          break;
   2066       default:
   2067          vpanic("do_fop1_st: unknown op");
   2068    }
   2069    return p;
   2070 }
   2071 
   2072 /* Emit f<op> %st(i), 1 <= i <= 5 */
   2073 static UChar* do_fop2_st ( UChar* p, X86FpOp op, Int i )
   2074 {
   2075    Int subopc;
   2076    switch (op) {
   2077       case Xfp_ADD: subopc = 0; break;
   2078       case Xfp_SUB: subopc = 4; break;
   2079       case Xfp_MUL: subopc = 1; break;
   2080       case Xfp_DIV: subopc = 6; break;
   2081       default: vpanic("do_fop2_st: unknown op");
   2082    }
   2083    *p++ = 0xD8;
   2084    p    = doAMode_R_enc_enc(p, subopc, i);
   2085    return p;
   2086 }
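
         /* For instance, do_fop2_st with Xfp_ADD and i == 1 emits D8 C1,
            i.e. fadd %st(1),%st(0): subopc 0 goes in the reg field and
            i in the r/m field of a mod=3 mod-reg-rm byte. */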
   2087 
    2088 /* Push a 32-bit word on the stack.  The word depends on tags[3:0]:
    2089    each byte is either 0x00 or 0xFF depending on the corresponding
    2090    bit in tags[]. */
   2091 static UChar* push_word_from_tags ( UChar* p, UShort tags )
   2092 {
   2093    UInt w;
   2094    vassert(0 == (tags & ~0xF));
   2095    if (tags == 0) {
   2096       /* pushl $0x00000000 */
   2097       *p++ = 0x6A;
   2098       *p++ = 0x00;
   2099    }
    2100    else if (tags == 0xF) {
    2101       /* pushl $0xFFFFFFFF */
   2103       *p++ = 0x6A;
   2104       *p++ = 0xFF;
   2105    } else {
   2106       vassert(0); /* awaiting test case */
   2107       w = 0;
   2108       if (tags & 1) w |= 0x000000FF;
   2109       if (tags & 2) w |= 0x0000FF00;
   2110       if (tags & 4) w |= 0x00FF0000;
   2111       if (tags & 8) w |= 0xFF000000;
   2112       *p++ = 0x68;
   2113       p = emit32(p, w);
   2114    }
   2115    return p;
   2116 }
   2117 
   2118 /* Emit an instruction into buf and return the number of bytes used.
   2119    Note that buf is not the insn's final place, and therefore it is
   2120    imperative to emit position-independent code.  If the emitted
   2121    instruction was a profiler inc, set *is_profInc to True, else
   2122    leave it unchanged. */
   2123 
   2124 Int emit_X86Instr ( /*MB_MOD*/Bool* is_profInc,
   2125                     UChar* buf, Int nbuf, const X86Instr* i,
   2126                     Bool mode64, VexEndness endness_host,
   2127                     const void* disp_cp_chain_me_to_slowEP,
   2128                     const void* disp_cp_chain_me_to_fastEP,
   2129                     const void* disp_cp_xindir,
   2130                     const void* disp_cp_xassisted )
   2131 {
   2132    UInt irno, opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
   2133 
   2134    UInt   xtra;
   2135    UChar* p = &buf[0];
   2136    UChar* ptmp;
   2137    vassert(nbuf >= 32);
   2138    vassert(mode64 == False);
   2139 
   2140    /* vex_printf("asm  ");ppX86Instr(i, mode64); vex_printf("\n"); */
   2141 
   2142    switch (i->tag) {
   2143 
   2144    case Xin_Alu32R:
   2145       /* Deal specially with MOV */
   2146       if (i->Xin.Alu32R.op == Xalu_MOV) {
   2147          switch (i->Xin.Alu32R.src->tag) {
   2148             case Xrmi_Imm:
   2149                *p++ = toUChar(0xB8 + iregEnc(i->Xin.Alu32R.dst));
   2150                p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
   2151                goto done;
   2152             case Xrmi_Reg:
   2153                *p++ = 0x89;
   2154                p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
   2155                                 i->Xin.Alu32R.dst);
   2156                goto done;
   2157             case Xrmi_Mem:
   2158                *p++ = 0x8B;
   2159                p = doAMode_M(p, i->Xin.Alu32R.dst,
   2160                                 i->Xin.Alu32R.src->Xrmi.Mem.am);
   2161                goto done;
   2162             default:
   2163                goto bad;
   2164          }
   2165       }
   2166       /* MUL */
   2167       if (i->Xin.Alu32R.op == Xalu_MUL) {
   2168          switch (i->Xin.Alu32R.src->tag) {
   2169             case Xrmi_Reg:
   2170                *p++ = 0x0F;
   2171                *p++ = 0xAF;
   2172                p = doAMode_R(p, i->Xin.Alu32R.dst,
   2173                                 i->Xin.Alu32R.src->Xrmi.Reg.reg);
   2174                goto done;
   2175             case Xrmi_Mem:
   2176                *p++ = 0x0F;
   2177                *p++ = 0xAF;
   2178                p = doAMode_M(p, i->Xin.Alu32R.dst,
   2179                                 i->Xin.Alu32R.src->Xrmi.Mem.am);
   2180                goto done;
   2181             case Xrmi_Imm:
   2182                if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
   2183                   *p++ = 0x6B;
   2184                   p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
   2185                   *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
   2186                } else {
   2187                   *p++ = 0x69;
   2188                   p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
   2189                   p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
   2190                }
   2191                goto done;
   2192             default:
   2193                goto bad;
   2194          }
   2195       }
   2196       /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
   2197       opc = opc_rr = subopc_imm = opc_imma = 0;
   2198       switch (i->Xin.Alu32R.op) {
   2199          case Xalu_ADC: opc = 0x13; opc_rr = 0x11;
   2200                         subopc_imm = 2; opc_imma = 0x15; break;
   2201          case Xalu_ADD: opc = 0x03; opc_rr = 0x01;
   2202                         subopc_imm = 0; opc_imma = 0x05; break;
   2203          case Xalu_SUB: opc = 0x2B; opc_rr = 0x29;
   2204                         subopc_imm = 5; opc_imma = 0x2D; break;
   2205          case Xalu_SBB: opc = 0x1B; opc_rr = 0x19;
   2206                         subopc_imm = 3; opc_imma = 0x1D; break;
   2207          case Xalu_AND: opc = 0x23; opc_rr = 0x21;
   2208                         subopc_imm = 4; opc_imma = 0x25; break;
   2209          case Xalu_XOR: opc = 0x33; opc_rr = 0x31;
   2210                         subopc_imm = 6; opc_imma = 0x35; break;
   2211          case Xalu_OR:  opc = 0x0B; opc_rr = 0x09;
   2212                         subopc_imm = 1; opc_imma = 0x0D; break;
   2213          case Xalu_CMP: opc = 0x3B; opc_rr = 0x39;
   2214                         subopc_imm = 7; opc_imma = 0x3D; break;
   2215          default: goto bad;
   2216       }
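               /* Example (a sketch): addl $1, %ebx takes the fits8bits
                  path below: opcode 0x83, then a mod-reg-rm byte from
                  subopc_imm 0 and %ebx giving 0xC3, then the byte 0x01:
                  altogether 83 C3 01. */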
   2217       switch (i->Xin.Alu32R.src->tag) {
   2218          case Xrmi_Imm:
   2219             if (sameHReg(i->Xin.Alu32R.dst, hregX86_EAX())
   2220                 && !fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
   2221                *p++ = toUChar(opc_imma);
   2222                p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
   2223             } else
   2224             if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
   2225                *p++ = 0x83;
   2226                p    = doAMode_R_enc_reg(p, subopc_imm, i->Xin.Alu32R.dst);
   2227                *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
   2228             } else {
   2229                *p++ = 0x81;
   2230                p    = doAMode_R_enc_reg(p, subopc_imm, i->Xin.Alu32R.dst);
   2231                p    = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
   2232             }
   2233             goto done;
   2234          case Xrmi_Reg:
   2235             *p++ = toUChar(opc_rr);
   2236             p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
   2237                              i->Xin.Alu32R.dst);
   2238             goto done;
   2239          case Xrmi_Mem:
   2240             *p++ = toUChar(opc);
   2241             p = doAMode_M(p, i->Xin.Alu32R.dst,
   2242                              i->Xin.Alu32R.src->Xrmi.Mem.am);
   2243             goto done;
   2244          default:
   2245             goto bad;
   2246       }
   2247       break;
   2248 
   2249    case Xin_Alu32M:
   2250       /* Deal specially with MOV */
   2251       if (i->Xin.Alu32M.op == Xalu_MOV) {
   2252          switch (i->Xin.Alu32M.src->tag) {
   2253             case Xri_Reg:
   2254                *p++ = 0x89;
   2255                p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
   2256                                 i->Xin.Alu32M.dst);
   2257                goto done;
   2258             case Xri_Imm:
   2259                *p++ = 0xC7;
   2260                p = doAMode_M_enc(p, 0, i->Xin.Alu32M.dst);
   2261                p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
   2262                goto done;
   2263             default:
   2264                goto bad;
   2265          }
   2266       }
    2267       /* ADD/SUB/CMP only; the remaining ALU ops, and MUL, are
    2268          not allowed here. */
   2269       opc = subopc_imm = opc_imma = 0;
   2270       switch (i->Xin.Alu32M.op) {
   2271          case Xalu_ADD: opc = 0x01; subopc_imm = 0; break;
   2272          case Xalu_SUB: opc = 0x29; subopc_imm = 5; break;
   2273          case Xalu_CMP: opc = 0x39; subopc_imm = 7; break;
   2274          default: goto bad;
   2275       }
   2276       switch (i->Xin.Alu32M.src->tag) {
   2277          case Xri_Reg:
   2278             *p++ = toUChar(opc);
   2279             p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
   2280                              i->Xin.Alu32M.dst);
   2281             goto done;
   2282          case Xri_Imm:
   2283             if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) {
   2284                *p++ = 0x83;
   2285                p    = doAMode_M_enc(p, subopc_imm, i->Xin.Alu32M.dst);
   2286                *p++ = toUChar(0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32);
   2287                goto done;
   2288             } else {
   2289                *p++ = 0x81;
   2290                p    = doAMode_M_enc(p, subopc_imm, i->Xin.Alu32M.dst);
   2291                p    = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
   2292                goto done;
   2293             }
   2294          default:
   2295             goto bad;
   2296       }
   2297       break;
   2298 
   2299    case Xin_Sh32:
   2300       opc_cl = opc_imm = subopc = 0;
   2301       switch (i->Xin.Sh32.op) {
   2302          case Xsh_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
   2303          case Xsh_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
   2304          case Xsh_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
   2305          default: goto bad;
   2306       }
   2307       if (i->Xin.Sh32.src == 0) {
   2308          *p++ = toUChar(opc_cl);
   2309          p = doAMode_R_enc_reg(p, subopc, i->Xin.Sh32.dst);
   2310       } else {
   2311          *p++ = toUChar(opc_imm);
   2312          p = doAMode_R_enc_reg(p, subopc, i->Xin.Sh32.dst);
   2313          *p++ = (UChar)(i->Xin.Sh32.src);
   2314       }
   2315       goto done;
   2316 
   2317    case Xin_Test32:
   2318       if (i->Xin.Test32.dst->tag == Xrm_Reg) {
   2319          /* testl $imm32, %reg */
   2320          *p++ = 0xF7;
   2321          p = doAMode_R_enc_reg(p, 0, i->Xin.Test32.dst->Xrm.Reg.reg);
   2322          p = emit32(p, i->Xin.Test32.imm32);
   2323          goto done;
   2324       } else {
   2325          /* testl $imm32, amode */
   2326          *p++ = 0xF7;
   2327          p = doAMode_M_enc(p, 0, i->Xin.Test32.dst->Xrm.Mem.am);
   2328          p = emit32(p, i->Xin.Test32.imm32);
   2329          goto done;
   2330       }
   2331 
   2332    case Xin_Unary32:
   2333       if (i->Xin.Unary32.op == Xun_NOT) {
   2334          *p++ = 0xF7;
   2335          p = doAMode_R_enc_reg(p, 2, i->Xin.Unary32.dst);
   2336          goto done;
   2337       }
   2338       if (i->Xin.Unary32.op == Xun_NEG) {
   2339          *p++ = 0xF7;
   2340          p = doAMode_R_enc_reg(p, 3, i->Xin.Unary32.dst);
   2341          goto done;
   2342       }
   2343       break;
   2344 
   2345    case Xin_Lea32:
   2346       *p++ = 0x8D;
   2347       p = doAMode_M(p, i->Xin.Lea32.dst, i->Xin.Lea32.am);
   2348       goto done;
   2349 
   2350    case Xin_MulL:
   2351       subopc = i->Xin.MulL.syned ? 5 : 4;
   2352       *p++ = 0xF7;
   2353       switch (i->Xin.MulL.src->tag)  {
   2354          case Xrm_Mem:
   2355             p = doAMode_M_enc(p, subopc, i->Xin.MulL.src->Xrm.Mem.am);
   2356             goto done;
   2357          case Xrm_Reg:
   2358             p = doAMode_R_enc_reg(p, subopc, i->Xin.MulL.src->Xrm.Reg.reg);
   2359             goto done;
   2360          default:
   2361             goto bad;
   2362       }
   2363       break;
   2364 
   2365    case Xin_Div:
   2366       subopc = i->Xin.Div.syned ? 7 : 6;
   2367       *p++ = 0xF7;
   2368       switch (i->Xin.Div.src->tag)  {
   2369          case Xrm_Mem:
   2370             p = doAMode_M_enc(p, subopc, i->Xin.Div.src->Xrm.Mem.am);
   2371             goto done;
   2372          case Xrm_Reg:
   2373             p = doAMode_R_enc_reg(p, subopc, i->Xin.Div.src->Xrm.Reg.reg);
   2374             goto done;
   2375          default:
   2376             goto bad;
   2377       }
   2378       break;
   2379 
   2380    case Xin_Sh3232:
   2381       vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR);
   2382       if (i->Xin.Sh3232.amt == 0) {
   2383          /* shldl/shrdl by %cl */
   2384          *p++ = 0x0F;
   2385          if (i->Xin.Sh3232.op == Xsh_SHL) {
   2386             *p++ = 0xA5;
   2387          } else {
   2388             *p++ = 0xAD;
   2389          }
   2390          p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst);
   2391          goto done;
   2392       }
   2393       break;
   2394 
   2395    case Xin_Push:
   2396       switch (i->Xin.Push.src->tag) {
   2397          case Xrmi_Mem:
   2398             *p++ = 0xFF;
   2399             p = doAMode_M_enc(p, 6, i->Xin.Push.src->Xrmi.Mem.am);
   2400             goto done;
   2401          case Xrmi_Imm:
   2402             *p++ = 0x68;
   2403             p = emit32(p, i->Xin.Push.src->Xrmi.Imm.imm32);
   2404             goto done;
   2405          case Xrmi_Reg:
   2406             *p++ = toUChar(0x50 + iregEnc(i->Xin.Push.src->Xrmi.Reg.reg));
   2407             goto done;
    2408          default:
   2409             goto bad;
   2410       }
   2411 
   2412    case Xin_Call:
   2413       if (i->Xin.Call.cond != Xcc_ALWAYS
   2414           && i->Xin.Call.rloc.pri != RLPri_None) {
   2415          /* The call might not happen (it isn't unconditional) and it
   2416             returns a result.  In this case we will need to generate a
   2417             control flow diamond to put 0x555..555 in the return
   2418             register(s) in the case where the call doesn't happen.  If
   2419             this ever becomes necessary, maybe copy code from the ARM
   2420             equivalent.  Until that day, just give up. */
   2421          goto bad;
   2422       }
   2423       /* See detailed comment for Xin_Call in getRegUsage_X86Instr above
   2424          for explanation of this. */
   2425       switch (i->Xin.Call.regparms) {
   2426          case 0: irno = iregEnc(hregX86_EAX()); break;
   2427          case 1: irno = iregEnc(hregX86_EDX()); break;
   2428          case 2: irno = iregEnc(hregX86_ECX()); break;
   2429          case 3: irno = iregEnc(hregX86_EDI()); break;
    2430          default: vpanic("emit_X86Instr:call:regparms");
   2431       }
   2432       /* jump over the following two insns if the condition does not
   2433          hold */
   2434       if (i->Xin.Call.cond != Xcc_ALWAYS) {
   2435          *p++ = toUChar(0x70 + (0xF & (i->Xin.Call.cond ^ 1)));
   2436          *p++ = 0x07; /* 7 bytes in the next two insns */
   2437       }
   2438       /* movl $target, %tmp */
   2439       *p++ = toUChar(0xB8 + irno);
   2440       p = emit32(p, i->Xin.Call.target);
   2441       /* call *%tmp */
   2442       *p++ = 0xFF;
   2443       *p++ = toUChar(0xD0 + irno);
   2444       goto done;
   2445 
   2446    case Xin_XDirect: {
   2447       /* NB: what goes on here has to be very closely coordinated with the
   2448          chainXDirect_X86 and unchainXDirect_X86 below. */
   2449       /* We're generating chain-me requests here, so we need to be
   2450          sure this is actually allowed -- no-redir translations can't
   2451          use chain-me's.  Hence: */
   2452       vassert(disp_cp_chain_me_to_slowEP != NULL);
   2453       vassert(disp_cp_chain_me_to_fastEP != NULL);
   2454 
   2455       /* Use ptmp for backpatching conditional jumps. */
   2456       ptmp = NULL;
   2457 
   2458       /* First off, if this is conditional, create a conditional
   2459          jump over the rest of it. */
   2460       if (i->Xin.XDirect.cond != Xcc_ALWAYS) {
   2461          /* jmp fwds if !condition */
   2462          *p++ = toUChar(0x70 + (0xF & (i->Xin.XDirect.cond ^ 1)));
   2463          ptmp = p; /* fill in this bit later */
   2464          *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
   2465       }
   2466 
   2467       /* Update the guest EIP. */
   2468       /* movl $dstGA, amEIP */
   2469       *p++ = 0xC7;
   2470       p    = doAMode_M_enc(p, 0, i->Xin.XDirect.amEIP);
   2471       p    = emit32(p, i->Xin.XDirect.dstGA);
   2472 
   2473       /* --- FIRST PATCHABLE BYTE follows --- */
   2474       /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
   2475          to) backs up the return address, so as to find the address of
   2476          the first patchable byte.  So: don't change the length of the
   2477          two instructions below. */
   2478       /* movl $disp_cp_chain_me_to_{slow,fast}EP,%edx; */
   2479       *p++ = 0xBA;
   2480       const void* disp_cp_chain_me
   2481                = i->Xin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
   2482                                          : disp_cp_chain_me_to_slowEP;
   2483       p = emit32(p, (UInt)(Addr)disp_cp_chain_me);
   2484       /* call *%edx */
   2485       *p++ = 0xFF;
   2486       *p++ = 0xD2;
   2487       /* --- END of PATCHABLE BYTES --- */
   2488 
   2489       /* Fix up the conditional jump, if there was one. */
   2490       if (i->Xin.XDirect.cond != Xcc_ALWAYS) {
   2491          Int delta = p - ptmp;
   2492          vassert(delta > 0 && delta < 40);
   2493          *ptmp = toUChar(delta-1);
   2494       }
   2495       goto done;
   2496    }
   2497 
   2498    case Xin_XIndir: {
   2499       /* We're generating transfers that could lead indirectly to a
   2500          chain-me, so we need to be sure this is actually allowed --
   2501          no-redir translations are not allowed to reach normal
   2502          translations without going through the scheduler.  That means
   2503          no XDirects or XIndirs out from no-redir translations.
   2504          Hence: */
   2505       vassert(disp_cp_xindir != NULL);
   2506 
   2507       /* Use ptmp for backpatching conditional jumps. */
   2508       ptmp = NULL;
   2509 
   2510       /* First off, if this is conditional, create a conditional
   2511          jump over the rest of it. */
   2512       if (i->Xin.XIndir.cond != Xcc_ALWAYS) {
   2513          /* jmp fwds if !condition */
   2514          *p++ = toUChar(0x70 + (0xF & (i->Xin.XIndir.cond ^ 1)));
   2515          ptmp = p; /* fill in this bit later */
   2516          *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
   2517       }
   2518 
   2519       /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */
   2520       *p++ = 0x89;
   2521       p = doAMode_M(p, i->Xin.XIndir.dstGA, i->Xin.XIndir.amEIP);
   2522 
   2523       /* movl $disp_indir, %edx */
   2524       *p++ = 0xBA;
   2525       p = emit32(p, (UInt)(Addr)disp_cp_xindir);
   2526       /* jmp *%edx */
   2527       *p++ = 0xFF;
   2528       *p++ = 0xE2;
   2529 
   2530       /* Fix up the conditional jump, if there was one. */
   2531       if (i->Xin.XIndir.cond != Xcc_ALWAYS) {
   2532          Int delta = p - ptmp;
   2533          vassert(delta > 0 && delta < 40);
   2534          *ptmp = toUChar(delta-1);
   2535       }
   2536       goto done;
   2537    }
   2538 
   2539    case Xin_XAssisted: {
   2540       /* Use ptmp for backpatching conditional jumps. */
   2541       ptmp = NULL;
   2542 
   2543       /* First off, if this is conditional, create a conditional
   2544          jump over the rest of it. */
   2545       if (i->Xin.XAssisted.cond != Xcc_ALWAYS) {
   2546          /* jmp fwds if !condition */
   2547          *p++ = toUChar(0x70 + (0xF & (i->Xin.XAssisted.cond ^ 1)));
   2548          ptmp = p; /* fill in this bit later */
   2549          *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
   2550       }
   2551 
   2552       /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */
   2553       *p++ = 0x89;
    2554       p = doAMode_M(p, i->Xin.XAssisted.dstGA, i->Xin.XAssisted.amEIP);
   2555       /* movl $magic_number, %ebp. */
   2556       UInt trcval = 0;
   2557       switch (i->Xin.XAssisted.jk) {
   2558          case Ijk_ClientReq:    trcval = VEX_TRC_JMP_CLIENTREQ;    break;
   2559          case Ijk_Sys_syscall:  trcval = VEX_TRC_JMP_SYS_SYSCALL;  break;
   2560          case Ijk_Sys_int128:   trcval = VEX_TRC_JMP_SYS_INT128;   break;
   2561          case Ijk_Sys_int129:   trcval = VEX_TRC_JMP_SYS_INT129;   break;
   2562          case Ijk_Sys_int130:   trcval = VEX_TRC_JMP_SYS_INT130;   break;
   2563          case Ijk_Sys_int145:   trcval = VEX_TRC_JMP_SYS_INT145;   break;
   2564          case Ijk_Sys_int210:   trcval = VEX_TRC_JMP_SYS_INT210;   break;
   2565          case Ijk_Sys_sysenter: trcval = VEX_TRC_JMP_SYS_SYSENTER; break;
   2566          case Ijk_Yield:        trcval = VEX_TRC_JMP_YIELD;        break;
   2567          case Ijk_EmWarn:       trcval = VEX_TRC_JMP_EMWARN;       break;
   2568          case Ijk_MapFail:      trcval = VEX_TRC_JMP_MAPFAIL;      break;
   2569          case Ijk_NoDecode:     trcval = VEX_TRC_JMP_NODECODE;     break;
   2570          case Ijk_InvalICache:  trcval = VEX_TRC_JMP_INVALICACHE;  break;
   2571          case Ijk_NoRedir:      trcval = VEX_TRC_JMP_NOREDIR;      break;
   2572          case Ijk_SigTRAP:      trcval = VEX_TRC_JMP_SIGTRAP;      break;
   2573          case Ijk_SigSEGV:      trcval = VEX_TRC_JMP_SIGSEGV;      break;
   2574          case Ijk_Boring:       trcval = VEX_TRC_JMP_BORING;       break;
   2575          /* We don't expect to see the following being assisted. */
   2576          case Ijk_Ret:
   2577          case Ijk_Call:
   2578          /* fallthrough */
   2579          default:
   2580             ppIRJumpKind(i->Xin.XAssisted.jk);
   2581             vpanic("emit_X86Instr.Xin_XAssisted: unexpected jump kind");
   2582       }
   2583       vassert(trcval != 0);
   2584       *p++ = 0xBD;
   2585       p = emit32(p, trcval);
   2586 
   2587       /* movl $disp_indir, %edx */
   2588       *p++ = 0xBA;
   2589       p = emit32(p, (UInt)(Addr)disp_cp_xassisted);
   2590       /* jmp *%edx */
   2591       *p++ = 0xFF;
   2592       *p++ = 0xE2;
   2593 
   2594       /* Fix up the conditional jump, if there was one. */
   2595       if (i->Xin.XAssisted.cond != Xcc_ALWAYS) {
   2596          Int delta = p - ptmp;
   2597          vassert(delta > 0 && delta < 40);
   2598          *ptmp = toUChar(delta-1);
   2599       }
   2600       goto done;
   2601    }
   2602 
   2603    case Xin_CMov32:
   2604       vassert(i->Xin.CMov32.cond != Xcc_ALWAYS);
   2605 
   2606       /* This generates cmov, which is illegal on P54/P55. */
   2607       /*
   2608       *p++ = 0x0F;
   2609       *p++ = toUChar(0x40 + (0xF & i->Xin.CMov32.cond));
   2610       if (i->Xin.CMov32.src->tag == Xrm_Reg) {
   2611          p = doAMode_R(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Reg.reg);
   2612          goto done;
   2613       }
   2614       if (i->Xin.CMov32.src->tag == Xrm_Mem) {
   2615          p = doAMode_M(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Mem.am);
   2616          goto done;
   2617       }
   2618       */
   2619 
   2620       /* Alternative version which works on any x86 variant. */
   2621       /* jmp fwds if !condition */
    2622       *p++ = toUChar(0x70 + (0xF & (i->Xin.CMov32.cond ^ 1)));
   2623       *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
   2624       ptmp = p;
   2625 
   2626       switch (i->Xin.CMov32.src->tag) {
   2627          case Xrm_Reg:
   2628             /* Big sigh.  This is movl E -> G ... */
   2629             *p++ = 0x89;
   2630             p = doAMode_R(p, i->Xin.CMov32.src->Xrm.Reg.reg,
   2631                              i->Xin.CMov32.dst);
   2632 
   2633             break;
   2634          case Xrm_Mem:
   2635             /* ... whereas this is movl G -> E.  That's why the args
   2636                to doAMode_R appear to be the wrong way round in the
   2637                Xrm_Reg case. */
   2638             *p++ = 0x8B;
   2639             p = doAMode_M(p, i->Xin.CMov32.dst,
   2640                              i->Xin.CMov32.src->Xrm.Mem.am);
   2641             break;
   2642          default:
   2643             goto bad;
   2644       }
   2645       /* Fill in the jump offset. */
   2646       *(ptmp-1) = toUChar(p - ptmp);
   2647       goto done;
   2650 
   2651    case Xin_LoadEX:
   2652       if (i->Xin.LoadEX.szSmall == 1 && !i->Xin.LoadEX.syned) {
   2653          /* movzbl */
   2654          *p++ = 0x0F;
   2655          *p++ = 0xB6;
   2656          p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
   2657          goto done;
   2658       }
   2659       if (i->Xin.LoadEX.szSmall == 2 && !i->Xin.LoadEX.syned) {
   2660          /* movzwl */
   2661          *p++ = 0x0F;
   2662          *p++ = 0xB7;
   2663          p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
   2664          goto done;
   2665       }
   2666       if (i->Xin.LoadEX.szSmall == 1 && i->Xin.LoadEX.syned) {
   2667          /* movsbl */
   2668          *p++ = 0x0F;
   2669          *p++ = 0xBE;
   2670          p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
   2671          goto done;
   2672       }
   2673       break;
   2674 
   2675    case Xin_Set32:
   2676       /* Make the destination register be 1 or 0, depending on whether
   2677          the relevant condition holds.  We have to dodge and weave
   2678          when the destination is %esi or %edi as we cannot directly
   2679          emit the native 'setb %reg' for those.  Further complication:
   2680          the top 24 bits of the destination should be forced to zero,
   2681          but doing 'xor %r,%r' kills the flag(s) we are about to read.
    2682          Sigh.  So start off by moving $0 into the dest. */
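
               /* Concretely (a sketch): for condition Z with dst = %edi
                  (encoding 7, hence >= 4), the code below produces
                     xchgl %eax,%edi ; movl $0,%eax ; setz %al ;
                     xchgl %eax,%edi
                  leaving the 0/1 result in %edi. */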
   2683 
   2684       /* Do we need to swap in %eax? */
   2685       if (iregEnc(i->Xin.Set32.dst) >= 4) {
   2686          /* xchg %eax, %dst */
   2687          *p++ = toUChar(0x90 + iregEnc(i->Xin.Set32.dst));
   2688          /* movl $0, %eax */
    2689          *p++ = toUChar(0xB8 + iregEnc(hregX86_EAX()));
   2690          p = emit32(p, 0);
   2691          /* setb lo8(%eax) */
   2692          *p++ = 0x0F;
   2693          *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
   2694          p = doAMode_R_enc_reg(p, 0, hregX86_EAX());
   2695          /* xchg %eax, %dst */
   2696          *p++ = toUChar(0x90 + iregEnc(i->Xin.Set32.dst));
   2697       } else {
   2698          /* movl $0, %dst */
   2699          *p++ = toUChar(0xB8 + iregEnc(i->Xin.Set32.dst));
   2700          p = emit32(p, 0);
   2701          /* setb lo8(%dst) */
   2702          *p++ = 0x0F;
   2703          *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
   2704          p = doAMode_R_enc_reg(p, 0, i->Xin.Set32.dst);
   2705       }
   2706       goto done;
   2707 
   2708    case Xin_Bsfr32:
   2709       *p++ = 0x0F;
   2710       if (i->Xin.Bsfr32.isFwds) {
   2711          *p++ = 0xBC;
   2712       } else {
   2713          *p++ = 0xBD;
   2714       }
   2715       p = doAMode_R(p, i->Xin.Bsfr32.dst, i->Xin.Bsfr32.src);
   2716       goto done;
   2717 
   2718    case Xin_MFence:
   2719       /* see comment in hdefs.h re this insn */
   2720       if (0) vex_printf("EMIT FENCE\n");
   2721       if (i->Xin.MFence.hwcaps & (VEX_HWCAPS_X86_SSE3
   2722                                   |VEX_HWCAPS_X86_SSE2)) {
   2723          /* mfence */
   2724          *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
   2725          goto done;
   2726       }
   2727       if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_MMXEXT) {
   2728          /* sfence */
   2729          *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8;
   2730          /* lock addl $0,0(%esp) */
   2731          *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
   2732          *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
   2733          goto done;
   2734       }
   2735       if (i->Xin.MFence.hwcaps == 0/*baseline, no SSE*/) {
   2736          /* lock addl $0,0(%esp) */
   2737          *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
   2738          *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
   2739          goto done;
   2740       }
   2741       vpanic("emit_X86Instr:mfence:hwcaps");
   2742       /*NOTREACHED*/
   2743       break;
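             /* For reference, the barrier sequence used in the two
                fallback paths decodes as:
                   F0 83 44 24 00 00   lock addl $0, 0(%esp)
                that is: F0 = lock, 83 /0 ib = addl imm8, ModRM 44 =
                disp8+SIB, SIB 24 = (%esp), disp8 = 0, imm8 = 0.  A
                locked RMW of the stack top changes no data but orders
                prior loads and stores, so it can stand in for mfence
                on pre-SSE2 hardware. */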
   2744 
   2745    case Xin_ACAS:
   2746       /* lock */
   2747       *p++ = 0xF0;
   2748       /* cmpxchg{b,w,l} %ebx,mem.  Expected-value in %eax, new value
   2749          in %ebx.  The new-value register is hardwired to be %ebx
   2750          since letting it be any integer register gives the problem
   2751          that %sil and %dil are unaddressable on x86 and hence we
   2752          would have to resort to the same kind of trickery as with
   2753          byte-sized Xin.Store, just below.  Given that this isn't
   2754          performance critical, it is simpler just to force the
   2755          register operand to %ebx (could equally be %ecx or %edx).
   2756          (Although %ebx is more consistent with cmpxchg8b.) */
   2757       if (i->Xin.ACAS.sz == 2) *p++ = 0x66;
   2758       *p++ = 0x0F;
   2759       if (i->Xin.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
   2760       p = doAMode_M(p, hregX86_EBX(), i->Xin.ACAS.addr);
   2761       goto done;
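             /* Illustrative encoding (assuming the amode is a plain
                (%esi), which doAMode_M should encode as mod=00, rm=110):
                   lock cmpxchgl %ebx, (%esi)  ==  F0 0F B1 1E
                Architecturally, cmpxchg compares %eax with the memory
                operand; on equality it stores %ebx and sets ZF,
                otherwise it loads the old value into %eax and clears
                ZF. */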
   2762 
   2763    case Xin_DACAS:
   2764       /* lock */
   2765       *p++ = 0xF0;
   2766       /* cmpxchg8b m64.  Expected-value in %edx:%eax, new value
   2767          in %ecx:%ebx.  All 4 regs are hardwired in the ISA, so
   2768          aren't encoded in the insn. */
   2769       *p++ = 0x0F;
   2770       *p++ = 0xC7;
   2771       p = doAMode_M_enc(p, 1, i->Xin.DACAS.addr);
   2772       goto done;
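             /* Illustrative encoding (assuming a plain (%edi) amode):
                   lock cmpxchg8b (%edi)  ==  F0 0F C7 0F
                where the ModRM reg field carries the /1 sub-opcode
                rather than a register.  Success/failure is again
                signalled in ZF. */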
   2773 
   2774    case Xin_Store:
   2775       if (i->Xin.Store.sz == 2) {
   2776          /* This case, at least, is simple, given that we can
   2777             reference the low 16 bits of any integer register. */
   2778          *p++ = 0x66;
   2779          *p++ = 0x89;
   2780          p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
   2781          goto done;
   2782       }
   2783 
   2784       if (i->Xin.Store.sz == 1) {
   2785          /* We have to do complex dodging and weaving if src is not
   2786             the low 8 bits of %eax/%ebx/%ecx/%edx. */
   2787          if (iregEnc(i->Xin.Store.src) < 4) {
   2788             /* we're OK, can do it directly */
   2789             *p++ = 0x88;
   2790             p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
   2791             goto done;
   2792          } else {
   2793             /* Bleh.  This means the source is %edi or %esi.  Since
   2794                the address mode can only mention three registers, at
   2795                least one of %eax/%ebx/%ecx/%edx must be available to
   2796                temporarily swap the source into, so the store can
   2797                happen.  So we have to look at the regs mentioned
   2798                in the amode. */
   2799             HReg swap = INVALID_HREG;
   2800             HReg  eax = hregX86_EAX(), ebx = hregX86_EBX(),
   2801                   ecx = hregX86_ECX(), edx = hregX86_EDX();
   2802             HRegUsage u;
   2803             initHRegUsage(&u);
   2804             addRegUsage_X86AMode(&u, i->Xin.Store.dst);
   2805             /**/ if (! HRegUsage__contains(&u, eax)) { swap = eax; }
   2806             else if (! HRegUsage__contains(&u, ebx)) { swap = ebx; }
   2807             else if (! HRegUsage__contains(&u, ecx)) { swap = ecx; }
   2808             else if (! HRegUsage__contains(&u, edx)) { swap = edx; }
   2809             vassert(! hregIsInvalid(swap));
   2810             /* xchgl %source, %swap. Could do better if swap is %eax. */
   2811             *p++ = 0x87;
   2812             p = doAMode_R(p, i->Xin.Store.src, swap);
   2813             /* movb lo8(%swap), (dst) */
   2814             *p++ = 0x88;
   2815             p = doAMode_M(p, swap, i->Xin.Store.dst);
   2816             /* xchgl %source, %swap. Could do better if swap is %eax. */
   2817             *p++ = 0x87;
   2818             p = doAMode_R(p, i->Xin.Store.src, swap);
   2819             goto done;
   2820          }
   2821       } /* if (i->Xin.Store.sz == 1) */
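             /* Worked example of the byte-store dance (illustrative
                only, assuming a (%ebx) amode, so %eax is free to act
                as the swap register):
                   store the low byte of %esi to (%ebx)
                emits
                   87 F0    xchgl %esi, %eax
                   88 03    movb  %al, (%ebx)
                   87 F0    xchgl %esi, %eax
             */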
   2822       break;
   2823 
   2824    case Xin_FpUnary:
   2825       /* gop %src, %dst
   2826          --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst)
   2827       */
   2828       p = do_ffree_st7(p);
   2829       p = do_fld_st(p, 0+fregEnc(i->Xin.FpUnary.src));
   2830       p = do_fop1_st(p, i->Xin.FpUnary.op);
   2831       p = do_fstp_st(p, 1+fregEnc(i->Xin.FpUnary.dst));
   2832       goto done;
   2833 
   2834    case Xin_FpBinary:
   2835       if (i->Xin.FpBinary.op == Xfp_YL2X
   2836           || i->Xin.FpBinary.op == Xfp_YL2XP1) {
   2837          /* Have to do this specially. */
   2838          /* ffree %st7 ; fld %st(srcL) ;
   2839             ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */
   2840          p = do_ffree_st7(p);
   2841          p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcL));
   2842          p = do_ffree_st7(p);
   2843          p = do_fld_st(p, 1+fregEnc(i->Xin.FpBinary.srcR));
   2844          *p++ = 0xD9;
   2845          *p++ = toUChar(i->Xin.FpBinary.op==Xfp_YL2X ? 0xF1 : 0xF9);
   2846          p = do_fstp_st(p, 1+fregEnc(i->Xin.FpBinary.dst));
   2847          goto done;
   2848       }
   2849       if (i->Xin.FpBinary.op == Xfp_ATAN) {
   2850          /* Have to do this specially. */
   2851          /* ffree %st7 ; fld %st(srcL) ;
   2852             ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */
   2853          p = do_ffree_st7(p);
   2854          p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcL));
   2855          p = do_ffree_st7(p);
   2856          p = do_fld_st(p, 1+fregEnc(i->Xin.FpBinary.srcR));
   2857          *p++ = 0xD9; *p++ = 0xF3;
   2858          p = do_fstp_st(p, 1+fregEnc(i->Xin.FpBinary.dst));
   2859          goto done;
   2860       }
   2861       if (i->Xin.FpBinary.op == Xfp_PREM
   2862           || i->Xin.FpBinary.op == Xfp_PREM1
   2863           || i->Xin.FpBinary.op == Xfp_SCALE) {
   2864          /* Have to do this specially. */
   2865          /* ffree %st7 ; fld %st(srcR) ;
   2866             ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ;
   2867             fincstp ; ffree %st7 */
   2868          p = do_ffree_st7(p);
   2869          p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcR));
   2870          p = do_ffree_st7(p);
   2871          p = do_fld_st(p, 1+fregEnc(i->Xin.FpBinary.srcL));
   2872          *p++ = 0xD9;
   2873          switch (i->Xin.FpBinary.op) {
   2874             case Xfp_PREM: *p++ = 0xF8; break;
   2875             case Xfp_PREM1: *p++ = 0xF5; break;
   2876             case Xfp_SCALE: *p++ = 0xFD; break;
   2877             default: vpanic("emitX86Instr(FpBinary,PREM/PREM1/SCALE)");
   2878          }
   2879          p = do_fstp_st(p, 2+fregEnc(i->Xin.FpBinary.dst));
   2880          *p++ = 0xD9; *p++ = 0xF7;
   2881          p = do_ffree_st7(p);
   2882          goto done;
   2883       }
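             /* Why the odd epilogue above: after the two loads the
                virtual registers sit two slots deeper, hence fstp
                %st(2+dst).  And since fprem/fprem1/fscale leave both
                operands on the stack, one stale slot remains after the
                fstp; fincstp turns it into %st(7), which the final
                ffree then marks empty. */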
   2884       /* General case */
   2885       /* gop %srcL, %srcR, %dst
   2886          --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst)
   2887       */
   2888       p = do_ffree_st7(p);
   2889       p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcL));
   2890       p = do_fop2_st(p, i->Xin.FpBinary.op,
   2891                         1+fregEnc(i->Xin.FpBinary.srcR));
   2892       p = do_fstp_st(p, 1+fregEnc(i->Xin.FpBinary.dst));
   2893       goto done;
   2894 
   2895    case Xin_FpLdSt:
   2896       if (i->Xin.FpLdSt.isLoad) {
   2897          /* Load from memory into %fakeN.
   2898             --> ffree %st(7) ; fld{s/l/t} amode ; fstp st(N+1)
   2899          */
   2900          p = do_ffree_st7(p);
   2901          switch (i->Xin.FpLdSt.sz) {
   2902             case 4:
   2903                *p++ = 0xD9;
   2904                p = doAMode_M_enc(p, 0/*subopcode*/, i->Xin.FpLdSt.addr);
   2905                break;
   2906             case 8:
   2907                *p++ = 0xDD;
   2908                p = doAMode_M_enc(p, 0/*subopcode*/, i->Xin.FpLdSt.addr);
   2909                break;
   2910             case 10:
   2911                *p++ = 0xDB;
   2912                p = doAMode_M_enc(p, 5/*subopcode*/, i->Xin.FpLdSt.addr);
   2913                break;
   2914             default:
   2915                vpanic("emitX86Instr(FpLdSt,load)");
   2916          }
   2917          p = do_fstp_st(p, 1+fregEnc(i->Xin.FpLdSt.reg));
   2918          goto done;
   2919       } else {
   2920          /* Store from %fakeN into memory.
   2921             --> ffree %st(7) ; fld st(N) ; fstp{s/l/t} amode
   2922          */
   2923          p = do_ffree_st7(p);
   2924          p = do_fld_st(p, 0+fregEnc(i->Xin.FpLdSt.reg));
   2925          switch (i->Xin.FpLdSt.sz) {
   2926             case 4:
   2927                *p++ = 0xD9;
   2928                p = doAMode_M_enc(p, 3/*subopcode*/, i->Xin.FpLdSt.addr);
   2929                break;
   2930             case 8:
   2931                *p++ = 0xDD;
   2932                p = doAMode_M_enc(p, 3/*subopcode*/, i->Xin.FpLdSt.addr);
   2933                break;
   2934             case 10:
   2935                *p++ = 0xDB;
   2936                p = doAMode_M_enc(p, 7/*subopcode*/, i->Xin.FpLdSt.addr);
   2937                break;
   2938             default:
   2939                vpanic("emitX86Instr(FpLdSt,store)");
   2940          }
   2941          goto done;
   2942       }
   2943       break;
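             /* Opcode/sub-opcode summary for the above (standard x87
                encodings):
                   flds  m32  D9 /0      fstps  m32  D9 /3
                   fldl  m64  DD /0      fstpl  m64  DD /3
                   fldt  m80  DB /5      fstpt  m80  DB /7
             */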
   2944 
   2945    case Xin_FpLdStI:
   2946       if (i->Xin.FpLdStI.isLoad) {
   2947          /* Load from memory into %fakeN, converting from an int.
   2948             --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1)
   2949          */
   2950          switch (i->Xin.FpLdStI.sz) {
   2951             case 8:  opc = 0xDF; subopc_imm = 5; break;
   2952             case 4:  opc = 0xDB; subopc_imm = 0; break;
   2953             case 2:  vassert(0); opc = 0xDF; subopc_imm = 0; break;
   2954             default: vpanic("emitX86Instr(Xin_FpLdStI-load)");
   2955          }
   2956          p = do_ffree_st7(p);
   2957          *p++ = toUChar(opc);
   2958          p = doAMode_M_enc(p, subopc_imm/*subopcode*/, i->Xin.FpLdStI.addr);
   2959          p = do_fstp_st(p, 1+fregEnc(i->Xin.FpLdStI.reg));
   2960          goto done;
   2961       } else {
   2962          /* Store from %fakeN into memory, converting to an int.
   2963             --> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode
   2964          */
   2965          switch (i->Xin.FpLdStI.sz) {
   2966             case 8:  opc = 0xDF; subopc_imm = 7; break;
   2967             case 4:  opc = 0xDB; subopc_imm = 3; break;
   2968             case 2:  opc = 0xDF; subopc_imm = 3; break;
   2969             default: vpanic("emitX86Instr(Xin_FpLdStI-store)");
   2970          }
   2971          p = do_ffree_st7(p);
   2972          p = do_fld_st(p, 0+fregEnc(i->Xin.FpLdStI.reg));
   2973          *p++ = toUChar(opc);
   2974          p = doAMode_M_enc(p, subopc_imm/*subopcode*/, i->Xin.FpLdStI.addr);
   2975          goto done;
   2976       }
   2977       break;
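             /* Opcode/sub-opcode summary for the above (standard x87
                encodings):
                   fildw   m16  DF /0    fistpw   m16  DF /3
                   fildl   m32  DB /0    fistpl   m32  DB /3
                   fildll  m64  DF /5    fistpll  m64  DF /7
             */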
   2978 
   2979    case Xin_Fp64to32:
   2980       /* ffree %st7 ; fld %st(src) */
   2981       p = do_ffree_st7(p);
   2982       p = do_fld_st(p, 0+fregEnc(i->Xin.Fp64to32.src));
   2983       /* subl $4, %esp */
   2984       *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04;
   2985       /* fstps (%esp) */
   2986       *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24;
   2987       /* flds (%esp) */
   2988       *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24;
   2989       /* addl $4, %esp */
   2990       *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04;
   2991       /* fstp %st(1+dst) */
   2992       p = do_fstp_st(p, 1+fregEnc(i->Xin.Fp64to32.dst));
   2993       goto done;
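             /* The round trip through a 4-byte stack slot is what does
                the real work here: fstps rounds the F64 value to F32 on
                the way out to memory, and flds widens it back, so the
                value finally stored to the dst slot is a 64-bit number
                carrying only single precision. */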
   2994 
   2995    case Xin_FpCMov:
   2996       /* jmp fwds if !condition */
   2997       *p++ = toUChar(0x70 + (i->Xin.FpCMov.cond ^ 1));
   2998       *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
   2999       ptmp = p;
   3000 
   3001       /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */
   3002       p = do_ffree_st7(p);
   3003       p = do_fld_st(p, 0+fregEnc(i->Xin.FpCMov.src));
   3004       p = do_fstp_st(p, 1+fregEnc(i->Xin.FpCMov.dst));
   3005 
   3006       /* Fill in the jump offset. */
   3007       *(ptmp-1) = toUChar(p - ptmp);
   3008       goto done;
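             /* Note the 0x70 + (cond ^ 1) trick: x86 condition codes
                come in complementary pairs differing only in bit 0, so
                XORing with 1 yields the negated condition, and we jump
                over the move when the condition does not hold. */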
   3009 
   3010    case Xin_FpLdCW:
   3011       *p++ = 0xD9;
   3012       p = doAMode_M_enc(p, 5/*subopcode*/, i->Xin.FpLdCW.addr);
   3013       goto done;
   3014 
   3015    case Xin_FpStSW_AX:
   3016       /* note, this emits fnstsw %ax, not fstsw %ax */
   3017       *p++ = 0xDF;
   3018       *p++ = 0xE0;
   3019       goto done;
   3020 
   3021    case Xin_FpCmp:
   3022       /* gcmp %fL, %fR, %dst
   3023          -> ffree %st7; fpush %fL ; fucomp %(fR+1) ;
   3024             fnstsw %ax ; movl %eax, %dst
   3025       */
   3026       /* ffree %st7 */
   3027       p = do_ffree_st7(p);
   3028       /* fpush %fL */
   3029       p = do_fld_st(p, 0+fregEnc(i->Xin.FpCmp.srcL));
   3030       /* fucomp %(fR+1) */
   3031       *p++ = 0xDD;
   3032       *p++ = toUChar(0xE8 + (7 & (1+fregEnc(i->Xin.FpCmp.srcR))));
   3033       /* fnstsw %ax */
   3034       *p++ = 0xDF;
   3035       *p++ = 0xE0;
   3036       /* movl %eax, %dst */
   3037       *p++ = 0x89;
   3038       p = doAMode_R(p, hregX86_EAX(), i->Xin.FpCmp.dst);
   3039       goto done;
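             /* Consumers of %dst can pick the comparison result out of
                the x87 status-word image left there: fucomp sets
                C3/C2/C0, which fnstsw deposits at bits 14, 10 and 8 of
                %ax. */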
   3040 
   3041    case Xin_SseConst: {
   3042       UShort con = i->Xin.SseConst.con;
   3043       p = push_word_from_tags(p, toUShort((con >> 12) & 0xF));
   3044       p = push_word_from_tags(p, toUShort((con >> 8) & 0xF));
   3045       p = push_word_from_tags(p, toUShort((con >> 4) & 0xF));
   3046       p = push_word_from_tags(p, toUShort(con & 0xF));
   3047       /* movups (%esp), %xmm-dst */
   3048       *p++ = 0x0F;
   3049       *p++ = 0x10;
   3050       *p++ = toUChar(0x04 + 8 * (7 & vregEnc(i->Xin.SseConst.dst)));
   3051       *p++ = 0x24;
   3052       /* addl $16, %esp */
   3053       *p++ = 0x83;
   3054       *p++ = 0xC4;
   3055       *p++ = 0x10;
   3056       goto done;
   3057    }
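          /* For reference, the final load above decodes as
                0F 10 04 24   movups (%esp), %xmmN
             (ModRM 04+8N selects SIB addressing, SIB 24 = (%esp)),
             after which the 16 bytes built up on the stack by
             push_word_from_tags are discarded by the addl. */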
   3058 
   3059    case Xin_SseLdSt:
   3060       *p++ = 0x0F;
   3061       *p++ = toUChar(i->Xin.SseLdSt.isLoad ? 0x10 : 0x11);
   3062       p = doAMode_M_enc(p, vregEnc(i->Xin.SseLdSt.reg), i->Xin.SseLdSt.addr);
   3063       goto done;
   3064 
   3065    case Xin_SseLdzLO:
   3066       vassert(i->Xin.SseLdzLO.sz == 4 || i->Xin.SseLdzLO.sz == 8);
   3067       /* movs[sd] amode, %xmm-dst */
   3068       *p++ = toUChar(i->Xin.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
   3069       *p++ = 0x0F;
   3070       *p++ = 0x10;
   3071       p = doAMode_M_enc(p, vregEnc(i->Xin.SseLdzLO.reg), i->Xin.SseLdzLO.addr);
   3072       goto done;
   3073 
   3074    case Xin_Sse32Fx4:
   3075       xtra = 0;
   3076       *p++ = 0x0F;
   3077       switch (i->Xin.Sse32Fx4.op) {
   3078          case Xsse_ADDF:   *p++ = 0x58; break;
   3079          case Xsse_DIVF:   *p++ = 0x5E; break;
   3080          case Xsse_MAXF:   *p++ = 0x5F; break;
   3081          case Xsse_MINF:   *p++ = 0x5D; break;
   3082          case Xsse_MULF:   *p++ = 0x59; break;
   3083          case Xsse_RCPF:   *p++ = 0x53; break;
   3084          case Xsse_RSQRTF: *p++ = 0x52; break;
   3085          case Xsse_SQRTF:  *p++ = 0x51; break;
   3086          case Xsse_SUBF:   *p++ = 0x5C; break;
   3087          case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
   3088          case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
   3089          case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
   3090          case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
   3091          default: goto bad;
   3092       }
   3093       p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse32Fx4.dst),
   3094                                vregEnc(i->Xin.Sse32Fx4.src) );
   3095       if (xtra & 0x100)
   3096          *p++ = toUChar(xtra & 0xFF);
   3097       goto done;
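          /* The Xsse_CMP*F forms are all the one instruction cmpps,
             which takes a trailing predicate byte: 0=eq, 1=lt, 2=le,
             3=unord.  That is what the xtra machinery encodes -- bit 8
             flags that an immediate byte is needed, and the low byte is
             the predicate.  The same scheme is reused by the three
             cases below. */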
   3098 
   3099    case Xin_Sse64Fx2:
   3100       xtra = 0;
   3101       *p++ = 0x66;
   3102       *p++ = 0x0F;
   3103       switch (i->Xin.Sse64Fx2.op) {
   3104          case Xsse_ADDF:   *p++ = 0x58; break;
   3105          case Xsse_DIVF:   *p++ = 0x5E; break;
   3106          case Xsse_MAXF:   *p++ = 0x5F; break;
   3107          case Xsse_MINF:   *p++ = 0x5D; break;
   3108          case Xsse_MULF:   *p++ = 0x59; break;
   3109          case Xsse_RCPF:   *p++ = 0x53; break;
   3110          case Xsse_RSQRTF: *p++ = 0x52; break;
   3111          case Xsse_SQRTF:  *p++ = 0x51; break;
   3112          case Xsse_SUBF:   *p++ = 0x5C; break;
   3113          case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
   3114          case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
   3115          case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
   3116          case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
   3117          default: goto bad;
   3118       }
   3119       p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse64Fx2.dst),
   3120                                vregEnc(i->Xin.Sse64Fx2.src) );
   3121       if (xtra & 0x100)
   3122          *p++ = toUChar(xtra & 0xFF);
   3123       goto done;
   3124 
   3125    case Xin_Sse32FLo:
   3126       xtra = 0;
   3127       *p++ = 0xF3;
   3128       *p++ = 0x0F;
   3129       switch (i->Xin.Sse32FLo.op) {
   3130          case Xsse_ADDF:   *p++ = 0x58; break;
   3131          case Xsse_DIVF:   *p++ = 0x5E; break;
   3132          case Xsse_MAXF:   *p++ = 0x5F; break;
   3133          case Xsse_MINF:   *p++ = 0x5D; break;
   3134          case Xsse_MULF:   *p++ = 0x59; break;
   3135          case Xsse_RCPF:   *p++ = 0x53; break;
   3136          case Xsse_RSQRTF: *p++ = 0x52; break;
   3137          case Xsse_SQRTF:  *p++ = 0x51; break;
   3138          case Xsse_SUBF:   *p++ = 0x5C; break;
   3139          case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
   3140          case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
   3141          case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
   3142          case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
   3143          default: goto bad;
   3144       }
   3145       p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse32FLo.dst),
   3146                                vregEnc(i->Xin.Sse32FLo.src) );
   3147       if (xtra & 0x100)
   3148          *p++ = toUChar(xtra & 0xFF);
   3149       goto done;
   3150 
   3151    case Xin_Sse64FLo:
   3152       xtra = 0;
   3153       *p++ = 0xF2;
   3154       *p++ = 0x0F;
   3155       switch (i->Xin.Sse64FLo.op) {
   3156          case Xsse_ADDF:   *p++ = 0x58; break;
   3157          case Xsse_DIVF:   *p++ = 0x5E; break;
   3158          case Xsse_MAXF:   *p++ = 0x5F; break;
   3159          case Xsse_MINF:   *p++ = 0x5D; break;
   3160          case Xsse_MULF:   *p++ = 0x59; break;
   3161          case Xsse_RCPF:   *p++ = 0x53; break;
   3162          case Xsse_RSQRTF: *p++ = 0x52; break;
   3163          case Xsse_SQRTF:  *p++ = 0x51; break;
   3164          case Xsse_SUBF:   *p++ = 0x5C; break;
   3165          case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
   3166          case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
   3167          case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
   3168          case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
   3169          default: goto bad;
   3170       }
   3171       p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse64FLo.dst),
   3172                                vregEnc(i->Xin.Sse64FLo.src) );
   3173       if (xtra & 0x100)
   3174          *p++ = toUChar(xtra & 0xFF);
   3175       goto done;
   3176 
   3177    case Xin_SseReRg:
   3178 #     define XX(_n) *p++ = (_n)
   3179       switch (i->Xin.SseReRg.op) {
   3180          case Xsse_MOV:     /*movups*/ XX(0x0F); XX(0x10); break;
   3181          case Xsse_OR:                 XX(0x0F); XX(0x56); break;
   3182          case Xsse_XOR:                XX(0x0F); XX(0x57); break;
   3183          case Xsse_AND:                XX(0x0F); XX(0x54); break;
   3184          case Xsse_PACKSSD:  XX(0x66); XX(0x0F); XX(0x6B); break;
   3185          case Xsse_PACKSSW:  XX(0x66); XX(0x0F); XX(0x63); break;
   3186          case Xsse_PACKUSW:  XX(0x66); XX(0x0F); XX(0x67); break;
   3187          case Xsse_ADD8:     XX(0x66); XX(0x0F); XX(0xFC); break;
   3188          case Xsse_ADD16:    XX(0x66); XX(0x0F); XX(0xFD); break;
   3189          case Xsse_ADD32:    XX(0x66); XX(0x0F); XX(0xFE); break;
   3190          case Xsse_ADD64:    XX(0x66); XX(0x0F); XX(0xD4); break;
   3191          case Xsse_QADD8S:   XX(0x66); XX(0x0F); XX(0xEC); break;
   3192          case Xsse_QADD16S:  XX(0x66); XX(0x0F); XX(0xED); break;
   3193          case Xsse_QADD8U:   XX(0x66); XX(0x0F); XX(0xDC); break;
   3194          case Xsse_QADD16U:  XX(0x66); XX(0x0F); XX(0xDD); break;
   3195          case Xsse_AVG8U:    XX(0x66); XX(0x0F); XX(0xE0); break;
   3196          case Xsse_AVG16U:   XX(0x66); XX(0x0F); XX(0xE3); break;
   3197          case Xsse_CMPEQ8:   XX(0x66); XX(0x0F); XX(0x74); break;
   3198          case Xsse_CMPEQ16:  XX(0x66); XX(0x0F); XX(0x75); break;
   3199          case Xsse_CMPEQ32:  XX(0x66); XX(0x0F); XX(0x76); break;
   3200          case Xsse_CMPGT8S:  XX(0x66); XX(0x0F); XX(0x64); break;
   3201          case Xsse_CMPGT16S: XX(0x66); XX(0x0F); XX(0x65); break;
   3202          case Xsse_CMPGT32S: XX(0x66); XX(0x0F); XX(0x66); break;
   3203          case Xsse_MAX16S:   XX(0x66); XX(0x0F); XX(0xEE); break;
   3204          case Xsse_MAX8U:    XX(0x66); XX(0x0F); XX(0xDE); break;
   3205          case Xsse_MIN16S:   XX(0x66); XX(0x0F); XX(0xEA); break;
   3206          case Xsse_MIN8U:    XX(0x66); XX(0x0F); XX(0xDA); break;
   3207          case Xsse_MULHI16U: XX(0x66); XX(0x0F); XX(0xE4); break;
   3208          case Xsse_MULHI16S: XX(0x66); XX(0x0F); XX(0xE5); break;
   3209          case Xsse_MUL16:    XX(0x66); XX(0x0F); XX(0xD5); break;
   3210          case Xsse_SHL16:    XX(0x66); XX(0x0F); XX(0xF1); break;
   3211          case Xsse_SHL32:    XX(0x66); XX(0x0F); XX(0xF2); break;
   3212          case Xsse_SHL64:    XX(0x66); XX(0x0F); XX(0xF3); break;
   3213          case Xsse_SAR16:    XX(0x66); XX(0x0F); XX(0xE1); break;
   3214          case Xsse_SAR32:    XX(0x66); XX(0x0F); XX(0xE2); break;
   3215          case Xsse_SHR16:    XX(0x66); XX(0x0F); XX(0xD1); break;
   3216          case Xsse_SHR32:    XX(0x66); XX(0x0F); XX(0xD2); break;
   3217          case Xsse_SHR64:    XX(0x66); XX(0x0F); XX(0xD3); break;
   3218          case Xsse_SUB8:     XX(0x66); XX(0x0F); XX(0xF8); break;
   3219          case Xsse_SUB16:    XX(0x66); XX(0x0F); XX(0xF9); break;
   3220          case Xsse_SUB32:    XX(0x66); XX(0x0F); XX(0xFA); break;
   3221          case Xsse_SUB64:    XX(0x66); XX(0x0F); XX(0xFB); break;
   3222          case Xsse_QSUB8S:   XX(0x66); XX(0x0F); XX(0xE8); break;
   3223          case Xsse_QSUB16S:  XX(0x66); XX(0x0F); XX(0xE9); break;
   3224          case Xsse_QSUB8U:   XX(0x66); XX(0x0F); XX(0xD8); break;
   3225          case Xsse_QSUB16U:  XX(0x66); XX(0x0F); XX(0xD9); break;
   3226          case Xsse_UNPCKHB:  XX(0x66); XX(0x0F); XX(0x68); break;
   3227          case Xsse_UNPCKHW:  XX(0x66); XX(0x0F); XX(0x69); break;
   3228          case Xsse_UNPCKHD:  XX(0x66); XX(0x0F); XX(0x6A); break;
   3229          case Xsse_UNPCKHQ:  XX(0x66); XX(0x0F); XX(0x6D); break;
   3230          case Xsse_UNPCKLB:  XX(0x66); XX(0x0F); XX(0x60); break;
   3231          case Xsse_UNPCKLW:  XX(0x66); XX(0x0F); XX(0x61); break;
   3232          case Xsse_UNPCKLD:  XX(0x66); XX(0x0F); XX(0x62); break;
   3233          case Xsse_UNPCKLQ:  XX(0x66); XX(0x0F); XX(0x6C); break;
   3234          default: goto bad;
   3235       }
   3236       p = doAMode_R_enc_enc(p, vregEnc(i->Xin.SseReRg.dst),
   3237                                vregEnc(i->Xin.SseReRg.src) );
   3238 #     undef XX
   3239       goto done;
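          /* Operand-order note: doAMode_R_enc_enc puts dst in the ModRM
             reg field and src in the r/m field, so for instance
             Xsse_MOV emits movups %src, %dst (0F 10 is the load,
             E-to-G, form). */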
   3240 
   3241    case Xin_SseCMov:
   3242       /* jmp fwds if !condition */
   3243       *p++ = toUChar(0x70 + (i->Xin.SseCMov.cond ^ 1));
   3244       *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
   3245       ptmp = p;
   3246 
   3247       /* movaps %src, %dst */
   3248       *p++ = 0x0F;
   3249       *p++ = 0x28;
   3250       p = doAMode_R_enc_enc(p, vregEnc(i->Xin.SseCMov.dst),
   3251                                vregEnc(i->Xin.SseCMov.src) );
   3252 
   3253       /* Fill in the jump offset. */
   3254       *(ptmp-1) = toUChar(p - ptmp);
   3255       goto done;
   3256 
   3257    case Xin_SseShuf:
   3258       *p++ = 0x66;
   3259       *p++ = 0x0F;
   3260       *p++ = 0x70;
   3261       p = doAMode_R_enc_enc(p, vregEnc(i->Xin.SseShuf.dst),
   3262                                vregEnc(i->Xin.SseShuf.src) );
   3263       *p++ = (UChar)(i->Xin.SseShuf.order);
   3264       goto done;
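          /* This is pshufd (66 0F 70 /r ib): each 2-bit field of the
             immediate 'order' byte selects which source dword goes to
             the corresponding destination lane. */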
   3265 
   3266    case Xin_EvCheck: {
   3267       /* We generate:
   3268             (3 bytes)  decl 4(%ebp)    4 == offsetof(host_EvC_COUNTER)
   3269             (2 bytes)  jns  nofail     expected taken
   3270             (3 bytes)  jmp* 0(%ebp)    0 == offsetof(host_EvC_FAILADDR)
   3271             nofail:
   3272       */
   3273       /* This is heavily asserted re instruction lengths.  It needs to
   3274          be.  If we get given unexpected forms of .amCounter or
   3275          .amFailAddr -- basically, anything that's not of the form
   3276          uimm7(%ebp) -- the assertions are likely to fail. */
   3277       /* Note also that after the decl we must be very careful not to
   3278          read the carry flag, else we get a partial flags stall.
   3279          js/jns avoids that, though. */
   3280       UChar* p0 = p;
   3281       /* ---  decl 4(%ebp) --- */
   3282       /* "1" because + there's no register in this encoding;
   3283          instead the register + field is used as a sub opcode.  The
   3284          encoding for "decl r/m32" + is FF /1, hence the "1". */
   3285       *p++ = 0xFF;
   3286       p = doAMode_M_enc(p, 1, i->Xin.EvCheck.amCounter);
   3287       vassert(p - p0 == 3);
   3288       /* --- jns nofail --- */
   3289       *p++ = 0x79;
   3290       *p++ = 0x03; /* need to check this 0x03 after the next insn */
   3291       vassert(p - p0 == 5);
   3292       /* --- jmp* 0(%ebp) --- */
   3293       /* The encoding is FF /4. */
   3294       *p++ = 0xFF;
   3295       p = doAMode_M_enc(p, 4, i->Xin.EvCheck.amFailAddr);
   3296       vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */
   3297       /* And crosscheck .. */
   3298       vassert(evCheckSzB_X86() == 8);
   3299       goto done;
   3300    }
   3301 
   3302    case Xin_ProfInc: {
   3303       /* We generate   addl $1,NotKnownYet
   3304                        adcl $0,NotKnownYet+4
   3305          in the expectation that a later call to LibVEX_patchProfCtr
   3306          will be used to fill in the immediate fields once the right
   3307          value is known.
   3308            83 05  00 00 00 00  01
   3309            83 15  00 00 00 00  00
   3310       */
   3311       *p++ = 0x83; *p++ = 0x05;
   3312       *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
   3313       *p++ = 0x01;
   3314       *p++ = 0x83; *p++ = 0x15;
   3315       *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
   3316       *p++ = 0x00;
   3317       /* Tell the caller .. */
   3318       vassert(!(*is_profInc));
   3319       *is_profInc = True;
   3320       goto done;
   3321    }
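          /* Decode of the fixed bytes above: ModRM 05 is mod=00,
             reg=/0 (add), rm=101, i.e. a 32-bit absolute address;
             ModRM 15 is the same with reg=/2 (adc).  The four zero
             bytes in each are the address holes that patchProfInc_X86
             (below) fills in. */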
   3322 
   3323    default:
   3324       goto bad;
   3325    }
   3326 
   3327   bad:
   3328    ppX86Instr(i, mode64);
   3329    vpanic("emit_X86Instr");
   3330    /*NOTREACHED*/
   3331 
   3332   done:
   3333    vassert(p - &buf[0] <= 32);
   3334    return p - &buf[0];
   3335 }
   3336 
   3337 
   3338 /* How big is an event check?  See case for Xin_EvCheck in
   3339    emit_X86Instr just above.  That crosschecks what this returns, so
   3340    we can tell if we're inconsistent. */
   3341 Int evCheckSzB_X86 (void)
   3342 {
   3343    return 8;
   3344 }
   3345 
   3346 
   3347 /* NB: what goes on here has to be very closely coordinated with the
   3348    emitInstr case for XDirect, above. */
   3349 VexInvalRange chainXDirect_X86 ( VexEndness endness_host,
   3350                                  void* place_to_chain,
   3351                                  const void* disp_cp_chain_me_EXPECTED,
   3352                                  const void* place_to_jump_to )
   3353 {
   3354    vassert(endness_host == VexEndnessLE);
   3355 
   3356    /* What we're expecting to see is:
   3357         movl $disp_cp_chain_me_EXPECTED, %edx
   3358         call *%edx
   3359       viz
   3360         BA <4 bytes value == disp_cp_chain_me_EXPECTED>
   3361         FF D2
   3362    */
   3363    UChar* p = (UChar*)place_to_chain;
   3364    vassert(p[0] == 0xBA);
   3365    vassert(read_misaligned_UInt_LE(&p[1])
   3366            == (UInt)(Addr)disp_cp_chain_me_EXPECTED);
   3367    vassert(p[5] == 0xFF);
   3368    vassert(p[6] == 0xD2);
   3369    /* And what we want to change it to is:
   3370           jmp disp32   where disp32 is relative to the next insn
   3371           ud2;
   3372         viz
   3373           E9 <4 bytes == disp32>
   3374           0F 0B
   3375       The replacement has the same length as the original.
   3376    */
   3377    /* This is the delta we need to put into a JMP d32 insn.  It's
   3378       relative to the start of the next insn, hence the -5.  */
   3379    Long delta = (Long)((const UChar *)place_to_jump_to - p) - 5;
   3380 
   3381    /* And make the modifications. */
   3382    p[0] = 0xE9;
   3383    write_misaligned_UInt_LE(&p[1], (UInt)(ULong)delta);
   3384    p[5] = 0x0F; p[6] = 0x0B;
   3385    /* sanity check on the delta -- top 32 are all 0 or all 1 */
   3386    delta >>= 32;
   3387    vassert(delta == 0LL || delta == -1LL);
   3388    VexInvalRange vir = { (HWord)place_to_chain, 7 };
   3389    return vir;
   3390 }
   3391 
   3392 
   3393 /* NB: what goes on here has to be very closely coordinated with the
   3394    emitInstr case for XDirect, above. */
   3395 VexInvalRange unchainXDirect_X86 ( VexEndness endness_host,
   3396                                    void* place_to_unchain,
   3397                                    const void* place_to_jump_to_EXPECTED,
   3398                                    const void* disp_cp_chain_me )
   3399 {
   3400    vassert(endness_host == VexEndnessLE);
   3401 
   3402    /* What we're expecting to see is:
   3403           jmp d32
   3404           ud2;
   3405        viz
   3406           E9 <4 bytes == disp32>
   3407           0F 0B
   3408    */
   3409    UChar* p     = (UChar*)place_to_unchain;
   3410    Bool   valid = False;
   3411    if (p[0] == 0xE9
   3412        && p[5] == 0x0F && p[6]  == 0x0B) {
   3413       /* Check the offset is right. */
   3414       Int s32 = (Int)read_misaligned_UInt_LE(&p[1]);
   3415       if ((UChar*)p + 5 + s32 == place_to_jump_to_EXPECTED) {
   3416          valid = True;
   3417          if (0)
   3418             vex_printf("QQQ unchainXDirect_X86: found valid\n");
   3419       }
   3420    }
   3421    vassert(valid);
   3422    /* And what we want to change it to is:
   3423          movl $disp_cp_chain_me, %edx
   3424          call *%edx
   3425       viz
   3426          BA <4 bytes value == disp_cp_chain_me>
   3427          FF D2
   3428       So it's the same length (convenient, huh).
   3429    */
   3430    p[0] = 0xBA;
   3431    write_misaligned_UInt_LE(&p[1], (UInt)(Addr)disp_cp_chain_me);
   3432    p[5] = 0xFF;
   3433    p[6] = 0xD2;
   3434    VexInvalRange vir = { (HWord)place_to_unchain, 7 };
   3435    return vir;
   3436 }
   3437 
   3438 
   3439 /* Patch the counter address into a profile inc point, as previously
   3440    created by the Xin_ProfInc case for emit_X86Instr. */
   3441 VexInvalRange patchProfInc_X86 ( VexEndness endness_host,
   3442                                  void*  place_to_patch,
   3443                                  const ULong* location_of_counter )
   3444 {
   3445    vassert(endness_host == VexEndnessLE);
   3446    vassert(sizeof(ULong*) == 4);
   3447    UChar* p = (UChar*)place_to_patch;
   3448    vassert(p[0] == 0x83);
   3449    vassert(p[1] == 0x05);
   3450    vassert(p[2] == 0x00);
   3451    vassert(p[3] == 0x00);
   3452    vassert(p[4] == 0x00);
   3453    vassert(p[5] == 0x00);
   3454    vassert(p[6] == 0x01);
   3455    vassert(p[7] == 0x83);
   3456    vassert(p[8] == 0x15);
   3457    vassert(p[9] == 0x00);
   3458    vassert(p[10] == 0x00);
   3459    vassert(p[11] == 0x00);
   3460    vassert(p[12] == 0x00);
   3461    vassert(p[13] == 0x00);
   3462    UInt imm32 = (UInt)(Addr)location_of_counter;
   3463    p[2] = imm32 & 0xFF; imm32 >>= 8;
   3464    p[3] = imm32 & 0xFF; imm32 >>= 8;
   3465    p[4] = imm32 & 0xFF; imm32 >>= 8;
   3466    p[5] = imm32 & 0xFF;
   3467    imm32 = 4 + (UInt)(Addr)location_of_counter;
   3468    p[9]  = imm32 & 0xFF; imm32 >>= 8;
   3469    p[10] = imm32 & 0xFF; imm32 >>= 8;
   3470    p[11] = imm32 & 0xFF; imm32 >>= 8;
   3471    p[12] = imm32 & 0xFF;
   3472    VexInvalRange vir = { (HWord)place_to_patch, 14 };
   3473    return vir;
   3474 }
   3475 
   3476 
   3477 /*---------------------------------------------------------------*/
   3478 /*--- end                                     host_x86_defs.c ---*/
   3479 /*---------------------------------------------------------------*/
   3480