
/*---------------------------------------------------------------*/
/*--- begin                                   host_x86_defs.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2013 OpenWorks LLP
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex.h"
#include "libvex_trc_values.h"

#include "main_util.h"
#include "host_generic_regs.h"
#include "host_x86_defs.h"


/* --------- Registers. --------- */

void ppHRegX86 ( HReg reg )
{
   Int r;
   static const HChar* ireg32_names[8]
     = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi" };
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      ppHReg(reg);
      return;
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt32:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 8);
         vex_printf("%s", ireg32_names[r]);
         return;
      case HRcFlt64:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 6);
         vex_printf("%%fake%d", r);
         return;
      case HRcVec128:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 8);
         vex_printf("%%xmm%d", r);
         return;
      default:
         vpanic("ppHRegX86");
   }
}
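
/* Example (illustrative): a real register is printed from the table
   above, e.g.

      ppHRegX86(hregX86_EAX());   // prints "%eax"

   whereas a virtual register such as mkHReg(5, HRcInt32, True) is
   handed off to the generic ppHReg printer. */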

HReg hregX86_EAX ( void ) { return mkHReg(0, HRcInt32, False); }
HReg hregX86_ECX ( void ) { return mkHReg(1, HRcInt32, False); }
HReg hregX86_EDX ( void ) { return mkHReg(2, HRcInt32, False); }
HReg hregX86_EBX ( void ) { return mkHReg(3, HRcInt32, False); }
HReg hregX86_ESP ( void ) { return mkHReg(4, HRcInt32, False); }
HReg hregX86_EBP ( void ) { return mkHReg(5, HRcInt32, False); }
HReg hregX86_ESI ( void ) { return mkHReg(6, HRcInt32, False); }
HReg hregX86_EDI ( void ) { return mkHReg(7, HRcInt32, False); }

HReg hregX86_FAKE0 ( void ) { return mkHReg(0, HRcFlt64, False); }
HReg hregX86_FAKE1 ( void ) { return mkHReg(1, HRcFlt64, False); }
HReg hregX86_FAKE2 ( void ) { return mkHReg(2, HRcFlt64, False); }
HReg hregX86_FAKE3 ( void ) { return mkHReg(3, HRcFlt64, False); }
HReg hregX86_FAKE4 ( void ) { return mkHReg(4, HRcFlt64, False); }
HReg hregX86_FAKE5 ( void ) { return mkHReg(5, HRcFlt64, False); }

HReg hregX86_XMM0 ( void ) { return mkHReg(0, HRcVec128, False); }
HReg hregX86_XMM1 ( void ) { return mkHReg(1, HRcVec128, False); }
HReg hregX86_XMM2 ( void ) { return mkHReg(2, HRcVec128, False); }
HReg hregX86_XMM3 ( void ) { return mkHReg(3, HRcVec128, False); }
HReg hregX86_XMM4 ( void ) { return mkHReg(4, HRcVec128, False); }
HReg hregX86_XMM5 ( void ) { return mkHReg(5, HRcVec128, False); }
HReg hregX86_XMM6 ( void ) { return mkHReg(6, HRcVec128, False); }
HReg hregX86_XMM7 ( void ) { return mkHReg(7, HRcVec128, False); }


void getAllocableRegs_X86 ( Int* nregs, HReg** arr )
{
   *nregs = 20;
   *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
   /* The integer registers.  ESP and EBP are deliberately not
      offered to the allocator: they are reserved for stack and
      guest-state pointer duties. */
   (*arr)[0] = hregX86_EAX();
   (*arr)[1] = hregX86_EBX();
   (*arr)[2] = hregX86_ECX();
   (*arr)[3] = hregX86_EDX();
   (*arr)[4] = hregX86_ESI();
   (*arr)[5] = hregX86_EDI();
   /* The six "fake" 64-bit FP registers. */
   (*arr)[6] = hregX86_FAKE0();
   (*arr)[7] = hregX86_FAKE1();
   (*arr)[8] = hregX86_FAKE2();
   (*arr)[9] = hregX86_FAKE3();
   (*arr)[10] = hregX86_FAKE4();
   (*arr)[11] = hregX86_FAKE5();
   /* The eight XMM registers. */
   (*arr)[12] = hregX86_XMM0();
   (*arr)[13] = hregX86_XMM1();
   (*arr)[14] = hregX86_XMM2();
   (*arr)[15] = hregX86_XMM3();
   (*arr)[16] = hregX86_XMM4();
   (*arr)[17] = hregX86_XMM5();
   (*arr)[18] = hregX86_XMM6();
   (*arr)[19] = hregX86_XMM7();
}


/* --------- Condition codes, Intel encoding. --------- */

const HChar* showX86CondCode ( X86CondCode cond )
{
   switch (cond) {
      case Xcc_O:      return "o";
      case Xcc_NO:     return "no";
      case Xcc_B:      return "b";
      case Xcc_NB:     return "nb";
      case Xcc_Z:      return "z";
      case Xcc_NZ:     return "nz";
      case Xcc_BE:     return "be";
      case Xcc_NBE:    return "nbe";
      case Xcc_S:      return "s";
      case Xcc_NS:     return "ns";
      case Xcc_P:      return "p";
      case Xcc_NP:     return "np";
      case Xcc_L:      return "l";
      case Xcc_NL:     return "nl";
      case Xcc_LE:     return "le";
      case Xcc_NLE:    return "nle";
      case Xcc_ALWAYS: return "ALWAYS";
      default: vpanic("showX86CondCode");
   }
}


/* --------- X86AMode: memory address expressions. --------- */

X86AMode* X86AMode_IR ( UInt imm32, HReg reg ) {
   X86AMode* am = LibVEX_Alloc(sizeof(X86AMode));
   am->tag = Xam_IR;
   am->Xam.IR.imm = imm32;
   am->Xam.IR.reg = reg;
   return am;
}
X86AMode* X86AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
   X86AMode* am = LibVEX_Alloc(sizeof(X86AMode));
   am->tag = Xam_IRRS;
   am->Xam.IRRS.imm = imm32;
   am->Xam.IRRS.base = base;
   am->Xam.IRRS.index = indEx;
   am->Xam.IRRS.shift = shift;
   vassert(shift >= 0 && shift <= 3);
   return am;
}
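
/* Example (illustrative): an Xam_IRRS amode denotes the address
   imm + base + (index << shift), so the operand 0x4(%ebp,%esi,4)
   would be built as

      X86AMode* am = X86AMode_IRRS(0x4, hregX86_EBP(), hregX86_ESI(), 2);

   ppX86AMode below renders the scale as 1 << shift. */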

X86AMode* dopyX86AMode ( X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         return X86AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg );
      case Xam_IRRS:
         return X86AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base,
                               am->Xam.IRRS.index, am->Xam.IRRS.shift );
      default:
         vpanic("dopyX86AMode");
   }
}

void ppX86AMode ( X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         if (am->Xam.IR.imm == 0)
            vex_printf("(");
         else
            vex_printf("0x%x(", am->Xam.IR.imm);
         ppHRegX86(am->Xam.IR.reg);
         vex_printf(")");
         return;
      case Xam_IRRS:
         vex_printf("0x%x(", am->Xam.IRRS.imm);
         ppHRegX86(am->Xam.IRRS.base);
         vex_printf(",");
         ppHRegX86(am->Xam.IRRS.index);
         vex_printf(",%d)", 1 << am->Xam.IRRS.shift);
         return;
      default:
         vpanic("ppX86AMode");
   }
}

static void addRegUsage_X86AMode ( HRegUsage* u, X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         addHRegUse(u, HRmRead, am->Xam.IR.reg);
         return;
      case Xam_IRRS:
         addHRegUse(u, HRmRead, am->Xam.IRRS.base);
         addHRegUse(u, HRmRead, am->Xam.IRRS.index);
         return;
      default:
         vpanic("addRegUsage_X86AMode");
   }
}

static void mapRegs_X86AMode ( HRegRemap* m, X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         am->Xam.IR.reg = lookupHRegRemap(m, am->Xam.IR.reg);
         return;
      case Xam_IRRS:
         am->Xam.IRRS.base = lookupHRegRemap(m, am->Xam.IRRS.base);
         am->Xam.IRRS.index = lookupHRegRemap(m, am->Xam.IRRS.index);
         return;
      default:
         vpanic("mapRegs_X86AMode");
   }
}

/* --------- Operand, which can be reg, immediate or memory. --------- */

X86RMI* X86RMI_Imm ( UInt imm32 ) {
   X86RMI* op         = LibVEX_Alloc(sizeof(X86RMI));
   op->tag            = Xrmi_Imm;
   op->Xrmi.Imm.imm32 = imm32;
   return op;
}
X86RMI* X86RMI_Reg ( HReg reg ) {
   X86RMI* op       = LibVEX_Alloc(sizeof(X86RMI));
   op->tag          = Xrmi_Reg;
   op->Xrmi.Reg.reg = reg;
   return op;
}
X86RMI* X86RMI_Mem ( X86AMode* am ) {
   X86RMI* op      = LibVEX_Alloc(sizeof(X86RMI));
   op->tag         = Xrmi_Mem;
   op->Xrmi.Mem.am = am;
   return op;
}

void ppX86RMI ( X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         vex_printf("$0x%x", op->Xrmi.Imm.imm32);
         return;
      case Xrmi_Reg:
         ppHRegX86(op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         ppX86AMode(op->Xrmi.Mem.am);
         return;
      default:
         vpanic("ppX86RMI");
   }
}

/* An X86RMI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_X86RMI ( HRegUsage* u, X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         return;
      case Xrmi_Reg:
         addHRegUse(u, HRmRead, op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         addRegUsage_X86AMode(u, op->Xrmi.Mem.am);
         return;
      default:
         vpanic("addRegUsage_X86RMI");
   }
}

static void mapRegs_X86RMI ( HRegRemap* m, X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         return;
      case Xrmi_Reg:
         op->Xrmi.Reg.reg = lookupHRegRemap(m, op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         mapRegs_X86AMode(m, op->Xrmi.Mem.am);
         return;
      default:
         vpanic("mapRegs_X86RMI");
   }
}
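
/* Example (illustrative): the three X86RMI forms, as ppX86RMI above
   would render them:

      X86RMI_Imm(0x42);                            // "$0x42"
      X86RMI_Reg(hregX86_ECX());                   // "%ecx"
      X86RMI_Mem(X86AMode_IR(0, hregX86_EAX()));   // "(%eax)"
*/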


/* --------- Operand, which can be reg or immediate only. --------- */

X86RI* X86RI_Imm ( UInt imm32 ) {
   X86RI* op         = LibVEX_Alloc(sizeof(X86RI));
   op->tag           = Xri_Imm;
   op->Xri.Imm.imm32 = imm32;
   return op;
}
X86RI* X86RI_Reg ( HReg reg ) {
   X86RI* op       = LibVEX_Alloc(sizeof(X86RI));
   op->tag         = Xri_Reg;
   op->Xri.Reg.reg = reg;
   return op;
}

void ppX86RI ( X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         vex_printf("$0x%x", op->Xri.Imm.imm32);
         return;
      case Xri_Reg:
         ppHRegX86(op->Xri.Reg.reg);
         return;
      default:
         vpanic("ppX86RI");
   }
}

/* An X86RI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_X86RI ( HRegUsage* u, X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         return;
      case Xri_Reg:
         addHRegUse(u, HRmRead, op->Xri.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_X86RI");
   }
}

static void mapRegs_X86RI ( HRegRemap* m, X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         return;
      case Xri_Reg:
         op->Xri.Reg.reg = lookupHRegRemap(m, op->Xri.Reg.reg);
         return;
      default:
         vpanic("mapRegs_X86RI");
   }
}


/* --------- Operand, which can be reg or memory only. --------- */

X86RM* X86RM_Reg ( HReg reg ) {
   X86RM* op       = LibVEX_Alloc(sizeof(X86RM));
   op->tag         = Xrm_Reg;
   op->Xrm.Reg.reg = reg;
   return op;
}
X86RM* X86RM_Mem ( X86AMode* am ) {
   X86RM* op      = LibVEX_Alloc(sizeof(X86RM));
   op->tag        = Xrm_Mem;
   op->Xrm.Mem.am = am;
   return op;
}

void ppX86RM ( X86RM* op ) {
   switch (op->tag) {
      case Xrm_Mem:
         ppX86AMode(op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         ppHRegX86(op->Xrm.Reg.reg);
         return;
      default:
         vpanic("ppX86RM");
   }
}

/* Because an X86RM can be both a source or destination operand, we
   have to supply a mode -- pertaining to the operand as a whole --
   indicating how it's being used. */
static void addRegUsage_X86RM ( HRegUsage* u, X86RM* op, HRegMode mode ) {
   switch (op->tag) {
      case Xrm_Mem:
         /* Memory is read, written or modified.  So we just want to
            know the regs read by the amode. */
         addRegUsage_X86AMode(u, op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         /* reg is read, written or modified.  Add it in the
            appropriate way. */
         addHRegUse(u, mode, op->Xrm.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_X86RM");
   }
}
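
/* Example (illustrative): the mode applies to the operand as a whole,
   not to the registers inside it.  A register operand that is written
   is recorded as written, whereas a memory operand's address registers
   are only ever read, whatever happens to the memory itself:

      addRegUsage_X86RM(u, X86RM_Reg(hregX86_EDX()), HRmWrite);
      // records %edx as written

      addRegUsage_X86RM(u, X86RM_Mem(X86AMode_IR(4, hregX86_ESI())),
                        HRmWrite);
      // records %esi as read -- it only supplies the address
*/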

static void mapRegs_X86RM ( HRegRemap* m, X86RM* op )
{
   switch (op->tag) {
      case Xrm_Mem:
         mapRegs_X86AMode(m, op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         op->Xrm.Reg.reg = lookupHRegRemap(m, op->Xrm.Reg.reg);
         return;
      default:
         vpanic("mapRegs_X86RM");
   }
}


/* --------- Instructions. --------- */

const HChar* showX86UnaryOp ( X86UnaryOp op ) {
   switch (op) {
      case Xun_NOT: return "not";
      case Xun_NEG: return "neg";
      default: vpanic("showX86UnaryOp");
   }
}

const HChar* showX86AluOp ( X86AluOp op ) {
   switch (op) {
      case Xalu_MOV:  return "mov";
      case Xalu_CMP:  return "cmp";
      case Xalu_ADD:  return "add";
      case Xalu_SUB:  return "sub";
      case Xalu_ADC:  return "adc";
      case Xalu_SBB:  return "sbb";
      case Xalu_AND:  return "and";
      case Xalu_OR:   return "or";
      case Xalu_XOR:  return "xor";
      case Xalu_MUL:  return "mul";
      default: vpanic("showX86AluOp");
   }
}

const HChar* showX86ShiftOp ( X86ShiftOp op ) {
   switch (op) {
      case Xsh_SHL: return "shl";
      case Xsh_SHR: return "shr";
      case Xsh_SAR: return "sar";
      default: vpanic("showX86ShiftOp");
   }
}

const HChar* showX86FpOp ( X86FpOp op ) {
   switch (op) {
      case Xfp_ADD:    return "add";
      case Xfp_SUB:    return "sub";
      case Xfp_MUL:    return "mul";
      case Xfp_DIV:    return "div";
      case Xfp_SCALE:  return "scale";
      case Xfp_ATAN:   return "atan";
      case Xfp_YL2X:   return "yl2x";
      case Xfp_YL2XP1: return "yl2xp1";
      case Xfp_PREM:   return "prem";
      case Xfp_PREM1:  return "prem1";
      case Xfp_SQRT:   return "sqrt";
      case Xfp_ABS:    return "abs";
      case Xfp_NEG:    return "chs";
      case Xfp_MOV:    return "mov";
      case Xfp_SIN:    return "sin";
      case Xfp_COS:    return "cos";
      case Xfp_TAN:    return "tan";
      case Xfp_ROUND:  return "round";
      case Xfp_2XM1:   return "2xm1";
      default: vpanic("showX86FpOp");
   }
}

const HChar* showX86SseOp ( X86SseOp op ) {
   switch (op) {
      case Xsse_MOV:      return "mov(?!)";
      case Xsse_ADDF:     return "add";
      case Xsse_SUBF:     return "sub";
      case Xsse_MULF:     return "mul";
      case Xsse_DIVF:     return "div";
      case Xsse_MAXF:     return "max";
      case Xsse_MINF:     return "min";
      case Xsse_CMPEQF:   return "cmpFeq";
      case Xsse_CMPLTF:   return "cmpFlt";
      case Xsse_CMPLEF:   return "cmpFle";
      case Xsse_CMPUNF:   return "cmpFun";
      case Xsse_RCPF:     return "rcp";
      case Xsse_RSQRTF:   return "rsqrt";
      case Xsse_SQRTF:    return "sqrt";
      case Xsse_AND:      return "and";
      case Xsse_OR:       return "or";
      case Xsse_XOR:      return "xor";
      case Xsse_ANDN:     return "andn";
      case Xsse_ADD8:     return "paddb";
      case Xsse_ADD16:    return "paddw";
      case Xsse_ADD32:    return "paddd";
      case Xsse_ADD64:    return "paddq";
      case Xsse_QADD8U:   return "paddusb";
      case Xsse_QADD16U:  return "paddusw";
      case Xsse_QADD8S:   return "paddsb";
      case Xsse_QADD16S:  return "paddsw";
      case Xsse_SUB8:     return "psubb";
      case Xsse_SUB16:    return "psubw";
      case Xsse_SUB32:    return "psubd";
      case Xsse_SUB64:    return "psubq";
      case Xsse_QSUB8U:   return "psubusb";
      case Xsse_QSUB16U:  return "psubusw";
      case Xsse_QSUB8S:   return "psubsb";
      case Xsse_QSUB16S:  return "psubsw";
      case Xsse_MUL16:    return "pmullw";
      case Xsse_MULHI16U: return "pmulhuw";
      case Xsse_MULHI16S: return "pmulhw";
      case Xsse_AVG8U:    return "pavgb";
      case Xsse_AVG16U:   return "pavgw";
      case Xsse_MAX16S:   return "pmaxw";
      case Xsse_MAX8U:    return "pmaxub";
      case Xsse_MIN16S:   return "pminw";
      case Xsse_MIN8U:    return "pminub";
      case Xsse_CMPEQ8:   return "pcmpeqb";
      case Xsse_CMPEQ16:  return "pcmpeqw";
      case Xsse_CMPEQ32:  return "pcmpeqd";
      case Xsse_CMPGT8S:  return "pcmpgtb";
      case Xsse_CMPGT16S: return "pcmpgtw";
      case Xsse_CMPGT32S: return "pcmpgtd";
      case Xsse_SHL16:    return "psllw";
      case Xsse_SHL32:    return "pslld";
      case Xsse_SHL64:    return "psllq";
      case Xsse_SHR16:    return "psrlw";
      case Xsse_SHR32:    return "psrld";
      case Xsse_SHR64:    return "psrlq";
      case Xsse_SAR16:    return "psraw";
      case Xsse_SAR32:    return "psrad";
      case Xsse_PACKSSD:  return "packssdw";
      case Xsse_PACKSSW:  return "packsswb";
      case Xsse_PACKUSW:  return "packuswb";
      case Xsse_UNPCKHB:  return "punpckhb";
      case Xsse_UNPCKHW:  return "punpckhw";
      case Xsse_UNPCKHD:  return "punpckhd";
      case Xsse_UNPCKHQ:  return "punpckhq";
      case Xsse_UNPCKLB:  return "punpcklb";
      case Xsse_UNPCKLW:  return "punpcklw";
      case Xsse_UNPCKLD:  return "punpckld";
      case Xsse_UNPCKLQ:  return "punpcklq";
      default: vpanic("showX86SseOp");
   }
}

X86Instr* X86Instr_Alu32R ( X86AluOp op, X86RMI* src, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_Alu32R;
   i->Xin.Alu32R.op  = op;
   i->Xin.Alu32R.src = src;
   i->Xin.Alu32R.dst = dst;
   return i;
}
X86Instr* X86Instr_Alu32M ( X86AluOp op, X86RI* src, X86AMode* dst ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_Alu32M;
   i->Xin.Alu32M.op  = op;
   i->Xin.Alu32M.src = src;
   i->Xin.Alu32M.dst = dst;
   vassert(op != Xalu_MUL);
   return i;
}
X86Instr* X86Instr_Sh32 ( X86ShiftOp op, UInt src, HReg dst ) {
   X86Instr* i     = LibVEX_Alloc(sizeof(X86Instr));
   i->tag          = Xin_Sh32;
   i->Xin.Sh32.op  = op;
   i->Xin.Sh32.src = src;
   i->Xin.Sh32.dst = dst;
   return i;
}
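/* Note (illustrative): in Xin_Sh32 a .src of 0 means "shift by %cl",
   not "shift by zero"; compare the handling of .src in ppX86Instr and
   getRegUsage_X86Instr below.  So, for example:

      X86Instr_Sh32(Xsh_SHL, 3, dst);   // shll $3,dst
      X86Instr_Sh32(Xsh_SHL, 0, dst);   // shll %cl,dst
*/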
X86Instr* X86Instr_Test32 ( UInt imm32, X86RM* dst ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_Test32;
   i->Xin.Test32.imm32 = imm32;
   i->Xin.Test32.dst   = dst;
   return i;
}
X86Instr* X86Instr_Unary32 ( X86UnaryOp op, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_Unary32;
   i->Xin.Unary32.op  = op;
   i->Xin.Unary32.dst = dst;
   return i;
}
X86Instr* X86Instr_Lea32 ( X86AMode* am, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_Lea32;
   i->Xin.Lea32.am    = am;
   i->Xin.Lea32.dst   = dst;
   return i;
}
X86Instr* X86Instr_MulL ( Bool syned, X86RM* src ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_MulL;
   i->Xin.MulL.syned  = syned;
   i->Xin.MulL.src    = src;
   return i;
}
X86Instr* X86Instr_Div ( Bool syned, X86RM* src ) {
   X86Instr* i      = LibVEX_Alloc(sizeof(X86Instr));
   i->tag           = Xin_Div;
   i->Xin.Div.syned = syned;
   i->Xin.Div.src   = src;
   return i;
}
X86Instr* X86Instr_Sh3232 ( X86ShiftOp op, UInt amt, HReg src, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_Sh3232;
   i->Xin.Sh3232.op  = op;
   i->Xin.Sh3232.amt = amt;
   i->Xin.Sh3232.src = src;
   i->Xin.Sh3232.dst = dst;
   vassert(op == Xsh_SHL || op == Xsh_SHR);
   return i;
}
X86Instr* X86Instr_Push ( X86RMI* src ) {
   X86Instr* i     = LibVEX_Alloc(sizeof(X86Instr));
   i->tag          = Xin_Push;
   i->Xin.Push.src = src;
   return i;
}
X86Instr* X86Instr_Call ( X86CondCode cond, Addr32 target, Int regparms,
                          RetLoc rloc ) {
   X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
   i->tag               = Xin_Call;
   i->Xin.Call.cond     = cond;
   i->Xin.Call.target   = target;
   i->Xin.Call.regparms = regparms;
   i->Xin.Call.rloc     = rloc;
   vassert(regparms >= 0 && regparms <= 3);
   vassert(is_sane_RetLoc(rloc));
   return i;
}
X86Instr* X86Instr_XDirect ( Addr32 dstGA, X86AMode* amEIP,
                             X86CondCode cond, Bool toFastEP ) {
   X86Instr* i             = LibVEX_Alloc(sizeof(X86Instr));
   i->tag                  = Xin_XDirect;
   i->Xin.XDirect.dstGA    = dstGA;
   i->Xin.XDirect.amEIP    = amEIP;
   i->Xin.XDirect.cond     = cond;
   i->Xin.XDirect.toFastEP = toFastEP;
   return i;
}
X86Instr* X86Instr_XIndir ( HReg dstGA, X86AMode* amEIP,
                            X86CondCode cond ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_XIndir;
   i->Xin.XIndir.dstGA = dstGA;
   i->Xin.XIndir.amEIP = amEIP;
   i->Xin.XIndir.cond  = cond;
   return i;
}
X86Instr* X86Instr_XAssisted ( HReg dstGA, X86AMode* amEIP,
                               X86CondCode cond, IRJumpKind jk ) {
   X86Instr* i            = LibVEX_Alloc(sizeof(X86Instr));
   i->tag                 = Xin_XAssisted;
   i->Xin.XAssisted.dstGA = dstGA;
   i->Xin.XAssisted.amEIP = amEIP;
   i->Xin.XAssisted.cond  = cond;
   i->Xin.XAssisted.jk    = jk;
   return i;
}
X86Instr* X86Instr_CMov32 ( X86CondCode cond, X86RM* src, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_CMov32;
   i->Xin.CMov32.cond = cond;
   i->Xin.CMov32.src  = src;
   i->Xin.CMov32.dst  = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned,
                            X86AMode* src, HReg dst ) {
   X86Instr* i           = LibVEX_Alloc(sizeof(X86Instr));
   i->tag                = Xin_LoadEX;
   i->Xin.LoadEX.szSmall = szSmall;
   i->Xin.LoadEX.syned   = syned;
   i->Xin.LoadEX.src     = src;
   i->Xin.LoadEX.dst     = dst;
   vassert(szSmall == 1 || szSmall == 2);
   return i;
}
X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst ) {
   X86Instr* i      = LibVEX_Alloc(sizeof(X86Instr));
   i->tag           = Xin_Store;
   i->Xin.Store.sz  = sz;
   i->Xin.Store.src = src;
   i->Xin.Store.dst = dst;
   vassert(sz == 1 || sz == 2);
   return i;
}
X86Instr* X86Instr_Set32 ( X86CondCode cond, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_Set32;
   i->Xin.Set32.cond = cond;
   i->Xin.Set32.dst  = dst;
   return i;
}
X86Instr* X86Instr_Bsfr32 ( Bool isFwds, HReg src, HReg dst ) {
   X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
   i->tag               = Xin_Bsfr32;
   i->Xin.Bsfr32.isFwds = isFwds;
   i->Xin.Bsfr32.src    = src;
   i->Xin.Bsfr32.dst    = dst;
   return i;
}
X86Instr* X86Instr_MFence ( UInt hwcaps ) {
   X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
   i->tag               = Xin_MFence;
   i->Xin.MFence.hwcaps = hwcaps;
   vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_MMXEXT
                            |VEX_HWCAPS_X86_SSE1
                            |VEX_HWCAPS_X86_SSE2
                            |VEX_HWCAPS_X86_SSE3
                            |VEX_HWCAPS_X86_LZCNT)));
   return i;
}
X86Instr* X86Instr_ACAS ( X86AMode* addr, UChar sz ) {
   X86Instr* i      = LibVEX_Alloc(sizeof(X86Instr));
   i->tag           = Xin_ACAS;
   i->Xin.ACAS.addr = addr;
   i->Xin.ACAS.sz   = sz;
   vassert(sz == 4 || sz == 2 || sz == 1);
   return i;
}
X86Instr* X86Instr_DACAS ( X86AMode* addr ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_DACAS;
   i->Xin.DACAS.addr = addr;
   return i;
}
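
/* Note (illustrative): the printers and getRegUsage_X86Instr below
   imply the usual cmpxchg register conventions for these: ACAS holds
   the expected old value in %eax and the new value in %ebx, while
   DACAS (cmpxchg8b) uses the pairs %edx:%eax and %ecx:%ebx in the
   same roles. */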

X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_FpUnary;
   i->Xin.FpUnary.op  = op;
   i->Xin.FpUnary.src = src;
   i->Xin.FpUnary.dst = dst;
   return i;
}
X86Instr* X86Instr_FpBinary ( X86FpOp op, HReg srcL, HReg srcR, HReg dst ) {
   X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
   i->tag               = Xin_FpBinary;
   i->Xin.FpBinary.op   = op;
   i->Xin.FpBinary.srcL = srcL;
   i->Xin.FpBinary.srcR = srcR;
   i->Xin.FpBinary.dst  = dst;
   return i;
}
X86Instr* X86Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, X86AMode* addr ) {
   X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
   i->tag               = Xin_FpLdSt;
   i->Xin.FpLdSt.isLoad = isLoad;
   i->Xin.FpLdSt.sz     = sz;
   i->Xin.FpLdSt.reg    = reg;
   i->Xin.FpLdSt.addr   = addr;
   vassert(sz == 4 || sz == 8 || sz == 10);
   return i;
}
X86Instr* X86Instr_FpLdStI ( Bool isLoad, UChar sz,
                             HReg reg, X86AMode* addr ) {
   X86Instr* i           = LibVEX_Alloc(sizeof(X86Instr));
   i->tag                = Xin_FpLdStI;
   i->Xin.FpLdStI.isLoad = isLoad;
   i->Xin.FpLdStI.sz     = sz;
   i->Xin.FpLdStI.reg    = reg;
   i->Xin.FpLdStI.addr   = addr;
   vassert(sz == 2 || sz == 4 || sz == 8);
   return i;
}
X86Instr* X86Instr_Fp64to32 ( HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_Fp64to32;
   i->Xin.Fp64to32.src = src;
   i->Xin.Fp64to32.dst = dst;
   return i;
}
X86Instr* X86Instr_FpCMov ( X86CondCode cond, HReg src, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_FpCMov;
   i->Xin.FpCMov.cond = cond;
   i->Xin.FpCMov.src  = src;
   i->Xin.FpCMov.dst  = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
X86Instr* X86Instr_FpLdCW ( X86AMode* addr ) {
   X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
   i->tag               = Xin_FpLdCW;
   i->Xin.FpLdCW.addr   = addr;
   return i;
}
X86Instr* X86Instr_FpStSW_AX ( void ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag      = Xin_FpStSW_AX;
   return i;
}
X86Instr* X86Instr_FpCmp ( HReg srcL, HReg srcR, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_FpCmp;
   i->Xin.FpCmp.srcL = srcL;
   i->Xin.FpCmp.srcR = srcR;
   i->Xin.FpCmp.dst  = dst;
   return i;
}
X86Instr* X86Instr_SseConst ( UShort con, HReg dst ) {
   X86Instr* i            = LibVEX_Alloc(sizeof(X86Instr));
   i->tag                 = Xin_SseConst;
   i->Xin.SseConst.con    = con;
   i->Xin.SseConst.dst    = dst;
   vassert(hregClass(dst) == HRcVec128);
   return i;
}
X86Instr* X86Instr_SseLdSt ( Bool isLoad, HReg reg, X86AMode* addr ) {
   X86Instr* i           = LibVEX_Alloc(sizeof(X86Instr));
   i->tag                = Xin_SseLdSt;
   i->Xin.SseLdSt.isLoad = isLoad;
   i->Xin.SseLdSt.reg    = reg;
   i->Xin.SseLdSt.addr   = addr;
   return i;
}
X86Instr* X86Instr_SseLdzLO ( Int sz, HReg reg, X86AMode* addr )
{
   X86Instr* i           = LibVEX_Alloc(sizeof(X86Instr));
   i->tag                = Xin_SseLdzLO;
   i->Xin.SseLdzLO.sz    = toUChar(sz);
   i->Xin.SseLdzLO.reg   = reg;
   i->Xin.SseLdzLO.addr  = addr;
   vassert(sz == 4 || sz == 8);
   return i;
}
X86Instr* X86Instr_Sse32Fx4 ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_Sse32Fx4;
   i->Xin.Sse32Fx4.op  = op;
   i->Xin.Sse32Fx4.src = src;
   i->Xin.Sse32Fx4.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_Sse32FLo ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_Sse32FLo;
   i->Xin.Sse32FLo.op  = op;
   i->Xin.Sse32FLo.src = src;
   i->Xin.Sse32FLo.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_Sse64Fx2 ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_Sse64Fx2;
   i->Xin.Sse64Fx2.op  = op;
   i->Xin.Sse64Fx2.src = src;
   i->Xin.Sse64Fx2.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_Sse64FLo ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_Sse64FLo;
   i->Xin.Sse64FLo.op  = op;
   i->Xin.Sse64FLo.src = src;
   i->Xin.Sse64FLo.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_SseReRg ( X86SseOp op, HReg re, HReg rg ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_SseReRg;
   i->Xin.SseReRg.op  = op;
   i->Xin.SseReRg.src = re;
   i->Xin.SseReRg.dst = rg;
   return i;
}
X86Instr* X86Instr_SseCMov ( X86CondCode cond, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_SseCMov;
   i->Xin.SseCMov.cond = cond;
   i->Xin.SseCMov.src  = src;
   i->Xin.SseCMov.dst  = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
X86Instr* X86Instr_SseShuf ( Int order, HReg src, HReg dst ) {
   X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
   i->tag               = Xin_SseShuf;
   i->Xin.SseShuf.order = order;
   i->Xin.SseShuf.src   = src;
   i->Xin.SseShuf.dst   = dst;
   vassert(order >= 0 && order <= 0xFF);
   return i;
}
X86Instr* X86Instr_EvCheck ( X86AMode* amCounter,
                             X86AMode* amFailAddr ) {
   X86Instr* i               = LibVEX_Alloc(sizeof(X86Instr));
   i->tag                    = Xin_EvCheck;
   i->Xin.EvCheck.amCounter  = amCounter;
   i->Xin.EvCheck.amFailAddr = amFailAddr;
   return i;
}
X86Instr* X86Instr_ProfInc ( void ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag      = Xin_ProfInc;
   return i;
}
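
/* Example (illustrative, not part of the original interface): building
   an instruction and printing it with ppX86Instr below.  "addl $4,%esp"
   would be constructed as

      X86Instr* ins
         = X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(4), hregX86_ESP());
      ppX86Instr(ins, False);   // prints "addl $0x4,%esp"
*/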

void ppX86Instr ( X86Instr* i, Bool mode64 ) {
   vassert(mode64 == False);
   switch (i->tag) {
      case Xin_Alu32R:
         vex_printf("%sl ", showX86AluOp(i->Xin.Alu32R.op));
         ppX86RMI(i->Xin.Alu32R.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Alu32R.dst);
         return;
      case Xin_Alu32M:
         vex_printf("%sl ", showX86AluOp(i->Xin.Alu32M.op));
         ppX86RI(i->Xin.Alu32M.src);
         vex_printf(",");
         ppX86AMode(i->Xin.Alu32M.dst);
         return;
      case Xin_Sh32:
         vex_printf("%sl ", showX86ShiftOp(i->Xin.Sh32.op));
         if (i->Xin.Sh32.src == 0)
            vex_printf("%%cl,");
         else
            vex_printf("$%d,", (Int)i->Xin.Sh32.src);
         ppHRegX86(i->Xin.Sh32.dst);
         return;
      case Xin_Test32:
         vex_printf("testl $%d,", (Int)i->Xin.Test32.imm32);
         ppX86RM(i->Xin.Test32.dst);
         return;
      case Xin_Unary32:
         vex_printf("%sl ", showX86UnaryOp(i->Xin.Unary32.op));
         ppHRegX86(i->Xin.Unary32.dst);
         return;
      case Xin_Lea32:
         vex_printf("leal ");
         ppX86AMode(i->Xin.Lea32.am);
         vex_printf(",");
         ppHRegX86(i->Xin.Lea32.dst);
         return;
      case Xin_MulL:
         vex_printf("%cmull ", i->Xin.MulL.syned ? 's' : 'u');
         ppX86RM(i->Xin.MulL.src);
         return;
      case Xin_Div:
         vex_printf("%cdivl ", i->Xin.Div.syned ? 's' : 'u');
         ppX86RM(i->Xin.Div.src);
         return;
      case Xin_Sh3232:
         vex_printf("%sdl ", showX86ShiftOp(i->Xin.Sh3232.op));
         if (i->Xin.Sh3232.amt == 0)
            vex_printf(" %%cl,");
         else
            vex_printf(" $%d,", (Int)i->Xin.Sh3232.amt);
         ppHRegX86(i->Xin.Sh3232.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sh3232.dst);
         return;
      case Xin_Push:
         vex_printf("pushl ");
         ppX86RMI(i->Xin.Push.src);
         return;
      case Xin_Call:
         vex_printf("call%s[%d,",
                    i->Xin.Call.cond==Xcc_ALWAYS
                       ? "" : showX86CondCode(i->Xin.Call.cond),
                    i->Xin.Call.regparms);
         ppRetLoc(i->Xin.Call.rloc);
         vex_printf("] 0x%x", i->Xin.Call.target);
         break;
      case Xin_XDirect:
         vex_printf("(xDirect) ");
         vex_printf("if (%%eflags.%s) { ",
                    showX86CondCode(i->Xin.XDirect.cond));
         vex_printf("movl $0x%x,", i->Xin.XDirect.dstGA);
         ppX86AMode(i->Xin.XDirect.amEIP);
         vex_printf("; ");
         vex_printf("movl $disp_cp_chain_me_to_%sEP,%%edx; call *%%edx }",
                    i->Xin.XDirect.toFastEP ? "fast" : "slow");
         return;
      case Xin_XIndir:
         vex_printf("(xIndir) ");
         vex_printf("if (%%eflags.%s) { movl ",
                    showX86CondCode(i->Xin.XIndir.cond));
         ppHRegX86(i->Xin.XIndir.dstGA);
         vex_printf(",");
         ppX86AMode(i->Xin.XIndir.amEIP);
         vex_printf("; movl $disp_indir,%%edx; jmp *%%edx }");
         return;
      case Xin_XAssisted:
         vex_printf("(xAssisted) ");
         vex_printf("if (%%eflags.%s) { ",
                    showX86CondCode(i->Xin.XAssisted.cond));
         vex_printf("movl ");
         ppHRegX86(i->Xin.XAssisted.dstGA);
         vex_printf(",");
         ppX86AMode(i->Xin.XAssisted.amEIP);
         vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%ebp",
                    (Int)i->Xin.XAssisted.jk);
         vex_printf("; movl $disp_assisted,%%edx; jmp *%%edx }");
         return;
      case Xin_CMov32:
         vex_printf("cmov%s ", showX86CondCode(i->Xin.CMov32.cond));
         ppX86RM(i->Xin.CMov32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.CMov32.dst);
         return;
      case Xin_LoadEX:
         vex_printf("mov%c%cl ",
                    i->Xin.LoadEX.syned ? 's' : 'z',
                    i->Xin.LoadEX.szSmall==1 ? 'b' : 'w');
         ppX86AMode(i->Xin.LoadEX.src);
         vex_printf(",");
         ppHRegX86(i->Xin.LoadEX.dst);
         return;
      case Xin_Store:
         vex_printf("mov%c ", i->Xin.Store.sz==1 ? 'b' : 'w');
         ppHRegX86(i->Xin.Store.src);
         vex_printf(",");
         ppX86AMode(i->Xin.Store.dst);
         return;
      case Xin_Set32:
         vex_printf("setl%s ", showX86CondCode(i->Xin.Set32.cond));
         ppHRegX86(i->Xin.Set32.dst);
         return;
      case Xin_Bsfr32:
         vex_printf("bs%cl ", i->Xin.Bsfr32.isFwds ? 'f' : 'r');
         ppHRegX86(i->Xin.Bsfr32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Bsfr32.dst);
         return;
      case Xin_MFence:
         vex_printf("mfence(%s)",
                    LibVEX_ppVexHwCaps(VexArchX86,i->Xin.MFence.hwcaps));
         return;
      case Xin_ACAS:
         vex_printf("lock cmpxchg%c ",
                    i->Xin.ACAS.sz==1 ? 'b'
                                      : i->Xin.ACAS.sz==2 ? 'w' : 'l');
         vex_printf("{%%eax->%%ebx},");
         ppX86AMode(i->Xin.ACAS.addr);
         return;
      case Xin_DACAS:
         vex_printf("lock cmpxchg8b {%%edx:%%eax->%%ecx:%%ebx},");
         ppX86AMode(i->Xin.DACAS.addr);
         return;
      case Xin_FpUnary:
         vex_printf("g%sD ", showX86FpOp(i->Xin.FpUnary.op));
         ppHRegX86(i->Xin.FpUnary.src);
         vex_printf(",");
         ppHRegX86(i->Xin.FpUnary.dst);
         break;
      case Xin_FpBinary:
         vex_printf("g%sD ", showX86FpOp(i->Xin.FpBinary.op));
         ppHRegX86(i->Xin.FpBinary.srcL);
         vex_printf(",");
         ppHRegX86(i->Xin.FpBinary.srcR);
         vex_printf(",");
         ppHRegX86(i->Xin.FpBinary.dst);
         break;
      case Xin_FpLdSt:
         if (i->Xin.FpLdSt.isLoad) {
            vex_printf("gld%c ", i->Xin.FpLdSt.sz==10 ? 'T'
                                 : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
            ppX86AMode(i->Xin.FpLdSt.addr);
            vex_printf(", ");
            ppHRegX86(i->Xin.FpLdSt.reg);
         } else {
            vex_printf("gst%c ", i->Xin.FpLdSt.sz==10 ? 'T'
                                 : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
            ppHRegX86(i->Xin.FpLdSt.reg);
            vex_printf(", ");
            ppX86AMode(i->Xin.FpLdSt.addr);
         }
         return;
      case Xin_FpLdStI:
         if (i->Xin.FpLdStI.isLoad) {
            vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
                                  i->Xin.FpLdStI.sz==4 ? "l" : "w");
            ppX86AMode(i->Xin.FpLdStI.addr);
            vex_printf(", ");
            ppHRegX86(i->Xin.FpLdStI.reg);
         } else {
            vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
                                  i->Xin.FpLdStI.sz==4 ? "l" : "w");
            ppHRegX86(i->Xin.FpLdStI.reg);
            vex_printf(", ");
            ppX86AMode(i->Xin.FpLdStI.addr);
         }
         return;
      case Xin_Fp64to32:
         vex_printf("gdtof ");
         ppHRegX86(i->Xin.Fp64to32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Fp64to32.dst);
         return;
      case Xin_FpCMov:
         vex_printf("gcmov%s ", showX86CondCode(i->Xin.FpCMov.cond));
         ppHRegX86(i->Xin.FpCMov.src);
         vex_printf(",");
         ppHRegX86(i->Xin.FpCMov.dst);
         return;
      case Xin_FpLdCW:
         vex_printf("fldcw ");
         ppX86AMode(i->Xin.FpLdCW.addr);
         return;
      case Xin_FpStSW_AX:
         vex_printf("fstsw %%ax");
         return;
      case Xin_FpCmp:
         vex_printf("gcmp ");
         ppHRegX86(i->Xin.FpCmp.srcL);
         vex_printf(",");
         ppHRegX86(i->Xin.FpCmp.srcR);
         vex_printf(",");
         ppHRegX86(i->Xin.FpCmp.dst);
         break;
      case Xin_SseConst:
         vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con);
         ppHRegX86(i->Xin.SseConst.dst);
         break;
      case Xin_SseLdSt:
         vex_printf("movups ");
         if (i->Xin.SseLdSt.isLoad) {
            ppX86AMode(i->Xin.SseLdSt.addr);
            vex_printf(",");
            ppHRegX86(i->Xin.SseLdSt.reg);
         } else {
            ppHRegX86(i->Xin.SseLdSt.reg);
            vex_printf(",");
            ppX86AMode(i->Xin.SseLdSt.addr);
         }
         return;
      case Xin_SseLdzLO:
         vex_printf("movs%s ", i->Xin.SseLdzLO.sz==4 ? "s" : "d");
         ppX86AMode(i->Xin.SseLdzLO.addr);
         vex_printf(",");
         ppHRegX86(i->Xin.SseLdzLO.reg);
         return;
      case Xin_Sse32Fx4:
         vex_printf("%sps ", showX86SseOp(i->Xin.Sse32Fx4.op));
         ppHRegX86(i->Xin.Sse32Fx4.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse32Fx4.dst);
         return;
      case Xin_Sse32FLo:
         vex_printf("%sss ", showX86SseOp(i->Xin.Sse32FLo.op));
         ppHRegX86(i->Xin.Sse32FLo.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse32FLo.dst);
         return;
      case Xin_Sse64Fx2:
         vex_printf("%spd ", showX86SseOp(i->Xin.Sse64Fx2.op));
         ppHRegX86(i->Xin.Sse64Fx2.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse64Fx2.dst);
         return;
      case Xin_Sse64FLo:
         vex_printf("%ssd ", showX86SseOp(i->Xin.Sse64FLo.op));
         ppHRegX86(i->Xin.Sse64FLo.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse64FLo.dst);
         return;
      case Xin_SseReRg:
         vex_printf("%s ", showX86SseOp(i->Xin.SseReRg.op));
         ppHRegX86(i->Xin.SseReRg.src);
         vex_printf(",");
         ppHRegX86(i->Xin.SseReRg.dst);
         return;
      case Xin_SseCMov:
         vex_printf("cmov%s ", showX86CondCode(i->Xin.SseCMov.cond));
         ppHRegX86(i->Xin.SseCMov.src);
         vex_printf(",");
         ppHRegX86(i->Xin.SseCMov.dst);
         return;
      case Xin_SseShuf:
         vex_printf("pshufd $0x%x,", i->Xin.SseShuf.order);
         ppHRegX86(i->Xin.SseShuf.src);
         vex_printf(",");
         ppHRegX86(i->Xin.SseShuf.dst);
         return;
      case Xin_EvCheck:
         vex_printf("(evCheck) decl ");
         ppX86AMode(i->Xin.EvCheck.amCounter);
         vex_printf("; jns nofail; jmp *");
         ppX86AMode(i->Xin.EvCheck.amFailAddr);
         vex_printf("; nofail:");
         return;
      case Xin_ProfInc:
         vex_printf("(profInc) addl $1,NotKnownYet; "
                    "adcl $0,NotKnownYet+4");
         return;
      default:
         vpanic("ppX86Instr");
   }
}

/* --------- Helpers for register allocation. --------- */

void getRegUsage_X86Instr (HRegUsage* u, X86Instr* i, Bool mode64)
{
   Bool unary;
   vassert(mode64 == False);
   initHRegUsage(u);
   switch (i->tag) {
      case Xin_Alu32R:
         addRegUsage_X86RMI(u, i->Xin.Alu32R.src);
         if (i->Xin.Alu32R.op == Xalu_MOV) {
            addHRegUse(u, HRmWrite, i->Xin.Alu32R.dst);
            return;
         }
         if (i->Xin.Alu32R.op == Xalu_CMP) {
            addHRegUse(u, HRmRead, i->Xin.Alu32R.dst);
            return;
         }
         addHRegUse(u, HRmModify, i->Xin.Alu32R.dst);
         return;
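      /* Illustrative note on the Alu32R split above: Xalu_MOV only
         writes its destination and Xalu_CMP only reads it (compare
         "movl %eax,%ebx" with "cmpl %eax,%ebx"); every other ALU op,
         e.g. "addl %eax,%ebx", both reads and writes it, hence
         HRmModify. */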
      case Xin_Alu32M:
         addRegUsage_X86RI(u, i->Xin.Alu32M.src);
         addRegUsage_X86AMode(u, i->Xin.Alu32M.dst);
         return;
      case Xin_Sh32:
         addHRegUse(u, HRmModify, i->Xin.Sh32.dst);
         if (i->Xin.Sh32.src == 0)
            addHRegUse(u, HRmRead, hregX86_ECX());
         return;
      case Xin_Test32:
         addRegUsage_X86RM(u, i->Xin.Test32.dst, HRmRead);
         return;
      case Xin_Unary32:
         addHRegUse(u, HRmModify, i->Xin.Unary32.dst);
         return;
      case Xin_Lea32:
         addRegUsage_X86AMode(u, i->Xin.Lea32.am);
         addHRegUse(u, HRmWrite, i->Xin.Lea32.dst);
         return;
      case Xin_MulL:
         addRegUsage_X86RM(u, i->Xin.MulL.src, HRmRead);
         addHRegUse(u, HRmModify, hregX86_EAX());
         addHRegUse(u, HRmWrite, hregX86_EDX());
         return;
      case Xin_Div:
         addRegUsage_X86RM(u, i->Xin.Div.src, HRmRead);
         addHRegUse(u, HRmModify, hregX86_EAX());
         addHRegUse(u, HRmModify, hregX86_EDX());
         return;
      case Xin_Sh3232:
         addHRegUse(u, HRmRead, i->Xin.Sh3232.src);
         addHRegUse(u, HRmModify, i->Xin.Sh3232.dst);
         if (i->Xin.Sh3232.amt == 0)
            addHRegUse(u, HRmRead, hregX86_ECX());
         return;
      case Xin_Push:
         addRegUsage_X86RMI(u, i->Xin.Push.src);
         addHRegUse(u, HRmModify, hregX86_ESP());
         return;
      case Xin_Call:
         /* This is a bit subtle. */
         /* First off, claim it trashes all the caller-saved regs
            which fall within the register allocator's jurisdiction.
            These I believe to be %eax %ecx %edx and all the xmm
            registers. */
         addHRegUse(u, HRmWrite, hregX86_EAX());
         addHRegUse(u, HRmWrite, hregX86_ECX());
         addHRegUse(u, HRmWrite, hregX86_EDX());
         addHRegUse(u, HRmWrite, hregX86_XMM0());
         addHRegUse(u, HRmWrite, hregX86_XMM1());
         addHRegUse(u, HRmWrite, hregX86_XMM2());
         addHRegUse(u, HRmWrite, hregX86_XMM3());
         addHRegUse(u, HRmWrite, hregX86_XMM4());
         addHRegUse(u, HRmWrite, hregX86_XMM5());
         addHRegUse(u, HRmWrite, hregX86_XMM6());
         addHRegUse(u, HRmWrite, hregX86_XMM7());
         /* Now we have to state any parameter-carrying registers
            which might be read.  This depends on the regparmness. */
         switch (i->Xin.Call.regparms) {
            case 3: addHRegUse(u, HRmRead, hregX86_ECX()); /*fallthru*/
            case 2: addHRegUse(u, HRmRead, hregX86_EDX()); /*fallthru*/
            case 1: addHRegUse(u, HRmRead, hregX86_EAX()); break;
            case 0: break;
            default: vpanic("getRegUsage_X86Instr:Call:regparms");
         }
         /* Finally, there is the issue that the insn trashes a
            register because the literal target address has to be
            loaded into a register.  Fortunately, for the 0/1/2
            regparm case, we can use EAX, EDX and ECX respectively, so
            this does not cause any further damage.  For the 3-regparm
            case, we'll have to choose another register arbitrarily --
            since A, D and C are used for parameters -- and so we might
            as well choose EDI. */
         if (i->Xin.Call.regparms == 3)
            addHRegUse(u, HRmWrite, hregX86_EDI());
         /* Upshot of this is that the assembler really must observe
            the here-stated convention of which register to use as an
            address temporary, depending on the regparmness: 0==EAX,
            1==EDX, 2==ECX, 3==EDI. */
         return;
      /* XDirect/XIndir/XAssisted are also a bit subtle.  They
         conditionally exit the block.  Hence we only need to list (1)
         the registers that they read, and (2) the registers that they
         write in the case where the block is not exited.  (2) is
         empty, hence only (1) is relevant here. */
      case Xin_XDirect:
         addRegUsage_X86AMode(u, i->Xin.XDirect.amEIP);
         return;
      case Xin_XIndir:
         addHRegUse(u, HRmRead, i->Xin.XIndir.dstGA);
         addRegUsage_X86AMode(u, i->Xin.XIndir.amEIP);
         return;
      case Xin_XAssisted:
         addHRegUse(u, HRmRead, i->Xin.XAssisted.dstGA);
         addRegUsage_X86AMode(u, i->Xin.XAssisted.amEIP);
         return;
      case Xin_CMov32:
         addRegUsage_X86RM(u, i->Xin.CMov32.src, HRmRead);
         addHRegUse(u, HRmModify, i->Xin.CMov32.dst);
         return;
      case Xin_LoadEX:
         addRegUsage_X86AMode(u, i->Xin.LoadEX.src);
         addHRegUse(u, HRmWrite, i->Xin.LoadEX.dst);
         return;
      case Xin_Store:
         addHRegUse(u, HRmRead, i->Xin.Store.src);
         addRegUsage_X86AMode(u, i->Xin.Store.dst);
         return;
      case Xin_Set32:
         addHRegUse(u, HRmWrite, i->Xin.Set32.dst);
         return;
      case Xin_Bsfr32:
         addHRegUse(u, HRmRead, i->Xin.Bsfr32.src);
         addHRegUse(u, HRmWrite, i->Xin.Bsfr32.dst);
         return;
      case Xin_MFence:
         return;
      case Xin_ACAS:
         addRegUsage_X86AMode(u, i->Xin.ACAS.addr);
         addHRegUse(u, HRmRead, hregX86_EBX());
         addHRegUse(u, HRmModify, hregX86_EAX());
         return;
      case Xin_DACAS:
         addRegUsage_X86AMode(u, i->Xin.DACAS.addr);
         addHRegUse(u, HRmRead, hregX86_ECX());
         addHRegUse(u, HRmRead, hregX86_EBX());
         addHRegUse(u, HRmModify, hregX86_EDX());
         addHRegUse(u, HRmModify, hregX86_EAX());
         return;
      case Xin_FpUnary:
         addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
         addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
         return;
      case Xin_FpBinary:
         addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL);
         addHRegUse(u, HRmRead, i->Xin.FpBinary.srcR);
         addHRegUse(u, HRmWrite, i->Xin.FpBinary.dst);
         return;
      case Xin_FpLdSt:
         addRegUsage_X86AMode(u, i->Xin.FpLdSt.addr);
         addHRegUse(u, i->Xin.FpLdSt.isLoad ? HRmWrite : HRmRead,
                       i->Xin.FpLdSt.reg);
         return;
      case Xin_FpLdStI:
         addRegUsage_X86AMode(u, i->Xin.FpLdStI.addr);
         addHRegUse(u, i->Xin.FpLdStI.isLoad ? HRmWrite : HRmRead,
                       i->Xin.FpLdStI.reg);
         return;
      case Xin_Fp64to32:
         addHRegUse(u, HRmRead,  i->Xin.Fp64to32.src);
         addHRegUse(u, HRmWrite, i->Xin.Fp64to32.dst);
         return;
      case Xin_FpCMov:
         addHRegUse(u, HRmRead,   i->Xin.FpCMov.src);
         addHRegUse(u, HRmModify, i->Xin.FpCMov.dst);
         return;
      case Xin_FpLdCW:
         addRegUsage_X86AMode(u, i->Xin.FpLdCW.addr);
         return;
      case Xin_FpStSW_AX:
         addHRegUse(u, HRmWrite, hregX86_EAX());
         return;
      case Xin_FpCmp:
         addHRegUse(u, HRmRead, i->Xin.FpCmp.srcL);
         addHRegUse(u, HRmRead, i->Xin.FpCmp.srcR);
         addHRegUse(u, HRmWrite, i->Xin.FpCmp.dst);
         addHRegUse(u, HRmWrite, hregX86_EAX());
         return;
      case Xin_SseLdSt:
         addRegUsage_X86AMode(u, i->Xin.SseLdSt.addr);
         addHRegUse(u, i->Xin.SseLdSt.isLoad ? HRmWrite : HRmRead,
                       i->Xin.SseLdSt.reg);
         return;
      case Xin_SseLdzLO:
         addRegUsage_X86AMode(u, i->Xin.SseLdzLO.addr);
         addHRegUse(u, HRmWrite, i->Xin.SseLdzLO.reg);
         return;
      case Xin_SseConst:
         addHRegUse(u, HRmWrite, i->Xin.SseConst.dst);
         return;
      case Xin_Sse32Fx4:
         vassert(i->Xin.Sse32Fx4.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse32Fx4.op == Xsse_RCPF
                         || i->Xin.Sse32Fx4.op == Xsse_RSQRTF
                         || i->Xin.Sse32Fx4.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse32Fx4.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Xin.Sse32Fx4.dst);
         return;
      case Xin_Sse32FLo:
         vassert(i->Xin.Sse32FLo.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse32FLo.op == Xsse_RCPF
                         || i->Xin.Sse32FLo.op == Xsse_RSQRTF
                         || i->Xin.Sse32FLo.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse32FLo.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Xin.Sse32FLo.dst);
         return;
      case Xin_Sse64Fx2:
         vassert(i->Xin.Sse64Fx2.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse64Fx2.op == Xsse_RCPF
                         || i->Xin.Sse64Fx2.op == Xsse_RSQRTF
                         || i->Xin.Sse64Fx2.op == Xsse_SQRTF );
   1444          addHRegUse(u, HRmRead, i->Xin.Sse64Fx2.src);
   1445          addHRegUse(u, unary ? HRmWrite : HRmModify,
   1446                        i->Xin.Sse64Fx2.dst);
   1447          return;
   1448       case Xin_Sse64FLo:
   1449          vassert(i->Xin.Sse64FLo.op != Xsse_MOV);
   1450          unary = toBool( i->Xin.Sse64FLo.op == Xsse_RCPF
   1451                          || i->Xin.Sse64FLo.op == Xsse_RSQRTF
   1452                          || i->Xin.Sse64FLo.op == Xsse_SQRTF );
   1453          addHRegUse(u, HRmRead, i->Xin.Sse64FLo.src);
   1454          addHRegUse(u, unary ? HRmWrite : HRmModify,
   1455                        i->Xin.Sse64FLo.dst);
   1456          return;
   1457       case Xin_SseReRg:
   1458          if (i->Xin.SseReRg.op == Xsse_XOR
   1459              && sameHReg(i->Xin.SseReRg.src, i->Xin.SseReRg.dst)) {
   1460             /* reg-alloc needs to understand 'xor r,r' as a write of r */
   1461             /* (as opposed to a rite of passage :-) */
   1462             addHRegUse(u, HRmWrite, i->Xin.SseReRg.dst);
   1463          } else {
   1464             addHRegUse(u, HRmRead, i->Xin.SseReRg.src);
   1465             addHRegUse(u, i->Xin.SseReRg.op == Xsse_MOV
   1466                              ? HRmWrite : HRmModify,
   1467                           i->Xin.SseReRg.dst);
   1468          }
   1469          return;
   1470       case Xin_SseCMov:
   1471          addHRegUse(u, HRmRead,   i->Xin.SseCMov.src);
   1472          addHRegUse(u, HRmModify, i->Xin.SseCMov.dst);
   1473          return;
   1474       case Xin_SseShuf:
   1475          addHRegUse(u, HRmRead,  i->Xin.SseShuf.src);
   1476          addHRegUse(u, HRmWrite, i->Xin.SseShuf.dst);
   1477          return;
   1478       case Xin_EvCheck:
   1479          /* We expect both amodes only to mention %ebp, so this is in
   1480             fact pointless, since %ebp isn't allocatable, but anyway.. */
   1481          addRegUsage_X86AMode(u, i->Xin.EvCheck.amCounter);
   1482          addRegUsage_X86AMode(u, i->Xin.EvCheck.amFailAddr);
   1483          return;
   1484       case Xin_ProfInc:
   1485          /* does not use any registers. */
   1486          return;
   1487       default:
   1488          ppX86Instr(i, False);
   1489          vpanic("getRegUsage_X86Instr");
   1490    }
   1491 }
   1492 
   1493 /* local helper */
   1494 static void mapReg( HRegRemap* m, HReg* r )
   1495 {
   1496    *r = lookupHRegRemap(m, *r);
   1497 }
   1498 
   1499 void mapRegs_X86Instr ( HRegRemap* m, X86Instr* i, Bool mode64 )
   1500 {
   1501    vassert(mode64 == False);
   1502    switch (i->tag) {
   1503       case Xin_Alu32R:
   1504          mapRegs_X86RMI(m, i->Xin.Alu32R.src);
   1505          mapReg(m, &i->Xin.Alu32R.dst);
   1506          return;
   1507       case Xin_Alu32M:
   1508          mapRegs_X86RI(m, i->Xin.Alu32M.src);
   1509          mapRegs_X86AMode(m, i->Xin.Alu32M.dst);
   1510          return;
   1511       case Xin_Sh32:
   1512          mapReg(m, &i->Xin.Sh32.dst);
   1513          return;
   1514       case Xin_Test32:
   1515          mapRegs_X86RM(m, i->Xin.Test32.dst);
   1516          return;
   1517       case Xin_Unary32:
   1518          mapReg(m, &i->Xin.Unary32.dst);
   1519          return;
   1520       case Xin_Lea32:
   1521          mapRegs_X86AMode(m, i->Xin.Lea32.am);
   1522          mapReg(m, &i->Xin.Lea32.dst);
   1523          return;
   1524       case Xin_MulL:
   1525          mapRegs_X86RM(m, i->Xin.MulL.src);
   1526          return;
   1527       case Xin_Div:
   1528          mapRegs_X86RM(m, i->Xin.Div.src);
   1529          return;
   1530       case Xin_Sh3232:
   1531          mapReg(m, &i->Xin.Sh3232.src);
   1532          mapReg(m, &i->Xin.Sh3232.dst);
   1533          return;
   1534       case Xin_Push:
   1535          mapRegs_X86RMI(m, i->Xin.Push.src);
   1536          return;
   1537       case Xin_Call:
   1538          return;
   1539       case Xin_XDirect:
   1540          mapRegs_X86AMode(m, i->Xin.XDirect.amEIP);
   1541          return;
   1542       case Xin_XIndir:
   1543          mapReg(m, &i->Xin.XIndir.dstGA);
   1544          mapRegs_X86AMode(m, i->Xin.XIndir.amEIP);
   1545          return;
   1546       case Xin_XAssisted:
   1547          mapReg(m, &i->Xin.XAssisted.dstGA);
   1548          mapRegs_X86AMode(m, i->Xin.XAssisted.amEIP);
   1549          return;
   1550       case Xin_CMov32:
   1551          mapRegs_X86RM(m, i->Xin.CMov32.src);
   1552          mapReg(m, &i->Xin.CMov32.dst);
   1553          return;
   1554       case Xin_LoadEX:
   1555          mapRegs_X86AMode(m, i->Xin.LoadEX.src);
   1556          mapReg(m, &i->Xin.LoadEX.dst);
   1557          return;
   1558       case Xin_Store:
   1559          mapReg(m, &i->Xin.Store.src);
   1560          mapRegs_X86AMode(m, i->Xin.Store.dst);
   1561          return;
   1562       case Xin_Set32:
   1563          mapReg(m, &i->Xin.Set32.dst);
   1564          return;
   1565       case Xin_Bsfr32:
   1566          mapReg(m, &i->Xin.Bsfr32.src);
   1567          mapReg(m, &i->Xin.Bsfr32.dst);
   1568          return;
   1569       case Xin_MFence:
   1570          return;
   1571       case Xin_ACAS:
   1572          mapRegs_X86AMode(m, i->Xin.ACAS.addr);
   1573          return;
   1574       case Xin_DACAS:
   1575          mapRegs_X86AMode(m, i->Xin.DACAS.addr);
   1576          return;
   1577       case Xin_FpUnary:
   1578          mapReg(m, &i->Xin.FpUnary.src);
   1579          mapReg(m, &i->Xin.FpUnary.dst);
   1580          return;
   1581       case Xin_FpBinary:
   1582          mapReg(m, &i->Xin.FpBinary.srcL);
   1583          mapReg(m, &i->Xin.FpBinary.srcR);
   1584          mapReg(m, &i->Xin.FpBinary.dst);
   1585          return;
   1586       case Xin_FpLdSt:
   1587          mapRegs_X86AMode(m, i->Xin.FpLdSt.addr);
   1588          mapReg(m, &i->Xin.FpLdSt.reg);
   1589          return;
   1590       case Xin_FpLdStI:
   1591          mapRegs_X86AMode(m, i->Xin.FpLdStI.addr);
   1592          mapReg(m, &i->Xin.FpLdStI.reg);
   1593          return;
   1594       case Xin_Fp64to32:
   1595          mapReg(m, &i->Xin.Fp64to32.src);
   1596          mapReg(m, &i->Xin.Fp64to32.dst);
   1597          return;
   1598       case Xin_FpCMov:
   1599          mapReg(m, &i->Xin.FpCMov.src);
   1600          mapReg(m, &i->Xin.FpCMov.dst);
   1601          return;
   1602       case Xin_FpLdCW:
   1603          mapRegs_X86AMode(m, i->Xin.FpLdCW.addr);
   1604          return;
   1605       case Xin_FpStSW_AX:
   1606          return;
   1607       case Xin_FpCmp:
   1608          mapReg(m, &i->Xin.FpCmp.srcL);
   1609          mapReg(m, &i->Xin.FpCmp.srcR);
   1610          mapReg(m, &i->Xin.FpCmp.dst);
   1611          return;
   1612       case Xin_SseConst:
   1613          mapReg(m, &i->Xin.SseConst.dst);
   1614          return;
   1615       case Xin_SseLdSt:
   1616          mapReg(m, &i->Xin.SseLdSt.reg);
   1617          mapRegs_X86AMode(m, i->Xin.SseLdSt.addr);
         return;
   1619       case Xin_SseLdzLO:
   1620          mapReg(m, &i->Xin.SseLdzLO.reg);
   1621          mapRegs_X86AMode(m, i->Xin.SseLdzLO.addr);
         return;
   1623       case Xin_Sse32Fx4:
   1624          mapReg(m, &i->Xin.Sse32Fx4.src);
   1625          mapReg(m, &i->Xin.Sse32Fx4.dst);
   1626          return;
   1627       case Xin_Sse32FLo:
   1628          mapReg(m, &i->Xin.Sse32FLo.src);
   1629          mapReg(m, &i->Xin.Sse32FLo.dst);
   1630          return;
   1631       case Xin_Sse64Fx2:
   1632          mapReg(m, &i->Xin.Sse64Fx2.src);
   1633          mapReg(m, &i->Xin.Sse64Fx2.dst);
   1634          return;
   1635       case Xin_Sse64FLo:
   1636          mapReg(m, &i->Xin.Sse64FLo.src);
   1637          mapReg(m, &i->Xin.Sse64FLo.dst);
   1638          return;
   1639       case Xin_SseReRg:
   1640          mapReg(m, &i->Xin.SseReRg.src);
   1641          mapReg(m, &i->Xin.SseReRg.dst);
   1642          return;
   1643       case Xin_SseCMov:
   1644          mapReg(m, &i->Xin.SseCMov.src);
   1645          mapReg(m, &i->Xin.SseCMov.dst);
   1646          return;
   1647       case Xin_SseShuf:
   1648          mapReg(m, &i->Xin.SseShuf.src);
   1649          mapReg(m, &i->Xin.SseShuf.dst);
   1650          return;
   1651       case Xin_EvCheck:
   1652          /* We expect both amodes only to mention %ebp, so this is in
   1653             fact pointless, since %ebp isn't allocatable, but anyway.. */
   1654          mapRegs_X86AMode(m, i->Xin.EvCheck.amCounter);
   1655          mapRegs_X86AMode(m, i->Xin.EvCheck.amFailAddr);
   1656          return;
   1657       case Xin_ProfInc:
   1658          /* does not use any registers. */
   1659          return;
   1660 
   1661       default:
   1662          ppX86Instr(i, mode64);
   1663          vpanic("mapRegs_X86Instr");
   1664    }
   1665 }
   1666 
   1667 /* Figure out if i represents a reg-reg move, and if so assign the
   1668    source and destination to *src and *dst.  If in doubt say No.  Used
   1669    by the register allocator to do move coalescing.
   1670 */
   1671 Bool isMove_X86Instr ( X86Instr* i, HReg* src, HReg* dst )
   1672 {
   1673    /* Moves between integer regs */
   1674    if (i->tag == Xin_Alu32R) {
   1675       if (i->Xin.Alu32R.op != Xalu_MOV)
   1676          return False;
   1677       if (i->Xin.Alu32R.src->tag != Xrmi_Reg)
   1678          return False;
   1679       *src = i->Xin.Alu32R.src->Xrmi.Reg.reg;
   1680       *dst = i->Xin.Alu32R.dst;
   1681       return True;
   1682    }
   1683    /* Moves between FP regs */
   1684    if (i->tag == Xin_FpUnary) {
   1685       if (i->Xin.FpUnary.op != Xfp_MOV)
   1686          return False;
   1687       *src = i->Xin.FpUnary.src;
   1688       *dst = i->Xin.FpUnary.dst;
   1689       return True;
   1690    }
   1691    if (i->tag == Xin_SseReRg) {
   1692       if (i->Xin.SseReRg.op != Xsse_MOV)
   1693          return False;
   1694       *src = i->Xin.SseReRg.src;
   1695       *dst = i->Xin.SseReRg.dst;
   1696       return True;
   1697    }
   1698    return False;
   1699 }
   1700 
   1701 
   1702 /* Generate x86 spill/reload instructions under the direction of the
   1703    register allocator.  Note it's critical these don't write the
   1704    condition codes. */
   1705 
   1706 void genSpill_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
   1707                     HReg rreg, Int offsetB, Bool mode64 )
   1708 {
   1709    X86AMode* am;
   1710    vassert(offsetB >= 0);
   1711    vassert(!hregIsVirtual(rreg));
   1712    vassert(mode64 == False);
   1713    *i1 = *i2 = NULL;
   1714    am = X86AMode_IR(offsetB, hregX86_EBP());
   1715    switch (hregClass(rreg)) {
   1716       case HRcInt32:
   1717          *i1 = X86Instr_Alu32M ( Xalu_MOV, X86RI_Reg(rreg), am );
   1718          return;
   1719       case HRcFlt64:
   1720          *i1 = X86Instr_FpLdSt ( False/*store*/, 10, rreg, am );
   1721          return;
   1722       case HRcVec128:
   1723          *i1 = X86Instr_SseLdSt ( False/*store*/, rreg, am );
   1724          return;
   1725       default:
   1726          ppHRegClass(hregClass(rreg));
   1727          vpanic("genSpill_X86: unimplemented regclass");
   1728    }
   1729 }
   1730 
   1731 void genReload_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
   1732                      HReg rreg, Int offsetB, Bool mode64 )
   1733 {
   1734    X86AMode* am;
   1735    vassert(offsetB >= 0);
   1736    vassert(!hregIsVirtual(rreg));
   1737    vassert(mode64 == False);
   1738    *i1 = *i2 = NULL;
   1739    am = X86AMode_IR(offsetB, hregX86_EBP());
   1740    switch (hregClass(rreg)) {
   1741       case HRcInt32:
   1742          *i1 = X86Instr_Alu32R ( Xalu_MOV, X86RMI_Mem(am), rreg );
   1743          return;
   1744       case HRcFlt64:
   1745          *i1 = X86Instr_FpLdSt ( True/*load*/, 10, rreg, am );
   1746          return;
   1747       case HRcVec128:
   1748          *i1 = X86Instr_SseLdSt ( True/*load*/, rreg, am );
   1749          return;
   1750       default:
   1751          ppHRegClass(hregClass(rreg));
   1752          vpanic("genReload_X86: unimplemented regclass");
   1753    }
   1754 }
   1755 
   1756 /* The given instruction reads the specified vreg exactly once, and
   1757    that vreg is currently located at the given spill offset.  If
   possible, return a variant of the instruction which instead
   references the spill slot directly. */
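/* For instance: "orl %vreg,%dst" can become "orl spill_off(%ebp),%dst",
   avoiding an explicit reload of vreg into a register beforehand. */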
   1760 
   1761 X86Instr* directReload_X86( X86Instr* i, HReg vreg, Short spill_off )
   1762 {
   1763    vassert(spill_off >= 0 && spill_off < 10000); /* let's say */
   1764 
   1765    /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg
   1766       Convert to: src=RMI_Mem, dst=Reg
   1767    */
   1768    if (i->tag == Xin_Alu32R
   1769        && (i->Xin.Alu32R.op == Xalu_MOV || i->Xin.Alu32R.op == Xalu_OR
   1770            || i->Xin.Alu32R.op == Xalu_XOR)
   1771        && i->Xin.Alu32R.src->tag == Xrmi_Reg
   1772        && sameHReg(i->Xin.Alu32R.src->Xrmi.Reg.reg, vreg)) {
   1773       vassert(! sameHReg(i->Xin.Alu32R.dst, vreg));
   1774       return X86Instr_Alu32R(
   1775                 i->Xin.Alu32R.op,
   1776                 X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP())),
   1777                 i->Xin.Alu32R.dst
   1778              );
   1779    }
   1780 
   1781    /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg
   1782       Convert to: src=RI_Imm, dst=Mem
   1783    */
   1784    if (i->tag == Xin_Alu32R
   1785        && (i->Xin.Alu32R.op == Xalu_CMP)
   1786        && i->Xin.Alu32R.src->tag == Xrmi_Imm
   1787        && sameHReg(i->Xin.Alu32R.dst, vreg)) {
   1788       return X86Instr_Alu32M(
   1789                 i->Xin.Alu32R.op,
                X86RI_Imm( i->Xin.Alu32R.src->Xrmi.Imm.imm32 ),
   1791                 X86AMode_IR( spill_off, hregX86_EBP())
   1792              );
   1793    }
   1794 
   1795    /* Deal with form: Push(RMI_Reg)
   1796       Convert to: Push(RMI_Mem)
   1797    */
   1798    if (i->tag == Xin_Push
   1799        && i->Xin.Push.src->tag == Xrmi_Reg
   1800        && sameHReg(i->Xin.Push.src->Xrmi.Reg.reg, vreg)) {
   1801       return X86Instr_Push(
   1802                 X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP()))
   1803              );
   1804    }
   1805 
   1806    /* Deal with form: CMov32(src=RM_Reg, dst) where vreg == src
   1807       Convert to CMov32(RM_Mem, dst) */
   1808    if (i->tag == Xin_CMov32
   1809        && i->Xin.CMov32.src->tag == Xrm_Reg
   1810        && sameHReg(i->Xin.CMov32.src->Xrm.Reg.reg, vreg)) {
   1811       vassert(! sameHReg(i->Xin.CMov32.dst, vreg));
   1812       return X86Instr_CMov32(
   1813                 i->Xin.CMov32.cond,
   1814                 X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() )),
   1815                 i->Xin.CMov32.dst
   1816              );
   1817    }
   1818 
   1819    /* Deal with form: Test32(imm,RM_Reg vreg) -> Test32(imm,amode) */
   1820    if (i->tag == Xin_Test32
   1821        && i->Xin.Test32.dst->tag == Xrm_Reg
   1822        && sameHReg(i->Xin.Test32.dst->Xrm.Reg.reg, vreg)) {
   1823       return X86Instr_Test32(
   1824                 i->Xin.Test32.imm32,
   1825                 X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() ) )
   1826              );
   1827    }
   1828 
   1829    return NULL;
   1830 }
   1831 
   1832 
   1833 /* --------- The x86 assembler (bleh.) --------- */
   1834 
   1835 static UChar iregNo ( HReg r )
   1836 {
   1837    UInt n;
   1838    vassert(hregClass(r) == HRcInt32);
   1839    vassert(!hregIsVirtual(r));
   1840    n = hregNumber(r);
   1841    vassert(n <= 7);
   1842    return toUChar(n);
   1843 }
   1844 
   1845 static UInt fregNo ( HReg r )
   1846 {
   1847    UInt n;
   1848    vassert(hregClass(r) == HRcFlt64);
   1849    vassert(!hregIsVirtual(r));
   1850    n = hregNumber(r);
   1851    vassert(n <= 5);
   1852    return n;
   1853 }
   1854 
   1855 static UInt vregNo ( HReg r )
   1856 {
   1857    UInt n;
   1858    vassert(hregClass(r) == HRcVec128);
   1859    vassert(!hregIsVirtual(r));
   1860    n = hregNumber(r);
   1861    vassert(n <= 7);
   1862    return n;
   1863 }
   1864 
   1865 static UChar mkModRegRM ( UInt mod, UInt reg, UInt regmem )
   1866 {
   1867    vassert(mod < 4);
   1868    vassert((reg|regmem) < 8);
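   /* e.g. mkModRegRM(3, 0, 1) == 0xC1: mod=11 (register-direct),
      reg=000 (%eax), rm=001 (%ecx). */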
   1869    return toUChar( ((mod & 3) << 6)
   1870                    | ((reg & 7) << 3)
   1871                    | (regmem & 7) );
   1872 }
   1873 
   1874 static UChar mkSIB ( UInt shift, UInt regindex, UInt regbase )
   1875 {
   1876    vassert(shift < 4);
   1877    vassert((regindex|regbase) < 8);
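   /* e.g. mkSIB(2, 1, 0) == 0x88, the SIB byte for an
      (%eax,%ecx,4) address. */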
   1878    return toUChar( ((shift & 3) << 6)
   1879                    | ((regindex & 7) << 3)
   1880                    | (regbase & 7) );
   1881 }
   1882 
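/* Emit a 32-bit word, least significant byte first; e.g.
   emit32(p, 0x12345678) emits the bytes 78 56 34 12. */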
   1883 static UChar* emit32 ( UChar* p, UInt w32 )
   1884 {
   1885    *p++ = toUChar( w32        & 0x000000FF);
   1886    *p++ = toUChar((w32 >>  8) & 0x000000FF);
   1887    *p++ = toUChar((w32 >> 16) & 0x000000FF);
   1888    *p++ = toUChar((w32 >> 24) & 0x000000FF);
   1889    return p;
   1890 }
   1891 
   1892 /* Does a sign-extend of the lowest 8 bits give
   1893    the original number? */
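/* e.g. 0x7F and 0xFFFFFF80 (that is, -128) fit; 0x80 (+128) does not. */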
   1894 static Bool fits8bits ( UInt w32 )
   1895 {
   1896    Int i32 = (Int)w32;
   1897    return toBool(i32 == ((i32 << 24) >> 24));
   1898 }
   1899 
   1900 
   1901 /* Forming mod-reg-rm bytes and scale-index-base bytes.
   1902 
   1903      greg,  0(ereg)    |  ereg != ESP && ereg != EBP
   1904                        =  00 greg ereg
   1905 
   1906      greg,  d8(ereg)   |  ereg != ESP
   1907                        =  01 greg ereg, d8
   1908 
   1909      greg,  d32(ereg)  |  ereg != ESP
   1910                        =  10 greg ereg, d32
   1911 
   1912      greg,  d8(%esp)   =  01 greg 100, 0x24, d8
   1913 
   1914      -----------------------------------------------
   1915 
   1916      greg,  d8(base,index,scale)
   1917                |  index != ESP
   1918                =  01 greg 100, scale index base, d8
   1919 
   1920      greg,  d32(base,index,scale)
   1921                |  index != ESP
   1922                =  10 greg 100, scale index base, d32
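
     Worked example: greg = %ecx, am = 4(%eax).  The displacement
     fits in 8 bits and %eax is neither %esp nor %ebp, so the
     "01 greg ereg, d8" form applies: mkModRegRM(1, 1, 0) == 0x48,
     followed by the displacement byte 0x04.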
   1923 */
   1924 static UChar* doAMode_M ( UChar* p, HReg greg, X86AMode* am )
   1925 {
   1926    if (am->tag == Xam_IR) {
   1927       if (am->Xam.IR.imm == 0
   1928           && ! sameHReg(am->Xam.IR.reg, hregX86_ESP())
   1929           && ! sameHReg(am->Xam.IR.reg, hregX86_EBP()) ) {
   1930          *p++ = mkModRegRM(0, iregNo(greg), iregNo(am->Xam.IR.reg));
   1931          return p;
   1932       }
   1933       if (fits8bits(am->Xam.IR.imm)
   1934           && ! sameHReg(am->Xam.IR.reg, hregX86_ESP())) {
   1935          *p++ = mkModRegRM(1, iregNo(greg), iregNo(am->Xam.IR.reg));
   1936          *p++ = toUChar(am->Xam.IR.imm & 0xFF);
   1937          return p;
   1938       }
   1939       if (! sameHReg(am->Xam.IR.reg, hregX86_ESP())) {
   1940          *p++ = mkModRegRM(2, iregNo(greg), iregNo(am->Xam.IR.reg));
   1941          p = emit32(p, am->Xam.IR.imm);
   1942          return p;
   1943       }
   1944       if (sameHReg(am->Xam.IR.reg, hregX86_ESP())
   1945           && fits8bits(am->Xam.IR.imm)) {
         *p++ = mkModRegRM(1, iregNo(greg), 4);
   1947          *p++ = 0x24;
   1948          *p++ = toUChar(am->Xam.IR.imm & 0xFF);
   1949          return p;
   1950       }
   1951       ppX86AMode(am);
   1952       vpanic("doAMode_M: can't emit amode IR");
   1953       /*NOTREACHED*/
   1954    }
   1955    if (am->tag == Xam_IRRS) {
   1956       if (fits8bits(am->Xam.IRRS.imm)
   1957           && ! sameHReg(am->Xam.IRRS.index, hregX86_ESP())) {
   1958          *p++ = mkModRegRM(1, iregNo(greg), 4);
   1959          *p++ = mkSIB(am->Xam.IRRS.shift, iregNo(am->Xam.IRRS.index),
   1960                                           iregNo(am->Xam.IRRS.base));
   1961          *p++ = toUChar(am->Xam.IRRS.imm & 0xFF);
   1962          return p;
   1963       }
   1964       if (! sameHReg(am->Xam.IRRS.index, hregX86_ESP())) {
   1965          *p++ = mkModRegRM(2, iregNo(greg), 4);
   1966          *p++ = mkSIB(am->Xam.IRRS.shift, iregNo(am->Xam.IRRS.index),
   1967                                           iregNo(am->Xam.IRRS.base));
   1968          p = emit32(p, am->Xam.IRRS.imm);
   1969          return p;
   1970       }
   1971       ppX86AMode(am);
   1972       vpanic("doAMode_M: can't emit amode IRRS");
   1973       /*NOTREACHED*/
   1974    }
   1975    vpanic("doAMode_M: unknown amode");
   1976    /*NOTREACHED*/
   1977 }
   1978 
   1979 
   1980 /* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
   1981 static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
   1982 {
   1983    *p++ = mkModRegRM(3, iregNo(greg), iregNo(ereg));
   1984    return p;
   1985 }
   1986 
   1987 
   1988 /* Emit ffree %st(7) */
   1989 static UChar* do_ffree_st7 ( UChar* p )
   1990 {
   1991    *p++ = 0xDD;
   1992    *p++ = 0xC7;
   1993    return p;
   1994 }
   1995 
   1996 /* Emit fstp %st(i), 1 <= i <= 7 */
   1997 static UChar* do_fstp_st ( UChar* p, Int i )
   1998 {
   1999    vassert(1 <= i && i <= 7);
   2000    *p++ = 0xDD;
   2001    *p++ = toUChar(0xD8+i);
   2002    return p;
   2003 }
   2004 
   2005 /* Emit fld %st(i), 0 <= i <= 6 */
   2006 static UChar* do_fld_st ( UChar* p, Int i )
   2007 {
   2008    vassert(0 <= i && i <= 6);
   2009    *p++ = 0xD9;
   2010    *p++ = toUChar(0xC0+i);
   2011    return p;
   2012 }
   2013 
   2014 /* Emit f<op> %st(0) */
   2015 static UChar* do_fop1_st ( UChar* p, X86FpOp op )
   2016 {
   2017    switch (op) {
   2018       case Xfp_NEG:    *p++ = 0xD9; *p++ = 0xE0; break;
   2019       case Xfp_ABS:    *p++ = 0xD9; *p++ = 0xE1; break;
   2020       case Xfp_SQRT:   *p++ = 0xD9; *p++ = 0xFA; break;
   2021       case Xfp_ROUND:  *p++ = 0xD9; *p++ = 0xFC; break;
   2022       case Xfp_SIN:    *p++ = 0xD9; *p++ = 0xFE; break;
   2023       case Xfp_COS:    *p++ = 0xD9; *p++ = 0xFF; break;
   2024       case Xfp_2XM1:   *p++ = 0xD9; *p++ = 0xF0; break;
   2025       case Xfp_MOV:    break;
   2026       case Xfp_TAN:
   2027          /* fptan pushes 1.0 on the FP stack, except when the argument
   2028             is out of range.  Hence we have to do the instruction,
   2029             then inspect C2 to see if there is an out of range
   2030             condition.  If there is, we skip the fincstp that is used
   2031             by the in-range case to get rid of this extra 1.0
   2032             value. */
   2033          p = do_ffree_st7(p); /* since fptan sometimes pushes 1.0 */
   2034          *p++ = 0xD9; *p++ = 0xF2; // fptan
   2035          *p++ = 0x50;              // pushl %eax
   2036          *p++ = 0xDF; *p++ = 0xE0; // fnstsw %ax
   2037          *p++ = 0x66; *p++ = 0xA9;
   2038          *p++ = 0x00; *p++ = 0x04; // testw $0x400,%ax
   2039          *p++ = 0x75; *p++ = 0x02; // jnz after_fincstp
   2040          *p++ = 0xD9; *p++ = 0xF7; // fincstp
   2041          *p++ = 0x58;              // after_fincstp: popl %eax
   2042          break;
   2043       default:
   2044          vpanic("do_fop1_st: unknown op");
   2045    }
   2046    return p;
   2047 }
   2048 
   2049 /* Emit f<op> %st(i), 1 <= i <= 5 */
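/* e.g. do_fop2_st(p, Xfp_ADD, 1) emits D8 C1 ("fadd %st(1),%st(0)"):
   the sub-opcode travels in the reg field and the stack slot number
   in the rm field of a register-direct modrm byte. */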
   2050 static UChar* do_fop2_st ( UChar* p, X86FpOp op, Int i )
   2051 {
   2052 #  define fake(_n) mkHReg((_n), HRcInt32, False)
   2053    Int subopc;
   2054    switch (op) {
   2055       case Xfp_ADD: subopc = 0; break;
   2056       case Xfp_SUB: subopc = 4; break;
   2057       case Xfp_MUL: subopc = 1; break;
   2058       case Xfp_DIV: subopc = 6; break;
   2059       default: vpanic("do_fop2_st: unknown op");
   2060    }
   2061    *p++ = 0xD8;
   2062    p    = doAMode_R(p, fake(subopc), fake(i));
   2063    return p;
   2064 #  undef fake
   2065 }
   2066 
/* Push a 32-bit word on the stack.  The word depends on tags[3:0]:
   each byte is either 0x00 or 0xFF, depending on the corresponding
   bit in tags[]. */
   2070 static UChar* push_word_from_tags ( UChar* p, UShort tags )
   2071 {
   2072    UInt w;
   2073    vassert(0 == (tags & ~0xF));
   2074    if (tags == 0) {
   2075       /* pushl $0x00000000 */
   2076       *p++ = 0x6A;
   2077       *p++ = 0x00;
   2078    }
   2079    else
   2080    /* pushl $0xFFFFFFFF */
   2081    if (tags == 0xF) {
   2082       *p++ = 0x6A;
   2083       *p++ = 0xFF;
   2084    } else {
   2085       vassert(0); /* awaiting test case */
   2086       w = 0;
   2087       if (tags & 1) w |= 0x000000FF;
   2088       if (tags & 2) w |= 0x0000FF00;
   2089       if (tags & 4) w |= 0x00FF0000;
   2090       if (tags & 8) w |= 0xFF000000;
   2091       *p++ = 0x68;
   2092       p = emit32(p, w);
   2093    }
   2094    return p;
   2095 }
   2096 
   2097 /* Emit an instruction into buf and return the number of bytes used.
   2098    Note that buf is not the insn's final place, and therefore it is
   2099    imperative to emit position-independent code.  If the emitted
   2100    instruction was a profiler inc, set *is_profInc to True, else
   2101    leave it unchanged. */
   2102 
   2103 Int emit_X86Instr ( /*MB_MOD*/Bool* is_profInc,
   2104                     UChar* buf, Int nbuf, X86Instr* i,
   2105                     Bool mode64,
   2106                     void* disp_cp_chain_me_to_slowEP,
   2107                     void* disp_cp_chain_me_to_fastEP,
   2108                     void* disp_cp_xindir,
   2109                     void* disp_cp_xassisted )
   2110 {
   2111    UInt irno, opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
   2112 
   2113    UInt   xtra;
   2114    UChar* p = &buf[0];
   2115    UChar* ptmp;
   2116    vassert(nbuf >= 32);
   2117    vassert(mode64 == False);
   2118 
   /* Wrap an integer as an int register, for use in assembling
      GrpN insns, in which the greg field is used as a sub-opcode
      and does not really contain a register. */
   2122 #  define fake(_n) mkHReg((_n), HRcInt32, False)
   2123 
   2124    /* vex_printf("asm  ");ppX86Instr(i, mode64); vex_printf("\n"); */
   2125 
   2126    switch (i->tag) {
   2127 
   2128    case Xin_Alu32R:
   2129       /* Deal specially with MOV */
   2130       if (i->Xin.Alu32R.op == Xalu_MOV) {
   2131          switch (i->Xin.Alu32R.src->tag) {
   2132             case Xrmi_Imm:
   2133                *p++ = toUChar(0xB8 + iregNo(i->Xin.Alu32R.dst));
   2134                p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
   2135                goto done;
   2136             case Xrmi_Reg:
   2137                *p++ = 0x89;
   2138                p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
   2139                                 i->Xin.Alu32R.dst);
   2140                goto done;
   2141             case Xrmi_Mem:
   2142                *p++ = 0x8B;
   2143                p = doAMode_M(p, i->Xin.Alu32R.dst,
   2144                                 i->Xin.Alu32R.src->Xrmi.Mem.am);
   2145                goto done;
   2146             default:
   2147                goto bad;
   2148          }
   2149       }
   2150       /* MUL */
   2151       if (i->Xin.Alu32R.op == Xalu_MUL) {
   2152          switch (i->Xin.Alu32R.src->tag) {
   2153             case Xrmi_Reg:
   2154                *p++ = 0x0F;
   2155                *p++ = 0xAF;
   2156                p = doAMode_R(p, i->Xin.Alu32R.dst,
   2157                                 i->Xin.Alu32R.src->Xrmi.Reg.reg);
   2158                goto done;
   2159             case Xrmi_Mem:
   2160                *p++ = 0x0F;
   2161                *p++ = 0xAF;
   2162                p = doAMode_M(p, i->Xin.Alu32R.dst,
   2163                                 i->Xin.Alu32R.src->Xrmi.Mem.am);
   2164                goto done;
   2165             case Xrmi_Imm:
   2166                if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
   2167                   *p++ = 0x6B;
   2168                   p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
   2169                   *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
   2170                } else {
   2171                   *p++ = 0x69;
   2172                   p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
   2173                   p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
   2174                }
   2175                goto done;
   2176             default:
   2177                goto bad;
   2178          }
   2179       }
   2180       /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
   2181       opc = opc_rr = subopc_imm = opc_imma = 0;
   2182       switch (i->Xin.Alu32R.op) {
   2183          case Xalu_ADC: opc = 0x13; opc_rr = 0x11;
   2184                         subopc_imm = 2; opc_imma = 0x15; break;
   2185          case Xalu_ADD: opc = 0x03; opc_rr = 0x01;
   2186                         subopc_imm = 0; opc_imma = 0x05; break;
   2187          case Xalu_SUB: opc = 0x2B; opc_rr = 0x29;
   2188                         subopc_imm = 5; opc_imma = 0x2D; break;
   2189          case Xalu_SBB: opc = 0x1B; opc_rr = 0x19;
   2190                         subopc_imm = 3; opc_imma = 0x1D; break;
   2191          case Xalu_AND: opc = 0x23; opc_rr = 0x21;
   2192                         subopc_imm = 4; opc_imma = 0x25; break;
   2193          case Xalu_XOR: opc = 0x33; opc_rr = 0x31;
   2194                         subopc_imm = 6; opc_imma = 0x35; break;
   2195          case Xalu_OR:  opc = 0x0B; opc_rr = 0x09;
   2196                         subopc_imm = 1; opc_imma = 0x0D; break;
   2197          case Xalu_CMP: opc = 0x3B; opc_rr = 0x39;
   2198                         subopc_imm = 7; opc_imma = 0x3D; break;
   2199          default: goto bad;
   2200       }
   2201       switch (i->Xin.Alu32R.src->tag) {
   2202          case Xrmi_Imm:
   2203             if (sameHReg(i->Xin.Alu32R.dst, hregX86_EAX())
   2204                 && !fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
   2205                *p++ = toUChar(opc_imma);
   2206                p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
   2207             } else
   2208             if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
   2209                *p++ = 0x83;
   2210                p    = doAMode_R(p, fake(subopc_imm), i->Xin.Alu32R.dst);
   2211                *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
   2212             } else {
   2213                *p++ = 0x81;
   2214                p    = doAMode_R(p, fake(subopc_imm), i->Xin.Alu32R.dst);
   2215                p    = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
   2216             }
   2217             goto done;
   2218          case Xrmi_Reg:
   2219             *p++ = toUChar(opc_rr);
   2220             p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
   2221                              i->Xin.Alu32R.dst);
   2222             goto done;
   2223          case Xrmi_Mem:
   2224             *p++ = toUChar(opc);
   2225             p = doAMode_M(p, i->Xin.Alu32R.dst,
   2226                              i->Xin.Alu32R.src->Xrmi.Mem.am);
   2227             goto done;
   2228          default:
   2229             goto bad;
   2230       }
   2231       break;
   2232 
   2233    case Xin_Alu32M:
   2234       /* Deal specially with MOV */
   2235       if (i->Xin.Alu32M.op == Xalu_MOV) {
   2236          switch (i->Xin.Alu32M.src->tag) {
   2237             case Xri_Reg:
   2238                *p++ = 0x89;
   2239                p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
   2240                                 i->Xin.Alu32M.dst);
   2241                goto done;
   2242             case Xri_Imm:
   2243                *p++ = 0xC7;
   2244                p = doAMode_M(p, fake(0), i->Xin.Alu32M.dst);
   2245                p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
   2246                goto done;
   2247             default:
   2248                goto bad;
   2249          }
   2250       }
   2251       /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP.  MUL is not
   2252          allowed here. */
   2253       opc = subopc_imm = opc_imma = 0;
   2254       switch (i->Xin.Alu32M.op) {
   2255          case Xalu_ADD: opc = 0x01; subopc_imm = 0; break;
   2256          case Xalu_SUB: opc = 0x29; subopc_imm = 5; break;
   2257          case Xalu_CMP: opc = 0x39; subopc_imm = 7; break;
   2258          default: goto bad;
   2259       }
   2260       switch (i->Xin.Alu32M.src->tag) {
   2261          case Xri_Reg:
   2262             *p++ = toUChar(opc);
   2263             p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
   2264                              i->Xin.Alu32M.dst);
   2265             goto done;
   2266          case Xri_Imm:
   2267             if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) {
   2268                *p++ = 0x83;
   2269                p    = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
   2270                *p++ = toUChar(0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32);
   2271                goto done;
   2272             } else {
   2273                *p++ = 0x81;
   2274                p    = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
   2275                p    = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
   2276                goto done;
   2277             }
   2278          default:
   2279             goto bad;
   2280       }
   2281       break;
   2282 
   2283    case Xin_Sh32:
   2284       opc_cl = opc_imm = subopc = 0;
   2285       switch (i->Xin.Sh32.op) {
   2286          case Xsh_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
   2287          case Xsh_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
   2288          case Xsh_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
   2289          default: goto bad;
   2290       }
   2291       if (i->Xin.Sh32.src == 0) {
   2292          *p++ = toUChar(opc_cl);
   2293          p = doAMode_R(p, fake(subopc), i->Xin.Sh32.dst);
   2294       } else {
   2295          *p++ = toUChar(opc_imm);
   2296          p = doAMode_R(p, fake(subopc), i->Xin.Sh32.dst);
   2297          *p++ = (UChar)(i->Xin.Sh32.src);
   2298       }
   2299       goto done;
   2300 
   2301    case Xin_Test32:
   2302       if (i->Xin.Test32.dst->tag == Xrm_Reg) {
   2303          /* testl $imm32, %reg */
   2304          *p++ = 0xF7;
   2305          p = doAMode_R(p, fake(0), i->Xin.Test32.dst->Xrm.Reg.reg);
   2306          p = emit32(p, i->Xin.Test32.imm32);
   2307          goto done;
   2308       } else {
   2309          /* testl $imm32, amode */
   2310          *p++ = 0xF7;
   2311          p = doAMode_M(p, fake(0), i->Xin.Test32.dst->Xrm.Mem.am);
   2312          p = emit32(p, i->Xin.Test32.imm32);
   2313          goto done;
   2314       }
   2315 
   2316    case Xin_Unary32:
   2317       if (i->Xin.Unary32.op == Xun_NOT) {
   2318          *p++ = 0xF7;
   2319          p = doAMode_R(p, fake(2), i->Xin.Unary32.dst);
   2320          goto done;
   2321       }
   2322       if (i->Xin.Unary32.op == Xun_NEG) {
   2323          *p++ = 0xF7;
   2324          p = doAMode_R(p, fake(3), i->Xin.Unary32.dst);
   2325          goto done;
   2326       }
   2327       break;
   2328 
   2329    case Xin_Lea32:
   2330       *p++ = 0x8D;
   2331       p = doAMode_M(p, i->Xin.Lea32.dst, i->Xin.Lea32.am);
   2332       goto done;
   2333 
   2334    case Xin_MulL:
   2335       subopc = i->Xin.MulL.syned ? 5 : 4;
   2336       *p++ = 0xF7;
   2337       switch (i->Xin.MulL.src->tag)  {
   2338          case Xrm_Mem:
   2339             p = doAMode_M(p, fake(subopc),
   2340                              i->Xin.MulL.src->Xrm.Mem.am);
   2341             goto done;
   2342          case Xrm_Reg:
   2343             p = doAMode_R(p, fake(subopc),
   2344                              i->Xin.MulL.src->Xrm.Reg.reg);
   2345             goto done;
   2346          default:
   2347             goto bad;
   2348       }
   2349       break;
   2350 
   2351    case Xin_Div:
   2352       subopc = i->Xin.Div.syned ? 7 : 6;
   2353       *p++ = 0xF7;
   2354       switch (i->Xin.Div.src->tag)  {
   2355          case Xrm_Mem:
   2356             p = doAMode_M(p, fake(subopc),
   2357                              i->Xin.Div.src->Xrm.Mem.am);
   2358             goto done;
   2359          case Xrm_Reg:
   2360             p = doAMode_R(p, fake(subopc),
   2361                              i->Xin.Div.src->Xrm.Reg.reg);
   2362             goto done;
   2363          default:
   2364             goto bad;
   2365       }
   2366       break;
   2367 
   2368    case Xin_Sh3232:
   2369       vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR);
   2370       if (i->Xin.Sh3232.amt == 0) {
   2371          /* shldl/shrdl by %cl */
   2372          *p++ = 0x0F;
   2373          if (i->Xin.Sh3232.op == Xsh_SHL) {
   2374             *p++ = 0xA5;
   2375          } else {
   2376             *p++ = 0xAD;
   2377          }
   2378          p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst);
   2379          goto done;
   2380       }
   2381       break;
   2382 
   2383    case Xin_Push:
   2384       switch (i->Xin.Push.src->tag) {
   2385          case Xrmi_Mem:
   2386             *p++ = 0xFF;
   2387             p = doAMode_M(p, fake(6), i->Xin.Push.src->Xrmi.Mem.am);
   2388             goto done;
   2389          case Xrmi_Imm:
   2390             *p++ = 0x68;
   2391             p = emit32(p, i->Xin.Push.src->Xrmi.Imm.imm32);
   2392             goto done;
   2393          case Xrmi_Reg:
   2394             *p++ = toUChar(0x50 + iregNo(i->Xin.Push.src->Xrmi.Reg.reg));
   2395             goto done;
   2396         default:
   2397             goto bad;
   2398       }
   2399 
   2400    case Xin_Call:
   2401       if (i->Xin.Call.cond != Xcc_ALWAYS
   2402           && i->Xin.Call.rloc.pri != RLPri_None) {
   2403          /* The call might not happen (it isn't unconditional) and it
   2404             returns a result.  In this case we will need to generate a
   2405             control flow diamond to put 0x555..555 in the return
   2406             register(s) in the case where the call doesn't happen.  If
   2407             this ever becomes necessary, maybe copy code from the ARM
   2408             equivalent.  Until that day, just give up. */
   2409          goto bad;
   2410       }
   2411       /* See detailed comment for Xin_Call in getRegUsage_X86Instr above
   2412          for explanation of this. */
   2413       switch (i->Xin.Call.regparms) {
   2414          case 0: irno = iregNo(hregX86_EAX()); break;
   2415          case 1: irno = iregNo(hregX86_EDX()); break;
   2416          case 2: irno = iregNo(hregX86_ECX()); break;
   2417          case 3: irno = iregNo(hregX86_EDI()); break;
         default: vpanic("emit_X86Instr:call:regparms");
   2419       }
   2420       /* jump over the following two insns if the condition does not
   2421          hold */
   2422       if (i->Xin.Call.cond != Xcc_ALWAYS) {
   2423          *p++ = toUChar(0x70 + (0xF & (i->Xin.Call.cond ^ 1)));
   2424          *p++ = 0x07; /* 7 bytes in the next two insns */
   2425       }
   2426       /* movl $target, %tmp */
   2427       *p++ = toUChar(0xB8 + irno);
   2428       p = emit32(p, i->Xin.Call.target);
   2429       /* call *%tmp */
   2430       *p++ = 0xFF;
   2431       *p++ = toUChar(0xD0 + irno);
   2432       goto done;
   2433 
   2434    case Xin_XDirect: {
   2435       /* NB: what goes on here has to be very closely coordinated with the
   2436          chainXDirect_X86 and unchainXDirect_X86 below. */
   2437       /* We're generating chain-me requests here, so we need to be
   2438          sure this is actually allowed -- no-redir translations can't
   2439          use chain-me's.  Hence: */
   2440       vassert(disp_cp_chain_me_to_slowEP != NULL);
   2441       vassert(disp_cp_chain_me_to_fastEP != NULL);
   2442 
   2443       /* Use ptmp for backpatching conditional jumps. */
   2444       ptmp = NULL;
   2445 
   2446       /* First off, if this is conditional, create a conditional
   2447          jump over the rest of it. */
   2448       if (i->Xin.XDirect.cond != Xcc_ALWAYS) {
   2449          /* jmp fwds if !condition */
   2450          *p++ = toUChar(0x70 + (0xF & (i->Xin.XDirect.cond ^ 1)));
   2451          ptmp = p; /* fill in this bit later */
   2452          *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
   2453       }
   2454 
   2455       /* Update the guest EIP. */
   2456       /* movl $dstGA, amEIP */
   2457       *p++ = 0xC7;
   2458       p    = doAMode_M(p, fake(0), i->Xin.XDirect.amEIP);
   2459       p    = emit32(p, i->Xin.XDirect.dstGA);
   2460 
   2461       /* --- FIRST PATCHABLE BYTE follows --- */
   2462       /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
   2463          to) backs up the return address, so as to find the address of
   2464          the first patchable byte.  So: don't change the length of the
   2465          two instructions below. */
   2466       /* movl $disp_cp_chain_me_to_{slow,fast}EP,%edx; */
   2467       *p++ = 0xBA;
   2468       void* disp_cp_chain_me
   2469                = i->Xin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
   2470                                          : disp_cp_chain_me_to_slowEP;
   2471       p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_chain_me));
   2472       /* call *%edx */
   2473       *p++ = 0xFF;
   2474       *p++ = 0xD2;
   2475       /* --- END of PATCHABLE BYTES --- */
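      /* (The patchable area is thus exactly 7 bytes: BA <imm32> FF D2.) */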
   2476 
   2477       /* Fix up the conditional jump, if there was one. */
   2478       if (i->Xin.XDirect.cond != Xcc_ALWAYS) {
   2479          Int delta = p - ptmp;
   2480          vassert(delta > 0 && delta < 40);
   2481          *ptmp = toUChar(delta-1);
   2482       }
   2483       goto done;
   2484    }
   2485 
   2486    case Xin_XIndir: {
   2487       /* We're generating transfers that could lead indirectly to a
   2488          chain-me, so we need to be sure this is actually allowed --
   2489          no-redir translations are not allowed to reach normal
   2490          translations without going through the scheduler.  That means
   2491          no XDirects or XIndirs out from no-redir translations.
   2492          Hence: */
   2493       vassert(disp_cp_xindir != NULL);
   2494 
   2495       /* Use ptmp for backpatching conditional jumps. */
   2496       ptmp = NULL;
   2497 
   2498       /* First off, if this is conditional, create a conditional
   2499          jump over the rest of it. */
   2500       if (i->Xin.XIndir.cond != Xcc_ALWAYS) {
   2501          /* jmp fwds if !condition */
   2502          *p++ = toUChar(0x70 + (0xF & (i->Xin.XIndir.cond ^ 1)));
   2503          ptmp = p; /* fill in this bit later */
   2504          *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
   2505       }
   2506 
   2507       /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */
   2508       *p++ = 0x89;
   2509       p = doAMode_M(p, i->Xin.XIndir.dstGA, i->Xin.XIndir.amEIP);
   2510 
   2511       /* movl $disp_indir, %edx */
   2512       *p++ = 0xBA;
   2513       p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_xindir));
   2514       /* jmp *%edx */
   2515       *p++ = 0xFF;
   2516       *p++ = 0xE2;
   2517 
   2518       /* Fix up the conditional jump, if there was one. */
   2519       if (i->Xin.XIndir.cond != Xcc_ALWAYS) {
   2520          Int delta = p - ptmp;
   2521          vassert(delta > 0 && delta < 40);
   2522          *ptmp = toUChar(delta-1);
   2523       }
   2524       goto done;
   2525    }
   2526 
   2527    case Xin_XAssisted: {
   2528       /* Use ptmp for backpatching conditional jumps. */
   2529       ptmp = NULL;
   2530 
   2531       /* First off, if this is conditional, create a conditional
   2532          jump over the rest of it. */
   2533       if (i->Xin.XAssisted.cond != Xcc_ALWAYS) {
   2534          /* jmp fwds if !condition */
   2535          *p++ = toUChar(0x70 + (0xF & (i->Xin.XAssisted.cond ^ 1)));
   2536          ptmp = p; /* fill in this bit later */
   2537          *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
   2538       }
   2539 
   2540       /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */
   2541       *p++ = 0x89;
      p = doAMode_M(p, i->Xin.XAssisted.dstGA, i->Xin.XAssisted.amEIP);
   2543       /* movl $magic_number, %ebp. */
   2544       UInt trcval = 0;
   2545       switch (i->Xin.XAssisted.jk) {
   2546          case Ijk_ClientReq:    trcval = VEX_TRC_JMP_CLIENTREQ;    break;
   2547          case Ijk_Sys_syscall:  trcval = VEX_TRC_JMP_SYS_SYSCALL;  break;
   2548          case Ijk_Sys_int128:   trcval = VEX_TRC_JMP_SYS_INT128;   break;
   2549          case Ijk_Sys_int129:   trcval = VEX_TRC_JMP_SYS_INT129;   break;
   2550          case Ijk_Sys_int130:   trcval = VEX_TRC_JMP_SYS_INT130;   break;
   2551          case Ijk_Sys_sysenter: trcval = VEX_TRC_JMP_SYS_SYSENTER; break;
   2552          case Ijk_Yield:        trcval = VEX_TRC_JMP_YIELD;        break;
   2553          case Ijk_EmWarn:       trcval = VEX_TRC_JMP_EMWARN;       break;
   2554          case Ijk_MapFail:      trcval = VEX_TRC_JMP_MAPFAIL;      break;
   2555          case Ijk_NoDecode:     trcval = VEX_TRC_JMP_NODECODE;     break;
   2556          case Ijk_InvalICache:  trcval = VEX_TRC_JMP_INVALICACHE;  break;
   2557          case Ijk_NoRedir:      trcval = VEX_TRC_JMP_NOREDIR;      break;
   2558          case Ijk_SigTRAP:      trcval = VEX_TRC_JMP_SIGTRAP;      break;
   2559          case Ijk_SigSEGV:      trcval = VEX_TRC_JMP_SIGSEGV;      break;
   2560          case Ijk_Boring:       trcval = VEX_TRC_JMP_BORING;       break;
   2561          /* We don't expect to see the following being assisted. */
   2562          case Ijk_Ret:
   2563          case Ijk_Call:
   2564          /* fallthrough */
   2565          default:
   2566             ppIRJumpKind(i->Xin.XAssisted.jk);
   2567             vpanic("emit_X86Instr.Xin_XAssisted: unexpected jump kind");
   2568       }
   2569       vassert(trcval != 0);
   2570       *p++ = 0xBD;
   2571       p = emit32(p, trcval);
   2572 
   2573       /* movl $disp_indir, %edx */
   2574       *p++ = 0xBA;
   2575       p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_xassisted));
   2576       /* jmp *%edx */
   2577       *p++ = 0xFF;
   2578       *p++ = 0xE2;
   2579 
   2580       /* Fix up the conditional jump, if there was one. */
   2581       if (i->Xin.XAssisted.cond != Xcc_ALWAYS) {
   2582          Int delta = p - ptmp;
   2583          vassert(delta > 0 && delta < 40);
   2584          *ptmp = toUChar(delta-1);
   2585       }
   2586       goto done;
   2587    }
   2588 
   2589    case Xin_CMov32:
   2590       vassert(i->Xin.CMov32.cond != Xcc_ALWAYS);
   2591 
   2592       /* This generates cmov, which is illegal on P54/P55. */
   2593       /*
   2594       *p++ = 0x0F;
   2595       *p++ = toUChar(0x40 + (0xF & i->Xin.CMov32.cond));
   2596       if (i->Xin.CMov32.src->tag == Xrm_Reg) {
   2597          p = doAMode_R(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Reg.reg);
   2598          goto done;
   2599       }
   2600       if (i->Xin.CMov32.src->tag == Xrm_Mem) {
   2601          p = doAMode_M(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Mem.am);
   2602          goto done;
   2603       }
   2604       */
   2605 
   2606       /* Alternative version which works on any x86 variant. */
   2607       /* jmp fwds if !condition */
      *p++ = toUChar(0x70 + (0xF & (i->Xin.CMov32.cond ^ 1)));
   2609       *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
   2610       ptmp = p;
   2611 
   2612       switch (i->Xin.CMov32.src->tag) {
   2613          case Xrm_Reg:
   2614             /* Big sigh.  This is movl E -> G ... */
   2615             *p++ = 0x89;
   2616             p = doAMode_R(p, i->Xin.CMov32.src->Xrm.Reg.reg,
   2617                              i->Xin.CMov32.dst);
   2618 
   2619             break;
   2620          case Xrm_Mem:
   2621             /* ... whereas this is movl G -> E.  That's why the args
   2622                to doAMode_R appear to be the wrong way round in the
   2623                Xrm_Reg case. */
   2624             *p++ = 0x8B;
   2625             p = doAMode_M(p, i->Xin.CMov32.dst,
   2626                              i->Xin.CMov32.src->Xrm.Mem.am);
   2627             break;
   2628          default:
   2629             goto bad;
   2630       }
   2631       /* Fill in the jump offset. */
   2632       *(ptmp-1) = toUChar(p - ptmp);
   2633       goto done;
   2634 
   2635       break;
   2636 
   2637    case Xin_LoadEX:
   2638       if (i->Xin.LoadEX.szSmall == 1 && !i->Xin.LoadEX.syned) {
   2639          /* movzbl */
   2640          *p++ = 0x0F;
   2641          *p++ = 0xB6;
   2642          p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
   2643          goto done;
   2644       }
   2645       if (i->Xin.LoadEX.szSmall == 2 && !i->Xin.LoadEX.syned) {
   2646          /* movzwl */
   2647          *p++ = 0x0F;
   2648          *p++ = 0xB7;
   2649          p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
   2650          goto done;
   2651       }
   2652       if (i->Xin.LoadEX.szSmall == 1 && i->Xin.LoadEX.syned) {
   2653          /* movsbl */
   2654          *p++ = 0x0F;
   2655          *p++ = 0xBE;
   2656          p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
   2657          goto done;
   2658       }
   2659       break;
   2660 
   2661    case Xin_Set32:
   2662       /* Make the destination register be 1 or 0, depending on whether
   2663          the relevant condition holds.  We have to dodge and weave
   2664          when the destination is %esi or %edi as we cannot directly
   2665          emit the native 'setb %reg' for those.  Further complication:
   2666          the top 24 bits of the destination should be forced to zero,
   2667          but doing 'xor %r,%r' kills the flag(s) we are about to read.
         Sigh.  So start off by moving $0 into the dest. */
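      /* E.g. for dst = %esi and (say) cond = Xcc_Z, we emit:
            xchgl %eax,%esi ; movl $0,%eax ; setz %al ; xchgl %eax,%esi
         leaving 0 or 1 in %esi with %eax preserved. */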
   2669 
   2670       /* Do we need to swap in %eax? */
   2671       if (iregNo(i->Xin.Set32.dst) >= 4) {
   2672          /* xchg %eax, %dst */
   2673          *p++ = toUChar(0x90 + iregNo(i->Xin.Set32.dst));
   2674          /* movl $0, %eax */
   2675          *p++ =toUChar(0xB8 + iregNo(hregX86_EAX()));
   2676          p = emit32(p, 0);
   2677          /* setb lo8(%eax) */
   2678          *p++ = 0x0F;
   2679          *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
   2680          p = doAMode_R(p, fake(0), hregX86_EAX());
   2681          /* xchg %eax, %dst */
   2682          *p++ = toUChar(0x90 + iregNo(i->Xin.Set32.dst));
   2683       } else {
   2684          /* movl $0, %dst */
   2685          *p++ = toUChar(0xB8 + iregNo(i->Xin.Set32.dst));
   2686          p = emit32(p, 0);
   2687          /* setb lo8(%dst) */
   2688          *p++ = 0x0F;
   2689          *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
   2690          p = doAMode_R(p, fake(0), i->Xin.Set32.dst);
   2691       }
   2692       goto done;
   2693 
   2694    case Xin_Bsfr32:
   2695       *p++ = 0x0F;
   2696       if (i->Xin.Bsfr32.isFwds) {
   2697          *p++ = 0xBC;
   2698       } else {
   2699          *p++ = 0xBD;
   2700       }
   2701       p = doAMode_R(p, i->Xin.Bsfr32.dst, i->Xin.Bsfr32.src);
   2702       goto done;
   2703 
   2704    case Xin_MFence:
   2705       /* see comment in hdefs.h re this insn */
   2706       if (0) vex_printf("EMIT FENCE\n");
   2707       if (i->Xin.MFence.hwcaps & (VEX_HWCAPS_X86_SSE3
   2708                                   |VEX_HWCAPS_X86_SSE2)) {
   2709          /* mfence */
   2710          *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
   2711          goto done;
   2712       }
   2713       if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_MMXEXT) {
   2714          /* sfence */
   2715          *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8;
   2716          /* lock addl $0,0(%esp) */
   2717          *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
   2718          *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
   2719          goto done;
   2720       }
   2721       if (i->Xin.MFence.hwcaps == 0/*baseline, no SSE*/) {
   2722          /* lock addl $0,0(%esp) */
   2723          *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
   2724          *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
   2725          goto done;
   2726       }
   2727       vpanic("emit_X86Instr:mfence:hwcaps");
   2728       /*NOTREACHED*/
   2729       break;
   2730 
   2731    case Xin_ACAS:
   2732       /* lock */
   2733       *p++ = 0xF0;
   2734       /* cmpxchg{b,w,l} %ebx,mem.  Expected-value in %eax, new value
   2735          in %ebx.  The new-value register is hardwired to be %ebx
   2736          since letting it be any integer register gives the problem
         that %sil and %dil are unaddressable on x86 and hence we
   2738          would have to resort to the same kind of trickery as with
   2739          byte-sized Xin.Store, just below.  Given that this isn't
   2740          performance critical, it is simpler just to force the
   2741          register operand to %ebx (could equally be %ecx or %edx).
   2742          (Although %ebx is more consistent with cmpxchg8b.) */
   2743       if (i->Xin.ACAS.sz == 2) *p++ = 0x66;
   2744       *p++ = 0x0F;
   2745       if (i->Xin.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
   2746       p = doAMode_M(p, hregX86_EBX(), i->Xin.ACAS.addr);
   2747       goto done;
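              /* Worked example (illustrative only): for sz == 4 with
                 addr == (%esi), this emits F0 0F B1 1E, i.e.
                 lock cmpxchgl %ebx,(%esi) -- modrm 1E = mod 00,
                 reg %ebx (3), rm %esi (6). */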
   2748 
   2749    case Xin_DACAS:
   2750       /* lock */
   2751       *p++ = 0xF0;
   2752       /* cmpxchg8b m64.  Expected-value in %edx:%eax, new value
   2753          in %ecx:%ebx.  All 4 regs are hardwired in the ISA, so
   2754          aren't encoded in the insn. */
   2755       *p++ = 0x0F;
   2756       *p++ = 0xC7;
   2757       p = doAMode_M(p, fake(1), i->Xin.DACAS.addr);
   2758       goto done;
   2759 
   2760    case Xin_Store:
   2761       if (i->Xin.Store.sz == 2) {
   2762          /* This case, at least, is simple, given that we can
   2763             reference the low 16 bits of any integer register. */
   2764          *p++ = 0x66;
   2765          *p++ = 0x89;
   2766          p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
   2767          goto done;
   2768       }
   2769 
   2770       if (i->Xin.Store.sz == 1) {
   2771          /* We have to do complex dodging and weaving if src is not
   2772             the low 8 bits of %eax/%ebx/%ecx/%edx. */
   2773          if (iregNo(i->Xin.Store.src) < 4) {
   2774             /* we're OK, can do it directly */
   2775             *p++ = 0x88;
   2776             p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
   2777             goto done;
   2778          } else {
   2779             /* Bleh.  This means the source is %edi or %esi.  Since
   2780                the address mode can only mention three registers, at
   2781                least one of %eax/%ebx/%ecx/%edx must be available to
   2782                temporarily swap the source into, so the store can
   2783                happen.  So we have to look at the regs mentioned
   2784                in the amode. */
   2785             HReg swap = INVALID_HREG;
   2786             HReg  eax = hregX86_EAX(), ebx = hregX86_EBX(),
   2787                   ecx = hregX86_ECX(), edx = hregX86_EDX();
   2788             Bool a_ok = True, b_ok = True, c_ok = True, d_ok = True;
   2789             HRegUsage u;
   2790             Int j;
   2791             initHRegUsage(&u);
   2792             addRegUsage_X86AMode(&u, i->Xin.Store.dst);
   2793             for (j = 0; j < u.n_used; j++) {
   2794                HReg r = u.hreg[j];
   2795                if (sameHReg(r, eax)) a_ok = False;
   2796                if (sameHReg(r, ebx)) b_ok = False;
   2797                if (sameHReg(r, ecx)) c_ok = False;
   2798                if (sameHReg(r, edx)) d_ok = False;
   2799             }
   2800             if (a_ok) swap = eax;
   2801             if (b_ok) swap = ebx;
   2802             if (c_ok) swap = ecx;
   2803             if (d_ok) swap = edx;
   2804             vassert(! hregIsInvalid(swap));
   2805             /* xchgl %source, %swap. Could do better if swap is %eax. */
   2806             *p++ = 0x87;
   2807             p = doAMode_R(p, i->Xin.Store.src, swap);
   2808             /* movb lo8{%swap}, (dst) */
   2809             *p++ = 0x88;
   2810             p = doAMode_M(p, swap, i->Xin.Store.dst);
   2811             /* xchgl %source, %swap. Could do better if swap is %eax. */
   2812             *p++ = 0x87;
   2813             p = doAMode_R(p, i->Xin.Store.src, swap);
   2814             goto done;
   2815          }
   2816       } /* if (i->Xin.Store.sz == 1) */
   2817       break;
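              /* Worked example (illustrative only): storing the low 8
                 bits of %esi to (%eax).  The amode mentions only %eax,
                 and as the candidates are tried in a/b/c/d order with
                 the last survivor winning, swap = %edx.  The bytes are
                    87 F2    xchgl %edx,%esi
                    88 10    movb  %dl,(%eax)
                    87 F2    xchgl %edx,%esi
              */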
   2818 
   2819    case Xin_FpUnary:
   2820       /* gop %src, %dst
   2821          --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst)
   2822       */
   2823       p = do_ffree_st7(p);
   2824       p = do_fld_st(p, 0+hregNumber(i->Xin.FpUnary.src));
   2825       p = do_fop1_st(p, i->Xin.FpUnary.op);
   2826       p = do_fstp_st(p, 1+hregNumber(i->Xin.FpUnary.dst));
   2827       goto done;
   2828 
   2829    case Xin_FpBinary:
   2830       if (i->Xin.FpBinary.op == Xfp_YL2X
   2831           || i->Xin.FpBinary.op == Xfp_YL2XP1) {
   2832          /* Have to do this specially. */
   2833          /* ffree %st7 ; fld %st(srcL) ;
   2834             ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */
   2835          p = do_ffree_st7(p);
   2836          p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
   2837          p = do_ffree_st7(p);
   2838          p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
   2839          *p++ = 0xD9;
   2840          *p++ = toUChar(i->Xin.FpBinary.op==Xfp_YL2X ? 0xF1 : 0xF9);
   2841          p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
   2842          goto done;
   2843       }
   2844       if (i->Xin.FpBinary.op == Xfp_ATAN) {
   2845          /* Have to do this specially. */
   2846          /* ffree %st7 ; fld %st(srcL) ;
   2847             ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */
   2848          p = do_ffree_st7(p);
   2849          p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
   2850          p = do_ffree_st7(p);
   2851          p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
   2852          *p++ = 0xD9; *p++ = 0xF3;
   2853          p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
   2854          goto done;
   2855       }
   2856       if (i->Xin.FpBinary.op == Xfp_PREM
   2857           || i->Xin.FpBinary.op == Xfp_PREM1
   2858           || i->Xin.FpBinary.op == Xfp_SCALE) {
   2859          /* Have to do this specially. */
   2860          /* ffree %st7 ; fld %st(srcR) ;
   2861             ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ;
   2862             fincstp ; ffree %st7 */
   2863          p = do_ffree_st7(p);
   2864          p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcR));
   2865          p = do_ffree_st7(p);
   2866          p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcL));
   2867          *p++ = 0xD9;
   2868          switch (i->Xin.FpBinary.op) {
   2869             case Xfp_PREM: *p++ = 0xF8; break;
   2870             case Xfp_PREM1: *p++ = 0xF5; break;
   2871             case Xfp_SCALE: *p++ = 0xFD; break;
   2872             default: vpanic("emitX86Instr(FpBinary,PREM/PREM1/SCALE)");
   2873          }
   2874          p = do_fstp_st(p, 2+hregNumber(i->Xin.FpBinary.dst));
   2875          *p++ = 0xD9; *p++ = 0xF7;
   2876          p = do_ffree_st7(p);
   2877          goto done;
   2878       }
   2879       /* General case */
   2880       /* gop %srcL, %srcR, %dst
   2881          --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst)
   2882       */
   2883       p = do_ffree_st7(p);
   2884       p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
   2885       p = do_fop2_st(p, i->Xin.FpBinary.op,
   2886                         1+hregNumber(i->Xin.FpBinary.srcR));
   2887       p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
   2888       goto done;
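              /* Illustrative walk-through of the general case: suppose
                 gop = add, srcL = %fake2, srcR = %fake0, dst = %fake1,
                 with %fakeN held in %st(N).  Then:
                    ffree %st(7)    make room for a push
                    fld   %st(2)    push srcL; old %st(k) becomes %st(k+1)
                    fadd  %st(1)    %st(0) += %st(1), and %st(1) is srcR
                    fstp  %st(2)    pop result into dst's shifted slot
                 hence the 1+srcR and 1+dst adjustments. */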
   2889 
   2890    case Xin_FpLdSt:
   2891       if (i->Xin.FpLdSt.isLoad) {
   2892          /* Load from memory into %fakeN.
   2893             --> ffree %st(7) ; fld{s/l/t} amode ; fstp st(N+1)
   2894          */
   2895          p = do_ffree_st7(p);
   2896          switch (i->Xin.FpLdSt.sz) {
   2897             case 4:
   2898                *p++ = 0xD9;
   2899                p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
   2900                break;
   2901             case 8:
   2902                *p++ = 0xDD;
   2903                p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
   2904                break;
   2905             case 10:
   2906                *p++ = 0xDB;
   2907                p = doAMode_M(p, fake(5)/*subopcode*/, i->Xin.FpLdSt.addr);
   2908                break;
   2909             default:
   2910                vpanic("emitX86Instr(FpLdSt,load)");
   2911          }
   2912          p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdSt.reg));
   2913          goto done;
   2914       } else {
   2915          /* Store from %fakeN into memory.
   2916             --> ffree %st(7) ; fld st(N) ; fstp{s/l/t} amode
   2917          */
   2918          p = do_ffree_st7(p);
   2919          p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdSt.reg));
   2920          switch (i->Xin.FpLdSt.sz) {
   2921             case 4:
   2922                *p++ = 0xD9;
   2923                p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
   2924                break;
   2925             case 8:
   2926                *p++ = 0xDD;
   2927                p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
   2928                break;
   2929             case 10:
   2930                *p++ = 0xDB;
   2931                p = doAMode_M(p, fake(7)/*subopcode*/, i->Xin.FpLdSt.addr);
   2932                break;
   2933             default:
   2934                vpanic("emitX86Instr(FpLdSt,store)");
   2935          }
   2936          goto done;
   2937       }
   2938       break;
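              /* Encoding examples (illustrative only):
                    fldl  (%esp)  = DD 04 24   (DD /0, SIB 24 = %esp base)
                    fstps 8(%ebp) = D9 5D 08   (D9 /3, mod 01 + disp8)
                 The 10-byte forms DB /5 (fldt) and DB /7 (fstpt) move
                 full 80-bit values, so no precision is lost. */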
   2939 
   2940    case Xin_FpLdStI:
   2941       if (i->Xin.FpLdStI.isLoad) {
   2942          /* Load from memory into %fakeN, converting from an int.
   2943             --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1)
   2944          */
   2945          switch (i->Xin.FpLdStI.sz) {
   2946             case 8:  opc = 0xDF; subopc_imm = 5; break;
   2947             case 4:  opc = 0xDB; subopc_imm = 0; break;
   2948             case 2:  vassert(0); opc = 0xDF; subopc_imm = 0; break;
   2949             default: vpanic("emitX86Instr(Xin_FpLdStI-load)");
   2950          }
   2951          p = do_ffree_st7(p);
   2952          *p++ = toUChar(opc);
   2953          p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
   2954          p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdStI.reg));
   2955          goto done;
   2956       } else {
   2957          /* Store from %fakeN into memory, converting to an int.
   2958             --> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode
   2959          */
   2960          switch (i->Xin.FpLdStI.sz) {
   2961             case 8:  opc = 0xDF; subopc_imm = 7; break;
   2962             case 4:  opc = 0xDB; subopc_imm = 3; break;
   2963             case 2:  opc = 0xDF; subopc_imm = 3; break;
   2964             default: vpanic("emitX86Instr(Xin_FpLdStI-store)");
   2965          }
   2966          p = do_ffree_st7(p);
   2967          p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdStI.reg));
   2968          *p++ = toUChar(opc);
   2969          p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
   2970          goto done;
   2971       }
   2972       break;
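              /* Encoding examples (illustrative only):
                    fildl   (%eax) = DB 00   (DB /0, modrm 00)
                    fistpll (%eax) = DF 38   (DF /7, modrm 38 = reg 7, rm 0)
                 Note that 2-byte integer loads are asserted against
                 above; of the 2-byte forms only the store (fistpw,
                 DF /3) is ever generated. */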
   2973 
   2974    case Xin_Fp64to32:
   2975       /* ffree %st7 ; fld %st(src) */
   2976       p = do_ffree_st7(p);
   2977       p = do_fld_st(p, 0+fregNo(i->Xin.Fp64to32.src));
   2978       /* subl $4, %esp */
   2979       *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04;
   2980       /* fstps (%esp) */
   2981       *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24;
   2982       /* flds (%esp) */
   2983       *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24;
   2984       /* addl $4, %esp */
   2985       *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04;
   2986       /* fstp %st(1+dst) */
   2987       p = do_fstp_st(p, 1+fregNo(i->Xin.Fp64to32.dst));
   2988       goto done;
   2989 
   2990    case Xin_FpCMov:
   2991       /* jmp fwds if !condition */
   2992       *p++ = toUChar(0x70 + (i->Xin.FpCMov.cond ^ 1));
   2993       *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
   2994       ptmp = p;
   2995 
   2996       /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */
   2997       p = do_ffree_st7(p);
   2998       p = do_fld_st(p, 0+fregNo(i->Xin.FpCMov.src));
   2999       p = do_fstp_st(p, 1+fregNo(i->Xin.FpCMov.dst));
   3000 
   3001       /* Fill in the jump offset. */
   3002       *(ptmp-1) = toUChar(p - ptmp);
   3003       goto done;
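              /* Worked example (illustrative only): for cond = Xcc_Z
                 (assumed to encode as 4) the guard byte is 0x75 (jnz).
                 Assuming the helpers emit the usual 2-byte forms --
                 DD C7 for ffree %st(7), D9 C0+i for fld %st(i), and
                 DD D8+i for fstp %st(i) -- the guarded body is 6 bytes,
                 so the offset byte is patched to 06. */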
   3004 
   3005    case Xin_FpLdCW:
   3006       *p++ = 0xD9;
   3007       p = doAMode_M(p, fake(5)/*subopcode*/, i->Xin.FpLdCW.addr);
   3008       goto done;
   3009 
   3010    case Xin_FpStSW_AX:
   3011       /* note, this emits fnstsw %ax, not fstsw %ax */
   3012       *p++ = 0xDF;
   3013       *p++ = 0xE0;
   3014       goto done;
   3015 
   3016    case Xin_FpCmp:
   3017       /* gcmp %fL, %fR, %dst
   3018          -> ffree %st7; fpush %fL ; fucomp %(fR+1) ;
   3019             fnstsw %ax ; movl %eax, %dst
   3020       */
   3021       /* ffree %st7 */
   3022       p = do_ffree_st7(p);
   3023       /* fpush %fL */
   3024       p = do_fld_st(p, 0+fregNo(i->Xin.FpCmp.srcL));
   3025       /* fucomp %(fR+1) */
   3026       *p++ = 0xDD;
   3027       *p++ = toUChar(0xE8 + (7 & (1+fregNo(i->Xin.FpCmp.srcR))));
   3028       /* fnstsw %ax */
   3029       *p++ = 0xDF;
   3030       *p++ = 0xE0;
   3031       /* movl %eax, %dst */
   3032       *p++ = 0x89;
   3033       p = doAMode_R(p, hregX86_EAX(), i->Xin.FpCmp.dst);
   3034       goto done;
   3035 
   3036    case Xin_SseConst: {
   3037       UShort con = i->Xin.SseConst.con;
   3038       p = push_word_from_tags(p, toUShort((con >> 12) & 0xF));
   3039       p = push_word_from_tags(p, toUShort((con >> 8) & 0xF));
   3040       p = push_word_from_tags(p, toUShort((con >> 4) & 0xF));
   3041       p = push_word_from_tags(p, toUShort(con & 0xF));
   3042       /* movups (%esp), %xmm-dst */
   3043       *p++ = 0x0F;
   3044       *p++ = 0x10;
   3045       *p++ = toUChar(0x04 + 8 * (7 & vregNo(i->Xin.SseConst.dst)));
   3046       *p++ = 0x24;
   3047       /* addl $16, %esp */
   3048       *p++ = 0x83;
   3049       *p++ = 0xC4;
   3050       *p++ = 0x10;
   3051       goto done;
   3052    }
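           /* Decoding note: for dst = %xmm2 the load above is
              0F 10 14 24, i.e. movups (%esp),%xmm2 -- modrm 14 = mod 00,
              reg 2, rm 100 (SIB), SIB 24 = %esp base.  The four pushes
              build the 16-byte constant from the four 4-bit tags in
              'con' (see push_word_from_tags above). */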
   3053 
   3054    case Xin_SseLdSt:
   3055       *p++ = 0x0F;
   3056       *p++ = toUChar(i->Xin.SseLdSt.isLoad ? 0x10 : 0x11);
   3057       p = doAMode_M(p, fake(vregNo(i->Xin.SseLdSt.reg)), i->Xin.SseLdSt.addr);
   3058       goto done;
   3059 
   3060    case Xin_SseLdzLO:
   3061       vassert(i->Xin.SseLdzLO.sz == 4 || i->Xin.SseLdzLO.sz == 8);
   3062       /* movs[sd] amode, %xmm-dst */
   3063       *p++ = toUChar(i->Xin.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
   3064       *p++ = 0x0F;
   3065       *p++ = 0x10;
   3066       p = doAMode_M(p, fake(vregNo(i->Xin.SseLdzLO.reg)),
   3067                        i->Xin.SseLdzLO.addr);
   3068       goto done;
   3069 
   3070    case Xin_Sse32Fx4:
   3071       xtra = 0;
   3072       *p++ = 0x0F;
   3073       switch (i->Xin.Sse32Fx4.op) {
   3074          case Xsse_ADDF:   *p++ = 0x58; break;
   3075          case Xsse_DIVF:   *p++ = 0x5E; break;
   3076          case Xsse_MAXF:   *p++ = 0x5F; break;
   3077          case Xsse_MINF:   *p++ = 0x5D; break;
   3078          case Xsse_MULF:   *p++ = 0x59; break;
   3079          case Xsse_RCPF:   *p++ = 0x53; break;
   3080          case Xsse_RSQRTF: *p++ = 0x52; break;
   3081          case Xsse_SQRTF:  *p++ = 0x51; break;
   3082          case Xsse_SUBF:   *p++ = 0x5C; break;
   3083          case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
   3084          case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
   3085          case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
   3086          case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
   3087          default: goto bad;
   3088       }
   3089       p = doAMode_R(p, fake(vregNo(i->Xin.Sse32Fx4.dst)),
   3090                        fake(vregNo(i->Xin.Sse32Fx4.src)) );
   3091       if (xtra & 0x100)
   3092          *p++ = toUChar(xtra & 0xFF);
   3093       goto done;
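              /* Worked example (illustrative only): Xsse_CMPLTF with
                 dst = %xmm1, src = %xmm2 gives 0F C2 CA 01, i.e.
                 cmpltps %xmm2,%xmm1.  The 0x100 bit in xtra flags that
                 a trailing immediate (here 01 = LT) follows the modrm. */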
   3094 
   3095    case Xin_Sse64Fx2:
   3096       xtra = 0;
   3097       *p++ = 0x66;
   3098       *p++ = 0x0F;
   3099       switch (i->Xin.Sse64Fx2.op) {
   3100          case Xsse_ADDF:   *p++ = 0x58; break;
   3101          case Xsse_DIVF:   *p++ = 0x5E; break;
   3102          case Xsse_MAXF:   *p++ = 0x5F; break;
   3103          case Xsse_MINF:   *p++ = 0x5D; break;
   3104          case Xsse_MULF:   *p++ = 0x59; break;
   3105          case Xsse_RCPF:   *p++ = 0x53; break;
   3106          case Xsse_RSQRTF: *p++ = 0x52; break;
   3107          case Xsse_SQRTF:  *p++ = 0x51; break;
   3108          case Xsse_SUBF:   *p++ = 0x5C; break;
   3109          case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
   3110          case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
   3111          case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
   3112          case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
   3113          default: goto bad;
   3114       }
   3115       p = doAMode_R(p, fake(vregNo(i->Xin.Sse64Fx2.dst)),
   3116                        fake(vregNo(i->Xin.Sse64Fx2.src)) );
   3117       if (xtra & 0x100)
   3118          *p++ = toUChar(xtra & 0xFF);
   3119       goto done;
   3120 
   3121    case Xin_Sse32FLo:
   3122       xtra = 0;
   3123       *p++ = 0xF3;
   3124       *p++ = 0x0F;
   3125       switch (i->Xin.Sse32FLo.op) {
   3126          case Xsse_ADDF:   *p++ = 0x58; break;
   3127          case Xsse_DIVF:   *p++ = 0x5E; break;
   3128          case Xsse_MAXF:   *p++ = 0x5F; break;
   3129          case Xsse_MINF:   *p++ = 0x5D; break;
   3130          case Xsse_MULF:   *p++ = 0x59; break;
   3131          case Xsse_RCPF:   *p++ = 0x53; break;
   3132          case Xsse_RSQRTF: *p++ = 0x52; break;
   3133          case Xsse_SQRTF:  *p++ = 0x51; break;
   3134          case Xsse_SUBF:   *p++ = 0x5C; break;
   3135          case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
   3136          case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
   3137          case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
   3138          case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
   3139          default: goto bad;
   3140       }
   3141       p = doAMode_R(p, fake(vregNo(i->Xin.Sse32FLo.dst)),
   3142                        fake(vregNo(i->Xin.Sse32FLo.src)) );
   3143       if (xtra & 0x100)
   3144          *p++ = toUChar(xtra & 0xFF);
   3145       goto done;
   3146 
   3147    case Xin_Sse64FLo:
   3148       xtra = 0;
   3149       *p++ = 0xF2;
   3150       *p++ = 0x0F;
   3151       switch (i->Xin.Sse64FLo.op) {
   3152          case Xsse_ADDF:   *p++ = 0x58; break;
   3153          case Xsse_DIVF:   *p++ = 0x5E; break;
   3154          case Xsse_MAXF:   *p++ = 0x5F; break;
   3155          case Xsse_MINF:   *p++ = 0x5D; break;
   3156          case Xsse_MULF:   *p++ = 0x59; break;
   3157          case Xsse_RCPF:   *p++ = 0x53; break;
   3158          case Xsse_RSQRTF: *p++ = 0x52; break;
   3159          case Xsse_SQRTF:  *p++ = 0x51; break;
   3160          case Xsse_SUBF:   *p++ = 0x5C; break;
   3161          case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
   3162          case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
   3163          case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
   3164          case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
   3165          default: goto bad;
   3166       }
   3167       p = doAMode_R(p, fake(vregNo(i->Xin.Sse64FLo.dst)),
   3168                        fake(vregNo(i->Xin.Sse64FLo.src)) );
   3169       if (xtra & 0x100)
   3170          *p++ = toUChar(xtra & 0xFF);
   3171       goto done;
   3172 
   3173    case Xin_SseReRg:
   3174 #     define XX(_n) *p++ = (_n)
   3175       switch (i->Xin.SseReRg.op) {
   3176          case Xsse_MOV:     /*movups*/ XX(0x0F); XX(0x10); break;
   3177          case Xsse_OR:                 XX(0x0F); XX(0x56); break;
   3178          case Xsse_XOR:                XX(0x0F); XX(0x57); break;
   3179          case Xsse_AND:                XX(0x0F); XX(0x54); break;
   3180          case Xsse_PACKSSD:  XX(0x66); XX(0x0F); XX(0x6B); break;
   3181          case Xsse_PACKSSW:  XX(0x66); XX(0x0F); XX(0x63); break;
   3182          case Xsse_PACKUSW:  XX(0x66); XX(0x0F); XX(0x67); break;
   3183          case Xsse_ADD8:     XX(0x66); XX(0x0F); XX(0xFC); break;
   3184          case Xsse_ADD16:    XX(0x66); XX(0x0F); XX(0xFD); break;
   3185          case Xsse_ADD32:    XX(0x66); XX(0x0F); XX(0xFE); break;
   3186          case Xsse_ADD64:    XX(0x66); XX(0x0F); XX(0xD4); break;
   3187          case Xsse_QADD8S:   XX(0x66); XX(0x0F); XX(0xEC); break;
   3188          case Xsse_QADD16S:  XX(0x66); XX(0x0F); XX(0xED); break;
   3189          case Xsse_QADD8U:   XX(0x66); XX(0x0F); XX(0xDC); break;
   3190          case Xsse_QADD16U:  XX(0x66); XX(0x0F); XX(0xDD); break;
   3191          case Xsse_AVG8U:    XX(0x66); XX(0x0F); XX(0xE0); break;
   3192          case Xsse_AVG16U:   XX(0x66); XX(0x0F); XX(0xE3); break;
   3193          case Xsse_CMPEQ8:   XX(0x66); XX(0x0F); XX(0x74); break;
   3194          case Xsse_CMPEQ16:  XX(0x66); XX(0x0F); XX(0x75); break;
   3195          case Xsse_CMPEQ32:  XX(0x66); XX(0x0F); XX(0x76); break;
   3196          case Xsse_CMPGT8S:  XX(0x66); XX(0x0F); XX(0x64); break;
   3197          case Xsse_CMPGT16S: XX(0x66); XX(0x0F); XX(0x65); break;
   3198          case Xsse_CMPGT32S: XX(0x66); XX(0x0F); XX(0x66); break;
   3199          case Xsse_MAX16S:   XX(0x66); XX(0x0F); XX(0xEE); break;
   3200          case Xsse_MAX8U:    XX(0x66); XX(0x0F); XX(0xDE); break;
   3201          case Xsse_MIN16S:   XX(0x66); XX(0x0F); XX(0xEA); break;
   3202          case Xsse_MIN8U:    XX(0x66); XX(0x0F); XX(0xDA); break;
   3203          case Xsse_MULHI16U: XX(0x66); XX(0x0F); XX(0xE4); break;
   3204          case Xsse_MULHI16S: XX(0x66); XX(0x0F); XX(0xE5); break;
   3205          case Xsse_MUL16:    XX(0x66); XX(0x0F); XX(0xD5); break;
   3206          case Xsse_SHL16:    XX(0x66); XX(0x0F); XX(0xF1); break;
   3207          case Xsse_SHL32:    XX(0x66); XX(0x0F); XX(0xF2); break;
   3208          case Xsse_SHL64:    XX(0x66); XX(0x0F); XX(0xF3); break;
   3209          case Xsse_SAR16:    XX(0x66); XX(0x0F); XX(0xE1); break;
   3210          case Xsse_SAR32:    XX(0x66); XX(0x0F); XX(0xE2); break;
   3211          case Xsse_SHR16:    XX(0x66); XX(0x0F); XX(0xD1); break;
   3212          case Xsse_SHR32:    XX(0x66); XX(0x0F); XX(0xD2); break;
   3213          case Xsse_SHR64:    XX(0x66); XX(0x0F); XX(0xD3); break;
   3214          case Xsse_SUB8:     XX(0x66); XX(0x0F); XX(0xF8); break;
   3215          case Xsse_SUB16:    XX(0x66); XX(0x0F); XX(0xF9); break;
   3216          case Xsse_SUB32:    XX(0x66); XX(0x0F); XX(0xFA); break;
   3217          case Xsse_SUB64:    XX(0x66); XX(0x0F); XX(0xFB); break;
   3218          case Xsse_QSUB8S:   XX(0x66); XX(0x0F); XX(0xE8); break;
   3219          case Xsse_QSUB16S:  XX(0x66); XX(0x0F); XX(0xE9); break;
   3220          case Xsse_QSUB8U:   XX(0x66); XX(0x0F); XX(0xD8); break;
   3221          case Xsse_QSUB16U:  XX(0x66); XX(0x0F); XX(0xD9); break;
   3222          case Xsse_UNPCKHB:  XX(0x66); XX(0x0F); XX(0x68); break;
   3223          case Xsse_UNPCKHW:  XX(0x66); XX(0x0F); XX(0x69); break;
   3224          case Xsse_UNPCKHD:  XX(0x66); XX(0x0F); XX(0x6A); break;
   3225          case Xsse_UNPCKHQ:  XX(0x66); XX(0x0F); XX(0x6D); break;
   3226          case Xsse_UNPCKLB:  XX(0x66); XX(0x0F); XX(0x60); break;
   3227          case Xsse_UNPCKLW:  XX(0x66); XX(0x0F); XX(0x61); break;
   3228          case Xsse_UNPCKLD:  XX(0x66); XX(0x0F); XX(0x62); break;
   3229          case Xsse_UNPCKLQ:  XX(0x66); XX(0x0F); XX(0x6C); break;
   3230          default: goto bad;
   3231       }
   3232       p = doAMode_R(p, fake(vregNo(i->Xin.SseReRg.dst)),
   3233                        fake(vregNo(i->Xin.SseReRg.src)) );
   3234 #     undef XX
   3235       goto done;
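              /* Worked example (illustrative only): Xsse_ADD32 with
                 dst = %xmm3, src = %xmm7 gives 66 0F FE DF, i.e.
                 paddd %xmm7,%xmm3 (modrm DF = mod 11, reg 3, rm 7). */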
   3236 
   3237    case Xin_SseCMov:
   3238       /* jmp fwds if !condition */
   3239       *p++ = toUChar(0x70 + (i->Xin.SseCMov.cond ^ 1));
   3240       *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
   3241       ptmp = p;
   3242 
   3243       /* movaps %src, %dst */
   3244       *p++ = 0x0F;
   3245       *p++ = 0x28;
   3246       p = doAMode_R(p, fake(vregNo(i->Xin.SseCMov.dst)),
   3247                        fake(vregNo(i->Xin.SseCMov.src)) );
   3248 
   3249       /* Fill in the jump offset. */
   3250       *(ptmp-1) = toUChar(p - ptmp);
   3251       goto done;
   3252 
   3253    case Xin_SseShuf:
   3254       *p++ = 0x66;
   3255       *p++ = 0x0F;
   3256       *p++ = 0x70;
   3257       p = doAMode_R(p, fake(vregNo(i->Xin.SseShuf.dst)),
   3258                        fake(vregNo(i->Xin.SseShuf.src)) );
   3259       *p++ = (UChar)(i->Xin.SseShuf.order);
   3260       goto done;
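              /* Worked example (illustrative only): order = 0x1B with
                 dst = %xmm0, src = %xmm1 gives 66 0F 70 C1 1B, i.e.
                 pshufd $0x1B,%xmm1,%xmm0, which reverses the four
                 32-bit lanes. */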
   3261 
   3262    case Xin_EvCheck: {
   3263       /* We generate:
   3264             (3 bytes)  decl 4(%ebp)    4 == offsetof(host_EvC_COUNTER)
   3265             (2 bytes)  jns  nofail     expected taken
   3266             (3 bytes)  jmp* 0(%ebp)    0 == offsetof(host_EvC_FAILADDR)
   3267             nofail:
   3268       */
   3269       /* This is heavily asserted re instruction lengths.  It needs to
   3270          be.  If we are given unexpected forms of .amCounter or
   3271          .amFailAddr -- basically, anything that's not of the form
   3272          uimm7(%ebp) -- the assertions below are likely to fail. */
   3273       /* Note also that after the decl we must be very careful not to
   3274          read the carry flag, else we get a partial flags stall.
   3275          js/jns avoids that, though. */
   3276       UChar* p0 = p;
   3277       /* ---  decl 4(%ebp) --- */
   3278       /* "fake(1)" because there's no register in this encoding;
   3279          instead the register field is used as a sub opcode.  The
   3280          encoding for "decl r/m32" is FF /1, hence the fake(1). */
   3281       *p++ = 0xFF;
   3282       p = doAMode_M(p, fake(1), i->Xin.EvCheck.amCounter);
   3283       vassert(p - p0 == 3);
   3284       /* --- jns nofail --- */
   3285       *p++ = 0x79;
   3286       *p++ = 0x03; /* need to check this 0x03 after the next insn */
   3287       vassert(p - p0 == 5);
   3288       /* --- jmp* 0(%ebp) --- */
   3289       /* The encoding is FF /4. */
   3290       *p++ = 0xFF;
   3291       p = doAMode_M(p, fake(4), i->Xin.EvCheck.amFailAddr);
   3292       vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */
   3293       /* And crosscheck .. */
   3294       vassert(evCheckSzB_X86() == 8);
   3295       goto done;
   3296    }
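           /* Worked example (illustrative only): with amCounter =
              4(%ebp) and amFailAddr = 0(%ebp), the 8 bytes are
                 FF 4D 04   decl 4(%ebp)   (FF /1, mod 01 + disp8)
                 79 03      jns  nofail
                 FF 65 00   jmp* 0(%ebp)   (FF /4; an %ebp base forces
                                            mod 01 + disp8, even for
                                            offset 0)
              nofail:
           */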
   3297 
   3298    case Xin_ProfInc: {
   3299       /* We generate   addl $1,NotKnownYet
   3300                        adcl $0,NotKnownYet+4
   3301          in the expectation that a later call to LibVEX_patchProfCtr
   3302          will be used to fill in the immediate fields once the right
   3303          value is known.
   3304            83 05  00 00 00 00  01
   3305            83 15  00 00 00 00  00
   3306       */
   3307       *p++ = 0x83; *p++ = 0x05;
   3308       *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
   3309       *p++ = 0x01;
   3310       *p++ = 0x83; *p++ = 0x15;
   3311       *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
   3312       *p++ = 0x00;
   3313       /* Tell the caller .. */
   3314       vassert(!(*is_profInc));
   3315       *is_profInc = True;
   3316       goto done;
   3317    }
   3318 
   3319    default:
   3320       goto bad;
   3321    }
   3322 
   3323   bad:
   3324    ppX86Instr(i, mode64);
   3325    vpanic("emit_X86Instr");
   3326    /*NOTREACHED*/
   3327 
   3328   done:
   3329    vassert(p - &buf[0] <= 32);
   3330    return p - &buf[0];
   3331 
   3332 #  undef fake
   3333 }
   3334 
   3335 
   3336 /* How big is an event check?  See case for Xin_EvCheck in
   3337    emit_X86Instr just above.  That crosschecks what this returns, so
   3338    we can tell if we're inconsistent. */
   3339 Int evCheckSzB_X86 ( void )
   3340 {
   3341    return 8;
   3342 }
   3343 
   3344 
   3345 /* NB: what goes on here has to be very closely coordinated with the
   3346    emitInstr case for XDirect, above. */
   3347 VexInvalRange chainXDirect_X86 ( void* place_to_chain,
   3348                                  void* disp_cp_chain_me_EXPECTED,
   3349                                  void* place_to_jump_to )
   3350 {
   3351    /* What we're expecting to see is:
   3352         movl $disp_cp_chain_me_EXPECTED, %edx
   3353         call *%edx
   3354       viz
   3355         BA <4 bytes value == disp_cp_chain_me_EXPECTED>
   3356         FF D2
   3357    */
   3358    UChar* p = (UChar*)place_to_chain;
   3359    vassert(p[0] == 0xBA);
   3360    vassert(*(UInt*)(&p[1]) == (UInt)Ptr_to_ULong(disp_cp_chain_me_EXPECTED));
   3361    vassert(p[5] == 0xFF);
   3362    vassert(p[6] == 0xD2);
   3363    /* And what we want to change it to is:
   3364           jmp disp32   where disp32 is relative to the next insn
   3365           ud2;
   3366         viz
   3367           E9 <4 bytes == disp32>
   3368           0F 0B
   3369       The replacement has the same length as the original.
   3370    */
   3371    /* This is the delta we need to put into a JMP d32 insn.  It's
   3372       relative to the start of the next insn, hence the -5.  */
   3373    Long delta = (Long)((UChar*)place_to_jump_to - (UChar*)p) - (Long)5;
   3374 
   3375    /* And make the modifications. */
   3376    p[0] = 0xE9;
   3377    p[1] = (delta >> 0) & 0xFF;
   3378    p[2] = (delta >> 8) & 0xFF;
   3379    p[3] = (delta >> 16) & 0xFF;
   3380    p[4] = (delta >> 24) & 0xFF;
   3381    p[5] = 0x0F; p[6]  = 0x0B;
   3382    /* sanity check on the delta -- top 32 bits are all 0 or all 1 */
   3383    delta >>= 32;
   3384    vassert(delta == 0LL || delta == -1LL);
   3385    VexInvalRange vir = { (HWord)place_to_chain, 7 };
   3386    return vir;
   3387 }
   3388 
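        /* Worked example (illustrative, hypothetical addresses): chaining
           code at 0x05000000 so that it jumps to 0x05000100 gives
           delta = 0x100 - 5 = 0xFB, and the 7 bytes become
              E9 FB 00 00 00    jmp  .+0x100
              0F 0B             ud2
        */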
   3389 
   3390 /* NB: what goes on here has to be very closely coordinated with the
   3391    emitInstr case for XDirect, above. */
   3392 VexInvalRange unchainXDirect_X86 ( void* place_to_unchain,
   3393                                    void* place_to_jump_to_EXPECTED,
   3394                                    void* disp_cp_chain_me )
   3395 {
   3396    /* What we're expecting to see is:
   3397           jmp d32
   3398           ud2;
   3399        viz
   3400           E9 <4 bytes == disp32>
   3401           0F 0B
   3402    */
   3403    UChar* p     = (UChar*)place_to_unchain;
   3404    Bool   valid = False;
   3405    if (p[0] == 0xE9
   3406        && p[5] == 0x0F && p[6] == 0x0B) {
   3407       /* Check the offset is right. */
   3408       Int s32 = *(Int*)(&p[1]);
   3409       if ((UChar*)p + 5 + s32 == (UChar*)place_to_jump_to_EXPECTED) {
   3410          valid = True;
   3411          if (0)
   3412             vex_printf("QQQ unchainXDirect_X86: found valid\n");
   3413       }
   3414    }
   3415    vassert(valid);
   3416    /* And what we want to change it to is:
   3417          movl $disp_cp_chain_me, %edx
   3418          call *%edx
   3419       viz
   3420          BA <4 bytes value == disp_cp_chain_me>
   3421          FF D2
   3422       So it's the same length (convenient, huh).
   3423    */
   3424    p[0] = 0xBA;
   3425    *(UInt*)(&p[1]) = (UInt)Ptr_to_ULong(disp_cp_chain_me);
   3426    p[5] = 0xFF;
   3427    p[6] = 0xD2;
   3428    VexInvalRange vir = { (HWord)place_to_unchain, 7 };
   3429    return vir;
   3430 }
   3431 
   3432 
   3433 /* Patch the counter address into a profile inc point, as previously
   3434    created by the Xin_ProfInc case for emit_X86Instr. */
   3435 VexInvalRange patchProfInc_X86 ( void*  place_to_patch,
   3436                                  ULong* location_of_counter )
   3437 {
   3438    vassert(sizeof(ULong*) == 4);
   3439    UChar* p = (UChar*)place_to_patch;
   3440    vassert(p[0] == 0x83);
   3441    vassert(p[1] == 0x05);
   3442    vassert(p[2] == 0x00);
   3443    vassert(p[3] == 0x00);
   3444    vassert(p[4] == 0x00);
   3445    vassert(p[5] == 0x00);
   3446    vassert(p[6] == 0x01);
   3447    vassert(p[7] == 0x83);
   3448    vassert(p[8] == 0x15);
   3449    vassert(p[9] == 0x00);
   3450    vassert(p[10] == 0x00);
   3451    vassert(p[11] == 0x00);
   3452    vassert(p[12] == 0x00);
   3453    vassert(p[13] == 0x00);
   3454    UInt imm32 = (UInt)Ptr_to_ULong(location_of_counter);
   3455    p[2] = imm32 & 0xFF; imm32 >>= 8;
   3456    p[3] = imm32 & 0xFF; imm32 >>= 8;
   3457    p[4] = imm32 & 0xFF; imm32 >>= 8;
   3458    p[5] = imm32 & 0xFF; imm32 >>= 8;
   3459    imm32 = 4 + (UInt)Ptr_to_ULong(location_of_counter);
   3460    p[9]  = imm32 & 0xFF; imm32 >>= 8;
   3461    p[10] = imm32 & 0xFF; imm32 >>= 8;
   3462    p[11] = imm32 & 0xFF; imm32 >>= 8;
   3463    p[12] = imm32 & 0xFF; imm32 >>= 8;
   3464    VexInvalRange vir = { (HWord)place_to_patch, 14 };
   3465    return vir;
   3466 }
   3467 
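        /* Worked example (illustrative, hypothetical address): patching
           in a counter at 0x08049000 rewrites the two immediate fields
           little-endianly, giving
              83 05 00 90 04 08 01    addl $1,0x8049000
              83 15 04 90 04 08 00    adcl $0,0x8049004
        */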
   3468 
   3469 /*---------------------------------------------------------------*/
   3470 /*--- end                                     host_x86_defs.c ---*/
   3471 /*---------------------------------------------------------------*/
   3472