Home | History | Annotate | Download | only in src
      1 //===- subzero/src/IceAssemblerX86BaseImpl.h - base x86 assembler -*- C++ -*-=//
      2 // Copyright (c) 2013, the Dart project authors.  Please see the AUTHORS file
      3 // for details. All rights reserved. Use of this source code is governed by a
      4 // BSD-style license that can be found in the LICENSE file.
      5 //
      6 // Modified by the Subzero authors.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 //                        The Subzero Code Generator
     11 //
     12 // This file is distributed under the University of Illinois Open Source
     13 // License. See LICENSE.TXT for details.
     14 //
     15 //===----------------------------------------------------------------------===//
     16 //
     17 /// \file
     18 /// \brief Implements the AssemblerX86Base template class, which is the base
     19 /// Assembler class for X86 assemblers.
     20 //
     21 //===----------------------------------------------------------------------===//
     22 
     23 #include "IceAssemblerX86Base.h"
     24 
     25 #include "IceCfg.h"
     26 #include "IceCfgNode.h"
     27 #include "IceOperand.h"
     28 
     29 namespace Ice {
     30 namespace X86NAMESPACE {
     31 
     32 template <typename TraitsType>
     33 AssemblerX86Base<TraitsType>::~AssemblerX86Base() {
     34   if (BuildDefs::asserts()) {
     35     for (const Label *Label : CfgNodeLabels) {
     36       Label->finalCheck();
     37     }
     38     for (const Label *Label : LocalLabels) {
     39       Label->finalCheck();
     40     }
     41   }
     42 }
     43 
     44 template <typename TraitsType>
     45 void AssemblerX86Base<TraitsType>::alignFunction() {
     46   const SizeT Align = 1 << getBundleAlignLog2Bytes();
     47   SizeT BytesNeeded = Utils::OffsetToAlignment(Buffer.getPosition(), Align);
     48   constexpr SizeT HltSize = 1;
     49   while (BytesNeeded > 0) {
     50     hlt();
     51     BytesNeeded -= HltSize;
     52   }
     53 }
     54 
     55 template <typename TraitsType>
     56 typename AssemblerX86Base<TraitsType>::Label *
     57 AssemblerX86Base<TraitsType>::getOrCreateLabel(SizeT Number,
     58                                                LabelVector &Labels) {
     59   Label *L = nullptr;
     60   if (Number == Labels.size()) {
     61     L = new (this->allocate<Label>()) Label();
     62     Labels.push_back(L);
     63     return L;
     64   }
     65   if (Number > Labels.size()) {
     66     Utils::reserveAndResize(Labels, Number + 1);
     67   }
     68   L = Labels[Number];
     69   if (!L) {
     70     L = new (this->allocate<Label>()) Label();
     71     Labels[Number] = L;
     72   }
     73   return L;
     74 }
     75 
     76 template <typename TraitsType>
     77 Ice::Label *AssemblerX86Base<TraitsType>::getCfgNodeLabel(SizeT NodeNumber) {
     78   assert(NodeNumber < CfgNodeLabels.size());
     79   return CfgNodeLabels[NodeNumber];
     80 }
     81 
     82 template <typename TraitsType>
     83 typename AssemblerX86Base<TraitsType>::Label *
     84 AssemblerX86Base<TraitsType>::getOrCreateCfgNodeLabel(SizeT NodeNumber) {
     85   return getOrCreateLabel(NodeNumber, CfgNodeLabels);
     86 }
     87 
     88 template <typename TraitsType>
     89 typename AssemblerX86Base<TraitsType>::Label *
     90 AssemblerX86Base<TraitsType>::getOrCreateLocalLabel(SizeT Number) {
     91   return getOrCreateLabel(Number, LocalLabels);
     92 }
     93 
     94 template <typename TraitsType>
     95 void AssemblerX86Base<TraitsType>::bindCfgNodeLabel(const CfgNode *Node) {
     96   assert(!getPreliminary());
     97   Label *L = getOrCreateCfgNodeLabel(Node->getIndex());
     98   this->bind(L);
     99 }
    100 
    101 template <typename TraitsType>
    102 void AssemblerX86Base<TraitsType>::bindLocalLabel(SizeT Number) {
    103   Label *L = getOrCreateLocalLabel(Number);
    104   if (!getPreliminary())
    105     this->bind(L);
    106 }
    107 
    108 template <typename TraitsType>
    109 void AssemblerX86Base<TraitsType>::call(GPRRegister reg) {
    110   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    111   emitRexB(RexTypeIrrelevant, reg);
    112   emitUint8(0xFF);
    113   emitRegisterOperand(2, gprEncoding(reg));
    114 }
    115 
    116 template <typename TraitsType>
    117 void AssemblerX86Base<TraitsType>::call(const Address &address) {
    118   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    119   emitAddrSizeOverridePrefix();
    120   emitRex(RexTypeIrrelevant, address, RexRegIrrelevant);
    121   emitUint8(0xFF);
    122   emitOperand(2, address);
    123 }
    124 
    125 template <typename TraitsType>
    126 void AssemblerX86Base<TraitsType>::call(const ConstantRelocatable *label) {
    127   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    128   intptr_t call_start = Buffer.getPosition();
    129   emitUint8(0xE8);
    130   auto *Fixup = this->createFixup(Traits::FK_PcRel, label);
    131   Fixup->set_addend(-4);
    132   emitFixup(Fixup);
    133   emitInt32(0);
    134   assert((Buffer.getPosition() - call_start) == kCallExternalLabelSize);
    135   (void)call_start;
    136 }
    137 
    138 template <typename TraitsType>
    139 void AssemblerX86Base<TraitsType>::call(const Immediate &abs_address) {
    140   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    141   intptr_t call_start = Buffer.getPosition();
    142   emitUint8(0xE8);
    143   auto *Fixup = this->createFixup(Traits::FK_PcRel, AssemblerFixup::NullSymbol);
    144   Fixup->set_addend(abs_address.value() - 4);
    145   emitFixup(Fixup);
    146   emitInt32(0);
    147   assert((Buffer.getPosition() - call_start) == kCallExternalLabelSize);
    148   (void)call_start;
    149 }
    150 
    151 template <typename TraitsType>
    152 void AssemblerX86Base<TraitsType>::pushl(GPRRegister reg) {
    153   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    154   emitRexB(RexTypeIrrelevant, reg);
    155   emitUint8(0x50 + gprEncoding(reg));
    156 }
    157 
    158 template <typename TraitsType>
    159 void AssemblerX86Base<TraitsType>::pushl(const Immediate &Imm) {
    160   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    161   emitUint8(0x68);
    162   emitInt32(Imm.value());
    163 }
    164 
    165 template <typename TraitsType>
    166 void AssemblerX86Base<TraitsType>::pushl(const ConstantRelocatable *Label) {
    167   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    168   emitUint8(0x68);
    169   emitFixup(this->createFixup(Traits::FK_Abs, Label));
    170   // In x86-32, the emitted value is an addend to the relocation. Therefore, we
    171   // must emit a 0 (because we're pushing an absolute relocation.)
    172   // In x86-64, the emitted value does not matter (the addend lives in the
    173   // relocation record as an extra field.)
    174   emitInt32(0);
    175 }
    176 
    177 template <typename TraitsType>
    178 void AssemblerX86Base<TraitsType>::popl(GPRRegister reg) {
    179   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    180   // Any type that would not force a REX prefix to be emitted can be provided
    181   // here.
    182   emitRexB(RexTypeIrrelevant, reg);
    183   emitUint8(0x58 + gprEncoding(reg));
    184 }
    185 
    186 template <typename TraitsType>
    187 void AssemblerX86Base<TraitsType>::popl(const Address &address) {
    188   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    189   emitAddrSizeOverridePrefix();
    190   emitRex(RexTypeIrrelevant, address, RexRegIrrelevant);
    191   emitUint8(0x8F);
    192   emitOperand(0, address);
    193 }
    194 
    195 template <typename TraitsType>
    196 template <typename, typename>
    197 void AssemblerX86Base<TraitsType>::pushal() {
    198   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    199   emitUint8(0x60);
    200 }
    201 
    202 template <typename TraitsType>
    203 template <typename, typename>
    204 void AssemblerX86Base<TraitsType>::popal() {
    205   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    206   emitUint8(0x61);
    207 }
    208 
    209 template <typename TraitsType>
    210 void AssemblerX86Base<TraitsType>::setcc(BrCond condition, ByteRegister dst) {
    211   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    212   emitRexB(IceType_i8, dst);
    213   emitUint8(0x0F);
    214   emitUint8(0x90 + condition);
    215   emitUint8(0xC0 + gprEncoding(dst));
    216 }
    217 
    218 template <typename TraitsType>
    219 void AssemblerX86Base<TraitsType>::setcc(BrCond condition,
    220                                          const Address &address) {
    221   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    222   emitAddrSizeOverridePrefix();
    223   emitRex(RexTypeIrrelevant, address, RexRegIrrelevant);
    224   emitUint8(0x0F);
    225   emitUint8(0x90 + condition);
    226   emitOperand(0, address);
    227 }
    228 
    229 template <typename TraitsType>
    230 void AssemblerX86Base<TraitsType>::mov(Type Ty, GPRRegister dst,
    231                                        const Immediate &imm) {
    232   assert(Ty != IceType_i64 && "i64 not supported yet.");
    233   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    234   if (Ty == IceType_i16)
    235     emitOperandSizeOverride();
    236   emitRexB(Ty, dst);
    237   if (isByteSizedType(Ty)) {
    238     emitUint8(0xB0 + gprEncoding(dst));
    239     emitUint8(imm.value() & 0xFF);
    240   } else {
    241     // TODO(jpp): When removing the assertion above ensure that in x86-64 we
    242     // emit a 64-bit immediate.
    243     emitUint8(0xB8 + gprEncoding(dst));
    244     emitImmediate(Ty, imm);
    245   }
    246 }
    247 
    248 template <typename TraitsType>
    249 void AssemblerX86Base<TraitsType>::mov(Type Ty, GPRRegister dst,
    250                                        GPRRegister src) {
    251   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    252   if (Ty == IceType_i16)
    253     emitOperandSizeOverride();
    254   emitRexRB(Ty, src, dst);
    255   if (isByteSizedType(Ty)) {
    256     emitUint8(0x88);
    257   } else {
    258     emitUint8(0x89);
    259   }
    260   emitRegisterOperand(gprEncoding(src), gprEncoding(dst));
    261 }
    262 
    263 template <typename TraitsType>
    264 void AssemblerX86Base<TraitsType>::mov(Type Ty, GPRRegister dst,
    265                                        const Address &src) {
    266   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    267   if (Ty == IceType_i16)
    268     emitOperandSizeOverride();
    269   emitAddrSizeOverridePrefix();
    270   emitRex(Ty, src, dst);
    271   if (isByteSizedType(Ty)) {
    272     emitUint8(0x8A);
    273   } else {
    274     emitUint8(0x8B);
    275   }
    276   emitOperand(gprEncoding(dst), src);
    277 }
    278 
    279 template <typename TraitsType>
    280 void AssemblerX86Base<TraitsType>::mov(Type Ty, const Address &dst,
    281                                        GPRRegister src) {
    282   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    283   if (Ty == IceType_i16)
    284     emitOperandSizeOverride();
    285   emitAddrSizeOverridePrefix();
    286   emitRex(Ty, dst, src);
    287   if (isByteSizedType(Ty)) {
    288     emitUint8(0x88);
    289   } else {
    290     emitUint8(0x89);
    291   }
    292   emitOperand(gprEncoding(src), dst);
    293 }
    294 
    295 template <typename TraitsType>
    296 void AssemblerX86Base<TraitsType>::mov(Type Ty, const Address &dst,
    297                                        const Immediate &imm) {
    298   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    299   if (Ty == IceType_i16)
    300     emitOperandSizeOverride();
    301   emitAddrSizeOverridePrefix();
    302   emitRex(Ty, dst, RexRegIrrelevant);
    303   if (isByteSizedType(Ty)) {
    304     emitUint8(0xC6);
    305     static constexpr RelocOffsetT OffsetFromNextInstruction = 1;
    306     emitOperand(0, dst, OffsetFromNextInstruction);
    307     emitUint8(imm.value() & 0xFF);
    308   } else {
    309     emitUint8(0xC7);
    310     const uint8_t OffsetFromNextInstruction = Ty == IceType_i16 ? 2 : 4;
    311     emitOperand(0, dst, OffsetFromNextInstruction);
    312     emitImmediate(Ty, imm);
    313   }
    314 }
    315 
    316 template <typename TraitsType>
    317 template <typename T>
    318 typename std::enable_if<T::Is64Bit, void>::type
    319 AssemblerX86Base<TraitsType>::movabs(const GPRRegister Dst, uint64_t Imm64) {
    320   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    321   const bool NeedsRexW = (Imm64 & ~0xFFFFFFFFull) != 0;
    322   const Type RexType = NeedsRexW ? RexTypeForceRexW : RexTypeIrrelevant;
    323   emitRexB(RexType, Dst);
    324   emitUint8(0xB8 | gprEncoding(Dst));
    325   // When emitting Imm64, we don't have to mask out the upper 32 bits for
    326   // emitInt32 will/should only emit a 32-bit constant. In reality, we are
    327   // paranoid, so we go ahead an mask the upper bits out anyway.
    328   emitInt32(Imm64 & 0xFFFFFFFF);
    329   if (NeedsRexW)
    330     emitInt32((Imm64 >> 32) & 0xFFFFFFFF);
    331 }
    332 
    333 template <typename TraitsType>
    334 void AssemblerX86Base<TraitsType>::movzx(Type SrcTy, GPRRegister dst,
    335                                          GPRRegister src) {
    336   if (Traits::Is64Bit && SrcTy == IceType_i32) {
    337     // 32-bit mov clears the upper 32 bits, hence zero-extending the 32-bit
    338     // operand to 64-bit.
    339     mov(IceType_i32, dst, src);
    340     return;
    341   }
    342 
    343   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    344   bool ByteSized = isByteSizedType(SrcTy);
    345   assert(ByteSized || SrcTy == IceType_i16);
    346   emitRexRB(RexTypeIrrelevant, dst, SrcTy, src);
    347   emitUint8(0x0F);
    348   emitUint8(ByteSized ? 0xB6 : 0xB7);
    349   emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
    350 }
    351 
    352 template <typename TraitsType>
    353 void AssemblerX86Base<TraitsType>::movzx(Type SrcTy, GPRRegister dst,
    354                                          const Address &src) {
    355   if (Traits::Is64Bit && SrcTy == IceType_i32) {
    356     // 32-bit mov clears the upper 32 bits, hence zero-extending the 32-bit
    357     // operand to 64-bit.
    358     mov(IceType_i32, dst, src);
    359     return;
    360   }
    361 
    362   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    363   bool ByteSized = isByteSizedType(SrcTy);
    364   assert(ByteSized || SrcTy == IceType_i16);
    365   emitAddrSizeOverridePrefix();
    366   emitRex(SrcTy, src, RexTypeIrrelevant, dst);
    367   emitUint8(0x0F);
    368   emitUint8(ByteSized ? 0xB6 : 0xB7);
    369   emitOperand(gprEncoding(dst), src);
    370 }
    371 
    372 template <typename TraitsType>
    373 void AssemblerX86Base<TraitsType>::movsx(Type SrcTy, GPRRegister dst,
    374                                          GPRRegister src) {
    375   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    376   bool ByteSized = isByteSizedType(SrcTy);
    377   emitRexRB(RexTypeForceRexW, dst, SrcTy, src);
    378   if (ByteSized || SrcTy == IceType_i16) {
    379     emitUint8(0x0F);
    380     emitUint8(ByteSized ? 0xBE : 0xBF);
    381   } else {
    382     assert(Traits::Is64Bit && SrcTy == IceType_i32);
    383     emitUint8(0x63);
    384   }
    385   emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
    386 }
    387 
    388 template <typename TraitsType>
    389 void AssemblerX86Base<TraitsType>::movsx(Type SrcTy, GPRRegister dst,
    390                                          const Address &src) {
    391   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    392   bool ByteSized = isByteSizedType(SrcTy);
    393   emitAddrSizeOverridePrefix();
    394   emitRex(SrcTy, src, RexTypeForceRexW, dst);
    395   if (ByteSized || SrcTy == IceType_i16) {
    396     emitUint8(0x0F);
    397     emitUint8(ByteSized ? 0xBE : 0xBF);
    398   } else {
    399     assert(Traits::Is64Bit && SrcTy == IceType_i32);
    400     emitUint8(0x63);
    401   }
    402   emitOperand(gprEncoding(dst), src);
    403 }
    404 
    405 template <typename TraitsType>
    406 void AssemblerX86Base<TraitsType>::lea(Type Ty, GPRRegister dst,
    407                                        const Address &src) {
    408   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    409   assert(Ty == IceType_i16 || Ty == IceType_i32 ||
    410          (Traits::Is64Bit && Ty == IceType_i64));
    411   if (Ty == IceType_i16)
    412     emitOperandSizeOverride();
    413   emitAddrSizeOverridePrefix();
    414   emitRex(Ty, src, dst);
    415   emitUint8(0x8D);
    416   emitOperand(gprEncoding(dst), src);
    417 }
    418 
    419 template <typename TraitsType>
    420 void AssemblerX86Base<TraitsType>::cmov(Type Ty, BrCond cond, GPRRegister dst,
    421                                         GPRRegister src) {
    422   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    423   if (Ty == IceType_i16)
    424     emitOperandSizeOverride();
    425   else
    426     assert(Ty == IceType_i32 || (Traits::Is64Bit && Ty == IceType_i64));
    427   emitRexRB(Ty, dst, src);
    428   emitUint8(0x0F);
    429   emitUint8(0x40 + cond);
    430   emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
    431 }
    432 
    433 template <typename TraitsType>
    434 void AssemblerX86Base<TraitsType>::cmov(Type Ty, BrCond cond, GPRRegister dst,
    435                                         const Address &src) {
    436   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    437   if (Ty == IceType_i16)
    438     emitOperandSizeOverride();
    439   else
    440     assert(Ty == IceType_i32 || (Traits::Is64Bit && Ty == IceType_i64));
    441   emitAddrSizeOverridePrefix();
    442   emitRex(Ty, src, dst);
    443   emitUint8(0x0F);
    444   emitUint8(0x40 + cond);
    445   emitOperand(gprEncoding(dst), src);
    446 }
    447 
    448 template <typename TraitsType> void AssemblerX86Base<TraitsType>::rep_movsb() {
    449   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    450   emitUint8(0xF3);
    451   emitUint8(0xA4);
    452 }
    453 
    454 template <typename TraitsType>
    455 void AssemblerX86Base<TraitsType>::movss(Type Ty, XmmRegister dst,
    456                                          const Address &src) {
    457   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    458   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
    459   emitAddrSizeOverridePrefix();
    460   emitRex(RexTypeIrrelevant, src, dst);
    461   emitUint8(0x0F);
    462   emitUint8(0x10);
    463   emitOperand(gprEncoding(dst), src);
    464 }
    465 
    466 template <typename TraitsType>
    467 void AssemblerX86Base<TraitsType>::movss(Type Ty, const Address &dst,
    468                                          XmmRegister src) {
    469   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    470   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
    471   emitAddrSizeOverridePrefix();
    472   emitRex(RexTypeIrrelevant, dst, src);
    473   emitUint8(0x0F);
    474   emitUint8(0x11);
    475   emitOperand(gprEncoding(src), dst);
    476 }
    477 
    478 template <typename TraitsType>
    479 void AssemblerX86Base<TraitsType>::movss(Type Ty, XmmRegister dst,
    480                                          XmmRegister src) {
    481   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    482   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
    483   emitRexRB(RexTypeIrrelevant, src, dst);
    484   emitUint8(0x0F);
    485   emitUint8(0x11);
    486   emitXmmRegisterOperand(src, dst);
    487 }
    488 
    489 template <typename TraitsType>
    490 void AssemblerX86Base<TraitsType>::movd(Type SrcTy, XmmRegister dst,
    491                                         GPRRegister src) {
    492   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    493   emitUint8(0x66);
    494   emitRexRB(SrcTy, dst, src);
    495   emitUint8(0x0F);
    496   emitUint8(0x6E);
    497   emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
    498 }
    499 
    500 template <typename TraitsType>
    501 void AssemblerX86Base<TraitsType>::movd(Type SrcTy, XmmRegister dst,
    502                                         const Address &src) {
    503   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    504   emitUint8(0x66);
    505   emitAddrSizeOverridePrefix();
    506   emitRex(SrcTy, src, dst);
    507   emitUint8(0x0F);
    508   emitUint8(0x6E);
    509   emitOperand(gprEncoding(dst), src);
    510 }
    511 
    512 template <typename TraitsType>
    513 void AssemblerX86Base<TraitsType>::movd(Type DestTy, GPRRegister dst,
    514                                         XmmRegister src) {
    515   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    516   emitUint8(0x66);
    517   emitRexRB(DestTy, src, dst);
    518   emitUint8(0x0F);
    519   emitUint8(0x7E);
    520   emitRegisterOperand(gprEncoding(src), gprEncoding(dst));
    521 }
    522 
    523 template <typename TraitsType>
    524 void AssemblerX86Base<TraitsType>::movd(Type DestTy, const Address &dst,
    525                                         XmmRegister src) {
    526   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    527   emitUint8(0x66);
    528   emitAddrSizeOverridePrefix();
    529   emitRex(DestTy, dst, src);
    530   emitUint8(0x0F);
    531   emitUint8(0x7E);
    532   emitOperand(gprEncoding(src), dst);
    533 }
    534 
    535 template <typename TraitsType>
    536 void AssemblerX86Base<TraitsType>::movq(XmmRegister dst, XmmRegister src) {
    537   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    538   emitUint8(0xF3);
    539   emitRexRB(RexTypeIrrelevant, dst, src);
    540   emitUint8(0x0F);
    541   emitUint8(0x7E);
    542   emitXmmRegisterOperand(dst, src);
    543 }
    544 
    545 template <typename TraitsType>
    546 void AssemblerX86Base<TraitsType>::movq(const Address &dst, XmmRegister src) {
    547   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    548   emitUint8(0x66);
    549   emitAddrSizeOverridePrefix();
    550   emitRex(RexTypeIrrelevant, dst, src);
    551   emitUint8(0x0F);
    552   emitUint8(0xD6);
    553   emitOperand(gprEncoding(src), dst);
    554 }
    555 
    556 template <typename TraitsType>
    557 void AssemblerX86Base<TraitsType>::movq(XmmRegister dst, const Address &src) {
    558   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    559   emitUint8(0xF3);
    560   emitAddrSizeOverridePrefix();
    561   emitRex(RexTypeIrrelevant, src, dst);
    562   emitUint8(0x0F);
    563   emitUint8(0x7E);
    564   emitOperand(gprEncoding(dst), src);
    565 }
    566 
    567 template <typename TraitsType>
    568 void AssemblerX86Base<TraitsType>::addss(Type Ty, XmmRegister dst,
    569                                          XmmRegister src) {
    570   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    571   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
    572   emitRexRB(RexTypeIrrelevant, dst, src);
    573   emitUint8(0x0F);
    574   emitUint8(0x58);
    575   emitXmmRegisterOperand(dst, src);
    576 }
    577 
    578 template <typename TraitsType>
    579 void AssemblerX86Base<TraitsType>::addss(Type Ty, XmmRegister dst,
    580                                          const Address &src) {
    581   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    582   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
    583   emitAddrSizeOverridePrefix();
    584   emitRex(RexTypeIrrelevant, src, dst);
    585   emitUint8(0x0F);
    586   emitUint8(0x58);
    587   emitOperand(gprEncoding(dst), src);
    588 }
    589 
    590 template <typename TraitsType>
    591 void AssemblerX86Base<TraitsType>::subss(Type Ty, XmmRegister dst,
    592                                          XmmRegister src) {
    593   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    594   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
    595   emitRexRB(RexTypeIrrelevant, dst, src);
    596   emitUint8(0x0F);
    597   emitUint8(0x5C);
    598   emitXmmRegisterOperand(dst, src);
    599 }
    600 
    601 template <typename TraitsType>
    602 void AssemblerX86Base<TraitsType>::subss(Type Ty, XmmRegister dst,
    603                                          const Address &src) {
    604   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    605   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
    606   emitAddrSizeOverridePrefix();
    607   emitRex(RexTypeIrrelevant, src, dst);
    608   emitUint8(0x0F);
    609   emitUint8(0x5C);
    610   emitOperand(gprEncoding(dst), src);
    611 }
    612 
    613 template <typename TraitsType>
    614 void AssemblerX86Base<TraitsType>::mulss(Type Ty, XmmRegister dst,
    615                                          XmmRegister src) {
    616   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    617   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
    618   emitRexRB(RexTypeIrrelevant, dst, src);
    619   emitUint8(0x0F);
    620   emitUint8(0x59);
    621   emitXmmRegisterOperand(dst, src);
    622 }
    623 
    624 template <typename TraitsType>
    625 void AssemblerX86Base<TraitsType>::mulss(Type Ty, XmmRegister dst,
    626                                          const Address &src) {
    627   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    628   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
    629   emitAddrSizeOverridePrefix();
    630   emitRex(RexTypeIrrelevant, src, dst);
    631   emitUint8(0x0F);
    632   emitUint8(0x59);
    633   emitOperand(gprEncoding(dst), src);
    634 }
    635 
    636 template <typename TraitsType>
    637 void AssemblerX86Base<TraitsType>::divss(Type Ty, XmmRegister dst,
    638                                          XmmRegister src) {
    639   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    640   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
    641   emitRexRB(RexTypeIrrelevant, dst, src);
    642   emitUint8(0x0F);
    643   emitUint8(0x5E);
    644   emitXmmRegisterOperand(dst, src);
    645 }
    646 
    647 template <typename TraitsType>
    648 void AssemblerX86Base<TraitsType>::divss(Type Ty, XmmRegister dst,
    649                                          const Address &src) {
    650   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    651   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
    652   emitAddrSizeOverridePrefix();
    653   emitRex(RexTypeIrrelevant, src, dst);
    654   emitUint8(0x0F);
    655   emitUint8(0x5E);
    656   emitOperand(gprEncoding(dst), src);
    657 }
    658 
    659 template <typename TraitsType>
    660 template <typename T, typename>
    661 void AssemblerX86Base<TraitsType>::fld(Type Ty,
    662                                        const typename T::Address &src) {
    663   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    664   emitAddrSizeOverridePrefix();
    665   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xD9 : 0xDD);
    666   emitOperand(0, src);
    667 }
    668 
    669 template <typename TraitsType>
    670 template <typename T, typename>
    671 void AssemblerX86Base<TraitsType>::fstp(Type Ty,
    672                                         const typename T::Address &dst) {
    673   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    674   emitAddrSizeOverridePrefix();
    675   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xD9 : 0xDD);
    676   emitOperand(3, dst);
    677 }
    678 
    679 template <typename TraitsType>
    680 template <typename T, typename>
    681 void AssemblerX86Base<TraitsType>::fstp(typename T::X87STRegister st) {
    682   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    683   emitUint8(0xDD);
    684   emitUint8(0xD8 + st);
    685 }
    686 
    687 template <typename TraitsType>
    688 void AssemblerX86Base<TraitsType>::movaps(XmmRegister dst, XmmRegister src) {
    689   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    690   emitRexRB(RexTypeIrrelevant, dst, src);
    691   emitUint8(0x0F);
    692   emitUint8(0x28);
    693   emitXmmRegisterOperand(dst, src);
    694 }
    695 
    696 template <typename TraitsType>
    697 void AssemblerX86Base<TraitsType>::movups(XmmRegister dst, XmmRegister src) {
    698   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    699   emitRexRB(RexTypeIrrelevant, dst, src);
    700   emitUint8(0x0F);
    701   emitUint8(0x10);
    702   emitXmmRegisterOperand(dst, src);
    703 }
    704 
    705 template <typename TraitsType>
    706 void AssemblerX86Base<TraitsType>::movups(XmmRegister dst, const Address &src) {
    707   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    708   emitAddrSizeOverridePrefix();
    709   emitRex(RexTypeIrrelevant, src, dst);
    710   emitUint8(0x0F);
    711   emitUint8(0x10);
    712   emitOperand(gprEncoding(dst), src);
    713 }
    714 
    715 template <typename TraitsType>
    716 void AssemblerX86Base<TraitsType>::movups(const Address &dst, XmmRegister src) {
    717   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    718   emitAddrSizeOverridePrefix();
    719   emitRex(RexTypeIrrelevant, dst, src);
    720   emitUint8(0x0F);
    721   emitUint8(0x11);
    722   emitOperand(gprEncoding(src), dst);
    723 }
    724 
    725 template <typename TraitsType>
    726 void AssemblerX86Base<TraitsType>::padd(Type Ty, XmmRegister dst,
    727                                         XmmRegister src) {
    728   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    729   emitUint8(0x66);
    730   emitRexRB(RexTypeIrrelevant, dst, src);
    731   emitUint8(0x0F);
    732   if (isByteSizedArithType(Ty)) {
    733     emitUint8(0xFC);
    734   } else if (Ty == IceType_i16) {
    735     emitUint8(0xFD);
    736   } else {
    737     emitUint8(0xFE);
    738   }
    739   emitXmmRegisterOperand(dst, src);
    740 }
    741 
    742 template <typename TraitsType>
    743 void AssemblerX86Base<TraitsType>::padd(Type Ty, XmmRegister dst,
    744                                         const Address &src) {
    745   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    746   emitUint8(0x66);
    747   emitAddrSizeOverridePrefix();
    748   emitRex(RexTypeIrrelevant, src, dst);
    749   emitUint8(0x0F);
    750   if (isByteSizedArithType(Ty)) {
    751     emitUint8(0xFC);
    752   } else if (Ty == IceType_i16) {
    753     emitUint8(0xFD);
    754   } else {
    755     emitUint8(0xFE);
    756   }
    757   emitOperand(gprEncoding(dst), src);
    758 }
    759 
    760 template <typename TraitsType>
    761 void AssemblerX86Base<TraitsType>::padds(Type Ty, XmmRegister dst,
    762                                          XmmRegister src) {
    763   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    764   emitUint8(0x66);
    765   emitRexRB(RexTypeIrrelevant, dst, src);
    766   emitUint8(0x0F);
    767   if (isByteSizedArithType(Ty)) {
    768     emitUint8(0xEC);
    769   } else if (Ty == IceType_i16) {
    770     emitUint8(0xED);
    771   } else {
    772     assert(false && "Unexpected padds operand type");
    773   }
    774   emitXmmRegisterOperand(dst, src);
    775 }
    776 
    777 template <typename TraitsType>
    778 void AssemblerX86Base<TraitsType>::padds(Type Ty, XmmRegister dst,
    779                                          const Address &src) {
    780   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    781   emitUint8(0x66);
    782   emitAddrSizeOverridePrefix();
    783   emitRex(RexTypeIrrelevant, src, dst);
    784   emitUint8(0x0F);
    785   if (isByteSizedArithType(Ty)) {
    786     emitUint8(0xEC);
    787   } else if (Ty == IceType_i16) {
    788     emitUint8(0xED);
    789   } else {
    790     assert(false && "Unexpected padds operand type");
    791   }
    792   emitOperand(gprEncoding(dst), src);
    793 }
    794 
    795 template <typename TraitsType>
    796 void AssemblerX86Base<TraitsType>::paddus(Type Ty, XmmRegister dst,
    797                                           XmmRegister src) {
    798   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    799   emitUint8(0x66);
    800   emitRexRB(RexTypeIrrelevant, dst, src);
    801   emitUint8(0x0F);
    802   if (isByteSizedArithType(Ty)) {
    803     emitUint8(0xDC);
    804   } else if (Ty == IceType_i16) {
    805     emitUint8(0xDD);
    806   } else {
    807     assert(false && "Unexpected paddus operand type");
    808   }
    809   emitXmmRegisterOperand(dst, src);
    810 }
    811 
    812 template <typename TraitsType>
    813 void AssemblerX86Base<TraitsType>::paddus(Type Ty, XmmRegister dst,
    814                                           const Address &src) {
    815   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    816   emitUint8(0x66);
    817   emitAddrSizeOverridePrefix();
    818   emitRex(RexTypeIrrelevant, src, dst);
    819   emitUint8(0x0F);
    820   if (isByteSizedArithType(Ty)) {
    821     emitUint8(0xDC);
    822   } else if (Ty == IceType_i16) {
    823     emitUint8(0xDD);
    824   } else {
    825     assert(false && "Unexpected paddus operand type");
    826   }
    827   emitOperand(gprEncoding(dst), src);
    828 }
    829 
    830 template <typename TraitsType>
    831 void AssemblerX86Base<TraitsType>::pand(Type /* Ty */, XmmRegister dst,
    832                                         XmmRegister src) {
    833   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    834   emitUint8(0x66);
    835   emitRexRB(RexTypeIrrelevant, dst, src);
    836   emitUint8(0x0F);
    837   emitUint8(0xDB);
    838   emitXmmRegisterOperand(dst, src);
    839 }
    840 
    841 template <typename TraitsType>
    842 void AssemblerX86Base<TraitsType>::pand(Type /* Ty */, XmmRegister dst,
    843                                         const Address &src) {
    844   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    845   emitUint8(0x66);
    846   emitAddrSizeOverridePrefix();
    847   emitRex(RexTypeIrrelevant, src, dst);
    848   emitUint8(0x0F);
    849   emitUint8(0xDB);
    850   emitOperand(gprEncoding(dst), src);
    851 }
    852 
    853 template <typename TraitsType>
    854 void AssemblerX86Base<TraitsType>::pandn(Type /* Ty */, XmmRegister dst,
    855                                          XmmRegister src) {
    856   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    857   emitUint8(0x66);
    858   emitRexRB(RexTypeIrrelevant, dst, src);
    859   emitUint8(0x0F);
    860   emitUint8(0xDF);
    861   emitXmmRegisterOperand(dst, src);
    862 }
    863 
    864 template <typename TraitsType>
    865 void AssemblerX86Base<TraitsType>::pandn(Type /* Ty */, XmmRegister dst,
    866                                          const Address &src) {
    867   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    868   emitUint8(0x66);
    869   emitAddrSizeOverridePrefix();
    870   emitRex(RexTypeIrrelevant, src, dst);
    871   emitUint8(0x0F);
    872   emitUint8(0xDF);
    873   emitOperand(gprEncoding(dst), src);
    874 }
    875 
    876 template <typename TraitsType>
    877 void AssemblerX86Base<TraitsType>::pmull(Type Ty, XmmRegister dst,
    878                                          XmmRegister src) {
    879   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    880   emitUint8(0x66);
    881   emitRexRB(RexTypeIrrelevant, dst, src);
    882   emitUint8(0x0F);
    883   if (Ty == IceType_i16) {
    884     emitUint8(0xD5);
    885   } else {
    886     assert(Ty == IceType_i32);
    887     emitUint8(0x38);
    888     emitUint8(0x40);
    889   }
    890   emitXmmRegisterOperand(dst, src);
    891 }
    892 
    893 template <typename TraitsType>
    894 void AssemblerX86Base<TraitsType>::pmull(Type Ty, XmmRegister dst,
    895                                          const Address &src) {
    896   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    897   emitUint8(0x66);
    898   emitAddrSizeOverridePrefix();
    899   emitRex(RexTypeIrrelevant, src, dst);
    900   emitUint8(0x0F);
    901   if (Ty == IceType_i16) {
    902     emitUint8(0xD5);
    903   } else {
    904     assert(Ty == IceType_i32);
    905     emitUint8(0x38);
    906     emitUint8(0x40);
    907   }
    908   emitOperand(gprEncoding(dst), src);
    909 }
    910 
    911 template <typename TraitsType>
    912 void AssemblerX86Base<TraitsType>::pmulhw(Type Ty, XmmRegister dst,
    913                                           XmmRegister src) {
    914   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    915   emitUint8(0x66);
    916   emitRexRB(RexTypeIrrelevant, dst, src);
    917   emitUint8(0x0F);
    918   assert(Ty == IceType_v8i16);
    919   (void)Ty;
    920   emitUint8(0xE5);
    921   emitXmmRegisterOperand(dst, src);
    922 }
    923 
    924 template <typename TraitsType>
    925 void AssemblerX86Base<TraitsType>::pmulhw(Type Ty, XmmRegister dst,
    926                                           const Address &src) {
    927   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    928   emitUint8(0x66);
    929   emitAddrSizeOverridePrefix();
    930   emitRex(RexTypeIrrelevant, src, dst);
    931   emitUint8(0x0F);
    932   assert(Ty == IceType_v8i16);
    933   (void)Ty;
    934   emitUint8(0xE5);
    935   emitOperand(gprEncoding(dst), src);
    936 }
    937 
    938 template <typename TraitsType>
    939 void AssemblerX86Base<TraitsType>::pmulhuw(Type Ty, XmmRegister dst,
    940                                            XmmRegister src) {
    941   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    942   emitUint8(0x66);
    943   emitRexRB(RexTypeIrrelevant, dst, src);
    944   emitUint8(0x0F);
    945   assert(Ty == IceType_v8i16);
    946   (void)Ty;
    947   emitUint8(0xE4);
    948   emitXmmRegisterOperand(dst, src);
    949 }
    950 
    951 template <typename TraitsType>
    952 void AssemblerX86Base<TraitsType>::pmulhuw(Type Ty, XmmRegister dst,
    953                                            const Address &src) {
    954   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    955   emitUint8(0x66);
    956   emitAddrSizeOverridePrefix();
    957   emitRex(RexTypeIrrelevant, src, dst);
    958   emitUint8(0x0F);
    959   assert(Ty == IceType_v8i16);
    960   (void)Ty;
    961   emitUint8(0xE4);
    962   emitOperand(gprEncoding(dst), src);
    963 }
    964 
    965 template <typename TraitsType>
    966 void AssemblerX86Base<TraitsType>::pmaddwd(Type Ty, XmmRegister dst,
    967                                            XmmRegister src) {
    968   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    969   emitUint8(0x66);
    970   emitRexRB(RexTypeIrrelevant, dst, src);
    971   emitUint8(0x0F);
    972   assert(Ty == IceType_v8i16);
    973   (void)Ty;
    974   emitUint8(0xF5);
    975   emitXmmRegisterOperand(dst, src);
    976 }
    977 
    978 template <typename TraitsType>
    979 void AssemblerX86Base<TraitsType>::pmaddwd(Type Ty, XmmRegister dst,
    980                                            const Address &src) {
    981   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    982   emitUint8(0x66);
    983   emitAddrSizeOverridePrefix();
    984   emitRex(RexTypeIrrelevant, src, dst);
    985   emitUint8(0x0F);
    986   assert(Ty == IceType_v8i16);
    987   (void)Ty;
    988   emitUint8(0xF5);
    989   emitOperand(gprEncoding(dst), src);
    990 }
    991 
    992 template <typename TraitsType>
    993 void AssemblerX86Base<TraitsType>::pmuludq(Type /* Ty */, XmmRegister dst,
    994                                            XmmRegister src) {
    995   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    996   emitUint8(0x66);
    997   emitRexRB(RexTypeIrrelevant, dst, src);
    998   emitUint8(0x0F);
    999   emitUint8(0xF4);
   1000   emitXmmRegisterOperand(dst, src);
   1001 }
   1002 
   1003 template <typename TraitsType>
   1004 void AssemblerX86Base<TraitsType>::pmuludq(Type /* Ty */, XmmRegister dst,
   1005                                            const Address &src) {
   1006   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1007   emitUint8(0x66);
   1008   emitAddrSizeOverridePrefix();
   1009   emitRex(RexTypeIrrelevant, src, dst);
   1010   emitUint8(0x0F);
   1011   emitUint8(0xF4);
   1012   emitOperand(gprEncoding(dst), src);
   1013 }
   1014 
   1015 template <typename TraitsType>
   1016 void AssemblerX86Base<TraitsType>::por(Type /* Ty */, XmmRegister dst,
   1017                                        XmmRegister src) {
   1018   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1019   emitUint8(0x66);
   1020   emitRexRB(RexTypeIrrelevant, dst, src);
   1021   emitUint8(0x0F);
   1022   emitUint8(0xEB);
   1023   emitXmmRegisterOperand(dst, src);
   1024 }
   1025 
   1026 template <typename TraitsType>
   1027 void AssemblerX86Base<TraitsType>::por(Type /* Ty */, XmmRegister dst,
   1028                                        const Address &src) {
   1029   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1030   emitUint8(0x66);
   1031   emitAddrSizeOverridePrefix();
   1032   emitRex(RexTypeIrrelevant, src, dst);
   1033   emitUint8(0x0F);
   1034   emitUint8(0xEB);
   1035   emitOperand(gprEncoding(dst), src);
   1036 }
   1037 
   1038 template <typename TraitsType>
   1039 void AssemblerX86Base<TraitsType>::psub(Type Ty, XmmRegister dst,
   1040                                         XmmRegister src) {
   1041   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1042   emitUint8(0x66);
   1043   emitRexRB(RexTypeIrrelevant, dst, src);
   1044   emitUint8(0x0F);
   1045   if (isByteSizedArithType(Ty)) {
   1046     emitUint8(0xF8);
   1047   } else if (Ty == IceType_i16) {
   1048     emitUint8(0xF9);
   1049   } else {
   1050     emitUint8(0xFA);
   1051   }
   1052   emitXmmRegisterOperand(dst, src);
   1053 }
   1054 
   1055 template <typename TraitsType>
   1056 void AssemblerX86Base<TraitsType>::psub(Type Ty, XmmRegister dst,
   1057                                         const Address &src) {
   1058   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1059   emitUint8(0x66);
   1060   emitAddrSizeOverridePrefix();
   1061   emitRex(RexTypeIrrelevant, src, dst);
   1062   emitUint8(0x0F);
   1063   if (isByteSizedArithType(Ty)) {
   1064     emitUint8(0xF8);
   1065   } else if (Ty == IceType_i16) {
   1066     emitUint8(0xF9);
   1067   } else {
   1068     emitUint8(0xFA);
   1069   }
   1070   emitOperand(gprEncoding(dst), src);
   1071 }
   1072 
   1073 template <typename TraitsType>
   1074 void AssemblerX86Base<TraitsType>::psubs(Type Ty, XmmRegister dst,
   1075                                          XmmRegister src) {
   1076   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1077   emitUint8(0x66);
   1078   emitRexRB(RexTypeIrrelevant, dst, src);
   1079   emitUint8(0x0F);
   1080   if (isByteSizedArithType(Ty)) {
   1081     emitUint8(0xE8);
   1082   } else if (Ty == IceType_i16) {
   1083     emitUint8(0xE9);
   1084   } else {
   1085     assert(false && "Unexpected psubs operand type");
   1086   }
   1087   emitXmmRegisterOperand(dst, src);
   1088 }
   1089 
   1090 template <typename TraitsType>
   1091 void AssemblerX86Base<TraitsType>::psubs(Type Ty, XmmRegister dst,
   1092                                          const Address &src) {
   1093   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1094   emitUint8(0x66);
   1095   emitAddrSizeOverridePrefix();
   1096   emitRex(RexTypeIrrelevant, src, dst);
   1097   emitUint8(0x0F);
   1098   if (isByteSizedArithType(Ty)) {
   1099     emitUint8(0xE8);
   1100   } else if (Ty == IceType_i16) {
   1101     emitUint8(0xE9);
   1102   } else {
   1103     assert(false && "Unexpected psubs operand type");
   1104   }
   1105   emitOperand(gprEncoding(dst), src);
   1106 }
   1107 template <typename TraitsType>
   1108 void AssemblerX86Base<TraitsType>::psubus(Type Ty, XmmRegister dst,
   1109                                           XmmRegister src) {
   1110   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1111   emitUint8(0x66);
   1112   emitRexRB(RexTypeIrrelevant, dst, src);
   1113   emitUint8(0x0F);
   1114   if (isByteSizedArithType(Ty)) {
   1115     emitUint8(0xD8);
   1116   } else if (Ty == IceType_i16) {
   1117     emitUint8(0xD9);
   1118   } else {
   1119     assert(false && "Unexpected psubus operand type");
   1120   }
   1121   emitXmmRegisterOperand(dst, src);
   1122 }
   1123 
   1124 template <typename TraitsType>
   1125 void AssemblerX86Base<TraitsType>::psubus(Type Ty, XmmRegister dst,
   1126                                           const Address &src) {
   1127   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1128   emitUint8(0x66);
   1129   emitAddrSizeOverridePrefix();
   1130   emitRex(RexTypeIrrelevant, src, dst);
   1131   emitUint8(0x0F);
   1132   if (isByteSizedArithType(Ty)) {
   1133     emitUint8(0xD8);
   1134   } else if (Ty == IceType_i16) {
   1135     emitUint8(0xD9);
   1136   } else {
   1137     assert(false && "Unexpected psubus operand type");
   1138   }
   1139   emitOperand(gprEncoding(dst), src);
   1140 }
   1141 
   1142 template <typename TraitsType>
   1143 void AssemblerX86Base<TraitsType>::pxor(Type /* Ty */, XmmRegister dst,
   1144                                         XmmRegister src) {
   1145   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1146   emitUint8(0x66);
   1147   emitRexRB(RexTypeIrrelevant, dst, src);
   1148   emitUint8(0x0F);
   1149   emitUint8(0xEF);
   1150   emitXmmRegisterOperand(dst, src);
   1151 }
   1152 
   1153 template <typename TraitsType>
   1154 void AssemblerX86Base<TraitsType>::pxor(Type /* Ty */, XmmRegister dst,
   1155                                         const Address &src) {
   1156   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1157   emitUint8(0x66);
   1158   emitAddrSizeOverridePrefix();
   1159   emitRex(RexTypeIrrelevant, src, dst);
   1160   emitUint8(0x0F);
   1161   emitUint8(0xEF);
   1162   emitOperand(gprEncoding(dst), src);
   1163 }
   1164 
   1165 template <typename TraitsType>
   1166 void AssemblerX86Base<TraitsType>::psll(Type Ty, XmmRegister dst,
   1167                                         XmmRegister src) {
   1168   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1169   emitUint8(0x66);
   1170   emitRexRB(RexTypeIrrelevant, dst, src);
   1171   emitUint8(0x0F);
   1172   if (Ty == IceType_i16) {
   1173     emitUint8(0xF1);
   1174   } else {
   1175     assert(Ty == IceType_i32);
   1176     emitUint8(0xF2);
   1177   }
   1178   emitXmmRegisterOperand(dst, src);
   1179 }
   1180 
   1181 template <typename TraitsType>
   1182 void AssemblerX86Base<TraitsType>::psll(Type Ty, XmmRegister dst,
   1183                                         const Address &src) {
   1184   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1185   emitUint8(0x66);
   1186   emitAddrSizeOverridePrefix();
   1187   emitRex(RexTypeIrrelevant, src, dst);
   1188   emitUint8(0x0F);
   1189   if (Ty == IceType_i16) {
   1190     emitUint8(0xF1);
   1191   } else {
   1192     assert(Ty == IceType_i32);
   1193     emitUint8(0xF2);
   1194   }
   1195   emitOperand(gprEncoding(dst), src);
   1196 }
   1197 
   1198 template <typename TraitsType>
   1199 void AssemblerX86Base<TraitsType>::psll(Type Ty, XmmRegister dst,
   1200                                         const Immediate &imm) {
   1201   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1202   assert(imm.is_int8());
   1203   emitUint8(0x66);
   1204   emitRexB(RexTypeIrrelevant, dst);
   1205   emitUint8(0x0F);
   1206   if (Ty == IceType_i16) {
   1207     emitUint8(0x71);
   1208   } else {
   1209     assert(Ty == IceType_i32);
   1210     emitUint8(0x72);
   1211   }
   1212   emitRegisterOperand(6, gprEncoding(dst));
   1213   emitUint8(imm.value() & 0xFF);
   1214 }
   1215 
   1216 template <typename TraitsType>
   1217 void AssemblerX86Base<TraitsType>::psra(Type Ty, XmmRegister dst,
   1218                                         XmmRegister src) {
   1219   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1220   emitUint8(0x66);
   1221   emitRexRB(RexTypeIrrelevant, dst, src);
   1222   emitUint8(0x0F);
   1223   if (Ty == IceType_i16) {
   1224     emitUint8(0xE1);
   1225   } else {
   1226     assert(Ty == IceType_i32);
   1227     emitUint8(0xE2);
   1228   }
   1229   emitXmmRegisterOperand(dst, src);
   1230 }
   1231 
   1232 template <typename TraitsType>
   1233 void AssemblerX86Base<TraitsType>::psra(Type Ty, XmmRegister dst,
   1234                                         const Address &src) {
   1235   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1236   emitUint8(0x66);
   1237   emitAddrSizeOverridePrefix();
   1238   emitRex(RexTypeIrrelevant, src, dst);
   1239   emitUint8(0x0F);
   1240   if (Ty == IceType_i16) {
   1241     emitUint8(0xE1);
   1242   } else {
   1243     assert(Ty == IceType_i32);
   1244     emitUint8(0xE2);
   1245   }
   1246   emitOperand(gprEncoding(dst), src);
   1247 }
   1248 
   1249 template <typename TraitsType>
   1250 void AssemblerX86Base<TraitsType>::psra(Type Ty, XmmRegister dst,
   1251                                         const Immediate &imm) {
   1252   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1253   assert(imm.is_int8());
   1254   emitUint8(0x66);
   1255   emitRexB(RexTypeIrrelevant, dst);
   1256   emitUint8(0x0F);
   1257   if (Ty == IceType_i16) {
   1258     emitUint8(0x71);
   1259   } else {
   1260     assert(Ty == IceType_i32);
   1261     emitUint8(0x72);
   1262   }
   1263   emitRegisterOperand(4, gprEncoding(dst));
   1264   emitUint8(imm.value() & 0xFF);
   1265 }
   1266 
   1267 template <typename TraitsType>
   1268 void AssemblerX86Base<TraitsType>::psrl(Type Ty, XmmRegister dst,
   1269                                         XmmRegister src) {
   1270   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1271   emitUint8(0x66);
   1272   emitRexRB(RexTypeIrrelevant, dst, src);
   1273   emitUint8(0x0F);
   1274   if (Ty == IceType_i16) {
   1275     emitUint8(0xD1);
   1276   } else if (Ty == IceType_f64) {
   1277     emitUint8(0xD3);
   1278   } else {
   1279     assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_v4f32);
   1280     emitUint8(0xD2);
   1281   }
   1282   emitXmmRegisterOperand(dst, src);
   1283 }
   1284 
   1285 template <typename TraitsType>
   1286 void AssemblerX86Base<TraitsType>::psrl(Type Ty, XmmRegister dst,
   1287                                         const Address &src) {
   1288   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1289   emitUint8(0x66);
   1290   emitAddrSizeOverridePrefix();
   1291   emitRex(RexTypeIrrelevant, src, dst);
   1292   emitUint8(0x0F);
   1293   if (Ty == IceType_i16) {
   1294     emitUint8(0xD1);
   1295   } else if (Ty == IceType_f64) {
   1296     emitUint8(0xD3);
   1297   } else {
   1298     assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_v4f32);
   1299     emitUint8(0xD2);
   1300   }
   1301   emitOperand(gprEncoding(dst), src);
   1302 }
   1303 
   1304 template <typename TraitsType>
   1305 void AssemblerX86Base<TraitsType>::psrl(Type Ty, XmmRegister dst,
   1306                                         const Immediate &imm) {
   1307   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1308   assert(imm.is_int8());
   1309   emitUint8(0x66);
   1310   emitRexB(RexTypeIrrelevant, dst);
   1311   emitUint8(0x0F);
   1312   if (Ty == IceType_i16) {
   1313     emitUint8(0x71);
   1314   } else if (Ty == IceType_f64) {
   1315     emitUint8(0x73);
   1316   } else {
   1317     assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_v4f32);
   1318     emitUint8(0x72);
   1319   }
   1320   emitRegisterOperand(2, gprEncoding(dst));
   1321   emitUint8(imm.value() & 0xFF);
   1322 }
   1323 
   1324 // {add,sub,mul,div}ps are given a Ty parameter for consistency with
   1325 // {add,sub,mul,div}ss. In the future, when the PNaCl ABI allows addpd, etc.,
   1326 // we can use the Ty parameter to decide on adding a 0x66 prefix.
   1327 template <typename TraitsType>
   1328 void AssemblerX86Base<TraitsType>::addps(Type /* Ty */, XmmRegister dst,
   1329                                          XmmRegister src) {
   1330   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1331   emitRexRB(RexTypeIrrelevant, dst, src);
   1332   emitUint8(0x0F);
   1333   emitUint8(0x58);
   1334   emitXmmRegisterOperand(dst, src);
   1335 }
   1336 
   1337 template <typename TraitsType>
   1338 void AssemblerX86Base<TraitsType>::addps(Type /* Ty */, XmmRegister dst,
   1339                                          const Address &src) {
   1340   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1341   emitAddrSizeOverridePrefix();
   1342   emitRex(RexTypeIrrelevant, src, dst);
   1343   emitUint8(0x0F);
   1344   emitUint8(0x58);
   1345   emitOperand(gprEncoding(dst), src);
   1346 }
   1347 
   1348 template <typename TraitsType>
   1349 void AssemblerX86Base<TraitsType>::subps(Type /* Ty */, XmmRegister dst,
   1350                                          XmmRegister src) {
   1351   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1352   emitRexRB(RexTypeIrrelevant, dst, src);
   1353   emitUint8(0x0F);
   1354   emitUint8(0x5C);
   1355   emitXmmRegisterOperand(dst, src);
   1356 }
   1357 
   1358 template <typename TraitsType>
   1359 void AssemblerX86Base<TraitsType>::subps(Type /* Ty */, XmmRegister dst,
   1360                                          const Address &src) {
   1361   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1362   emitAddrSizeOverridePrefix();
   1363   emitRex(RexTypeIrrelevant, src, dst);
   1364   emitUint8(0x0F);
   1365   emitUint8(0x5C);
   1366   emitOperand(gprEncoding(dst), src);
   1367 }
   1368 
   1369 template <typename TraitsType>
   1370 void AssemblerX86Base<TraitsType>::divps(Type /* Ty */, XmmRegister dst,
   1371                                          XmmRegister src) {
   1372   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1373   emitRexRB(RexTypeIrrelevant, dst, src);
   1374   emitUint8(0x0F);
   1375   emitUint8(0x5E);
   1376   emitXmmRegisterOperand(dst, src);
   1377 }
   1378 
   1379 template <typename TraitsType>
   1380 void AssemblerX86Base<TraitsType>::divps(Type /* Ty */, XmmRegister dst,
   1381                                          const Address &src) {
   1382   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1383   emitAddrSizeOverridePrefix();
   1384   emitRex(RexTypeIrrelevant, src, dst);
   1385   emitUint8(0x0F);
   1386   emitUint8(0x5E);
   1387   emitOperand(gprEncoding(dst), src);
   1388 }
   1389 
   1390 template <typename TraitsType>
   1391 void AssemblerX86Base<TraitsType>::mulps(Type /* Ty */, XmmRegister dst,
   1392                                          XmmRegister src) {
   1393   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1394   emitRexRB(RexTypeIrrelevant, dst, src);
   1395   emitUint8(0x0F);
   1396   emitUint8(0x59);
   1397   emitXmmRegisterOperand(dst, src);
   1398 }
   1399 
   1400 template <typename TraitsType>
   1401 void AssemblerX86Base<TraitsType>::mulps(Type /* Ty */, XmmRegister dst,
   1402                                          const Address &src) {
   1403   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1404   emitAddrSizeOverridePrefix();
   1405   emitRex(RexTypeIrrelevant, src, dst);
   1406   emitUint8(0x0F);
   1407   emitUint8(0x59);
   1408   emitOperand(gprEncoding(dst), src);
   1409 }
   1410 
   1411 template <typename TraitsType>
   1412 void AssemblerX86Base<TraitsType>::minps(Type Ty, XmmRegister dst,
   1413                                          XmmRegister src) {
   1414   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1415   if (!isFloat32Asserting32Or64(Ty))
   1416     emitUint8(0x66);
   1417   emitRexRB(RexTypeIrrelevant, dst, src);
   1418   emitUint8(0x0F);
   1419   emitUint8(0x5D);
   1420   emitXmmRegisterOperand(dst, src);
   1421 }
   1422 
   1423 template <typename TraitsType>
   1424 void AssemblerX86Base<TraitsType>::minps(Type Ty, XmmRegister dst,
   1425                                          const Address &src) {
   1426   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1427   if (!isFloat32Asserting32Or64(Ty))
   1428     emitUint8(0x66);
   1429   emitAddrSizeOverridePrefix();
   1430   emitRex(RexTypeIrrelevant, src, dst);
   1431   emitUint8(0x0F);
   1432   emitUint8(0x5D);
   1433   emitOperand(gprEncoding(dst), src);
   1434 }
   1435 
   1436 template <typename TraitsType>
   1437 void AssemblerX86Base<TraitsType>::minss(Type Ty, XmmRegister dst,
   1438                                          XmmRegister src) {
   1439   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1440   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   1441   emitRexRB(RexTypeIrrelevant, dst, src);
   1442   emitUint8(0x0F);
   1443   emitUint8(0x5D);
   1444   emitXmmRegisterOperand(dst, src);
   1445 }
   1446 
   1447 template <typename TraitsType>
   1448 void AssemblerX86Base<TraitsType>::minss(Type Ty, XmmRegister dst,
   1449                                          const Address &src) {
   1450   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1451   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   1452   emitAddrSizeOverridePrefix();
   1453   emitRex(RexTypeIrrelevant, src, dst);
   1454   emitUint8(0x0F);
   1455   emitUint8(0x5D);
   1456   emitOperand(gprEncoding(dst), src);
   1457 }
   1458 
   1459 template <typename TraitsType>
   1460 void AssemblerX86Base<TraitsType>::maxps(Type Ty, XmmRegister dst,
   1461                                          XmmRegister src) {
   1462   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1463   if (!isFloat32Asserting32Or64(Ty))
   1464     emitUint8(0x66);
   1465   emitRexRB(RexTypeIrrelevant, dst, src);
   1466   emitUint8(0x0F);
   1467   emitUint8(0x5F);
   1468   emitXmmRegisterOperand(dst, src);
   1469 }
   1470 
   1471 template <typename TraitsType>
   1472 void AssemblerX86Base<TraitsType>::maxps(Type Ty, XmmRegister dst,
   1473                                          const Address &src) {
   1474   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1475   if (!isFloat32Asserting32Or64(Ty))
   1476     emitUint8(0x66);
   1477   emitAddrSizeOverridePrefix();
   1478   emitRex(RexTypeIrrelevant, src, dst);
   1479   emitUint8(0x0F);
   1480   emitUint8(0x5F);
   1481   emitOperand(gprEncoding(dst), src);
   1482 }
   1483 
   1484 template <typename TraitsType>
   1485 void AssemblerX86Base<TraitsType>::maxss(Type Ty, XmmRegister dst,
   1486                                          XmmRegister src) {
   1487   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1488   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   1489   emitRexRB(RexTypeIrrelevant, dst, src);
   1490   emitUint8(0x0F);
   1491   emitUint8(0x5F);
   1492   emitXmmRegisterOperand(dst, src);
   1493 }
   1494 
   1495 template <typename TraitsType>
   1496 void AssemblerX86Base<TraitsType>::maxss(Type Ty, XmmRegister dst,
   1497                                          const Address &src) {
   1498   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1499   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   1500   emitAddrSizeOverridePrefix();
   1501   emitRex(RexTypeIrrelevant, src, dst);
   1502   emitUint8(0x0F);
   1503   emitUint8(0x5F);
   1504   emitOperand(gprEncoding(dst), src);
   1505 }
   1506 
   1507 template <typename TraitsType>
   1508 void AssemblerX86Base<TraitsType>::andnps(Type Ty, XmmRegister dst,
   1509                                           XmmRegister src) {
   1510   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1511   if (!isFloat32Asserting32Or64(Ty))
   1512     emitUint8(0x66);
   1513   emitRexRB(RexTypeIrrelevant, dst, src);
   1514   emitUint8(0x0F);
   1515   emitUint8(0x55);
   1516   emitXmmRegisterOperand(dst, src);
   1517 }
   1518 
   1519 template <typename TraitsType>
   1520 void AssemblerX86Base<TraitsType>::andnps(Type Ty, XmmRegister dst,
   1521                                           const Address &src) {
   1522   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1523   if (!isFloat32Asserting32Or64(Ty))
   1524     emitUint8(0x66);
   1525   emitAddrSizeOverridePrefix();
   1526   emitRex(RexTypeIrrelevant, src, dst);
   1527   emitUint8(0x0F);
   1528   emitUint8(0x55);
   1529   emitOperand(gprEncoding(dst), src);
   1530 }
   1531 
   1532 template <typename TraitsType>
   1533 void AssemblerX86Base<TraitsType>::andps(Type Ty, XmmRegister dst,
   1534                                          XmmRegister src) {
   1535   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1536   if (!isFloat32Asserting32Or64(Ty))
   1537     emitUint8(0x66);
   1538   emitRexRB(RexTypeIrrelevant, dst, src);
   1539   emitUint8(0x0F);
   1540   emitUint8(0x54);
   1541   emitXmmRegisterOperand(dst, src);
   1542 }
   1543 
   1544 template <typename TraitsType>
   1545 void AssemblerX86Base<TraitsType>::andps(Type Ty, XmmRegister dst,
   1546                                          const Address &src) {
   1547   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1548   if (!isFloat32Asserting32Or64(Ty))
   1549     emitUint8(0x66);
   1550   emitAddrSizeOverridePrefix();
   1551   emitRex(RexTypeIrrelevant, src, dst);
   1552   emitUint8(0x0F);
   1553   emitUint8(0x54);
   1554   emitOperand(gprEncoding(dst), src);
   1555 }
   1556 
   1557 template <typename TraitsType>
   1558 void AssemblerX86Base<TraitsType>::orps(Type Ty, XmmRegister dst,
   1559                                         XmmRegister src) {
   1560   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1561   if (!isFloat32Asserting32Or64(Ty))
   1562     emitUint8(0x66);
   1563   emitRexRB(RexTypeIrrelevant, dst, src);
   1564   emitUint8(0x0F);
   1565   emitUint8(0x56);
   1566   emitXmmRegisterOperand(dst, src);
   1567 }
   1568 
   1569 template <typename TraitsType>
   1570 void AssemblerX86Base<TraitsType>::orps(Type Ty, XmmRegister dst,
   1571                                         const Address &src) {
   1572   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1573   if (!isFloat32Asserting32Or64(Ty))
   1574     emitUint8(0x66);
   1575   emitAddrSizeOverridePrefix();
   1576   emitRex(RexTypeIrrelevant, src, dst);
   1577   emitUint8(0x0F);
   1578   emitUint8(0x56);
   1579   emitOperand(gprEncoding(dst), src);
   1580 }
   1581 
   1582 template <typename TraitsType>
   1583 void AssemblerX86Base<TraitsType>::blendvps(Type /* Ty */, XmmRegister dst,
   1584                                             XmmRegister src) {
   1585   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1586   emitUint8(0x66);
   1587   emitRexRB(RexTypeIrrelevant, dst, src);
   1588   emitUint8(0x0F);
   1589   emitUint8(0x38);
   1590   emitUint8(0x14);
   1591   emitXmmRegisterOperand(dst, src);
   1592 }
   1593 
   1594 template <typename TraitsType>
   1595 void AssemblerX86Base<TraitsType>::blendvps(Type /* Ty */, XmmRegister dst,
   1596                                             const Address &src) {
   1597   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1598   emitUint8(0x66);
   1599   emitAddrSizeOverridePrefix();
   1600   emitRex(RexTypeIrrelevant, src, dst);
   1601   emitUint8(0x0F);
   1602   emitUint8(0x38);
   1603   emitUint8(0x14);
   1604   emitOperand(gprEncoding(dst), src);
   1605 }
   1606 
   1607 template <typename TraitsType>
   1608 void AssemblerX86Base<TraitsType>::pblendvb(Type /* Ty */, XmmRegister dst,
   1609                                             XmmRegister src) {
   1610   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1611   emitUint8(0x66);
   1612   emitRexRB(RexTypeIrrelevant, dst, src);
   1613   emitUint8(0x0F);
   1614   emitUint8(0x38);
   1615   emitUint8(0x10);
   1616   emitXmmRegisterOperand(dst, src);
   1617 }
   1618 
   1619 template <typename TraitsType>
   1620 void AssemblerX86Base<TraitsType>::pblendvb(Type /* Ty */, XmmRegister dst,
   1621                                             const Address &src) {
   1622   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1623   emitUint8(0x66);
   1624   emitAddrSizeOverridePrefix();
   1625   emitRex(RexTypeIrrelevant, src, dst);
   1626   emitUint8(0x0F);
   1627   emitUint8(0x38);
   1628   emitUint8(0x10);
   1629   emitOperand(gprEncoding(dst), src);
   1630 }
   1631 
   1632 template <typename TraitsType>
   1633 void AssemblerX86Base<TraitsType>::cmpps(Type Ty, XmmRegister dst,
   1634                                          XmmRegister src,
   1635                                          CmppsCond CmpCondition) {
   1636   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1637   if (Ty == IceType_f64)
   1638     emitUint8(0x66);
   1639   emitRexRB(RexTypeIrrelevant, dst, src);
   1640   emitUint8(0x0F);
   1641   emitUint8(0xC2);
   1642   emitXmmRegisterOperand(dst, src);
   1643   emitUint8(CmpCondition);
   1644 }
   1645 
   1646 template <typename TraitsType>
   1647 void AssemblerX86Base<TraitsType>::cmpps(Type Ty, XmmRegister dst,
   1648                                          const Address &src,
   1649                                          CmppsCond CmpCondition) {
   1650   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1651   if (Ty == IceType_f64)
   1652     emitUint8(0x66);
   1653   emitAddrSizeOverridePrefix();
   1654   emitRex(RexTypeIrrelevant, src, dst);
   1655   emitUint8(0x0F);
   1656   emitUint8(0xC2);
   1657   static constexpr RelocOffsetT OffsetFromNextInstruction = 1;
   1658   emitOperand(gprEncoding(dst), src, OffsetFromNextInstruction);
   1659   emitUint8(CmpCondition);
   1660 }
   1661 
   1662 template <typename TraitsType>
   1663 void AssemblerX86Base<TraitsType>::sqrtps(XmmRegister dst) {
   1664   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1665   emitRexRB(RexTypeIrrelevant, dst, dst);
   1666   emitUint8(0x0F);
   1667   emitUint8(0x51);
   1668   emitXmmRegisterOperand(dst, dst);
   1669 }
   1670 
   1671 template <typename TraitsType>
   1672 void AssemblerX86Base<TraitsType>::rsqrtps(XmmRegister dst) {
   1673   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1674   emitRexRB(RexTypeIrrelevant, dst, dst);
   1675   emitUint8(0x0F);
   1676   emitUint8(0x52);
   1677   emitXmmRegisterOperand(dst, dst);
   1678 }
   1679 
   1680 template <typename TraitsType>
   1681 void AssemblerX86Base<TraitsType>::reciprocalps(XmmRegister dst) {
   1682   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1683   emitRexRB(RexTypeIrrelevant, dst, dst);
   1684   emitUint8(0x0F);
   1685   emitUint8(0x53);
   1686   emitXmmRegisterOperand(dst, dst);
   1687 }
   1688 
   1689 template <typename TraitsType>
   1690 void AssemblerX86Base<TraitsType>::movhlps(XmmRegister dst, XmmRegister src) {
   1691   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1692   emitRexRB(RexTypeIrrelevant, dst, src);
   1693   emitUint8(0x0F);
   1694   emitUint8(0x12);
   1695   emitXmmRegisterOperand(dst, src);
   1696 }
   1697 
   1698 template <typename TraitsType>
   1699 void AssemblerX86Base<TraitsType>::movlhps(XmmRegister dst, XmmRegister src) {
   1700   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1701   emitRexRB(RexTypeIrrelevant, dst, src);
   1702   emitUint8(0x0F);
   1703   emitUint8(0x16);
   1704   emitXmmRegisterOperand(dst, src);
   1705 }
   1706 
   1707 template <typename TraitsType>
   1708 void AssemblerX86Base<TraitsType>::unpcklps(XmmRegister dst, XmmRegister src) {
   1709   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1710   emitRexRB(RexTypeIrrelevant, dst, src);
   1711   emitUint8(0x0F);
   1712   emitUint8(0x14);
   1713   emitXmmRegisterOperand(dst, src);
   1714 }
   1715 
   1716 template <typename TraitsType>
   1717 void AssemblerX86Base<TraitsType>::unpckhps(XmmRegister dst, XmmRegister src) {
   1718   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1719   emitRexRB(RexTypeIrrelevant, dst, src);
   1720   emitUint8(0x0F);
   1721   emitUint8(0x15);
   1722   emitXmmRegisterOperand(dst, src);
   1723 }
   1724 
   1725 template <typename TraitsType>
   1726 void AssemblerX86Base<TraitsType>::unpcklpd(XmmRegister dst, XmmRegister src) {
   1727   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1728   emitUint8(0x66);
   1729   emitRexRB(RexTypeIrrelevant, dst, src);
   1730   emitUint8(0x0F);
   1731   emitUint8(0x14);
   1732   emitXmmRegisterOperand(dst, src);
   1733 }
   1734 
   1735 template <typename TraitsType>
   1736 void AssemblerX86Base<TraitsType>::unpckhpd(XmmRegister dst, XmmRegister src) {
   1737   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1738   emitUint8(0x66);
   1739   emitRexRB(RexTypeIrrelevant, dst, src);
   1740   emitUint8(0x0F);
   1741   emitUint8(0x15);
   1742   emitXmmRegisterOperand(dst, src);
   1743 }
   1744 
   1745 template <typename TraitsType>
   1746 void AssemblerX86Base<TraitsType>::set1ps(XmmRegister dst, GPRRegister tmp1,
   1747                                           const Immediate &imm) {
   1748   // Load 32-bit immediate value into tmp1.
   1749   mov(IceType_i32, tmp1, imm);
   1750   // Move value from tmp1 into dst.
   1751   movd(IceType_i32, dst, tmp1);
   1752   // Broadcast low lane into other three lanes.
   1753   shufps(RexTypeIrrelevant, dst, dst, Immediate(0x0));
   1754 }
   1755 
   1756 template <typename TraitsType>
   1757 void AssemblerX86Base<TraitsType>::pshufb(Type /* Ty */, XmmRegister dst,
   1758                                           XmmRegister src) {
   1759   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1760   emitUint8(0x66);
   1761   emitRexRB(RexTypeIrrelevant, dst, src);
   1762   emitUint8(0x0F);
   1763   emitUint8(0x38);
   1764   emitUint8(0x00);
   1765   emitXmmRegisterOperand(dst, src);
   1766 }
   1767 
   1768 template <typename TraitsType>
   1769 void AssemblerX86Base<TraitsType>::pshufb(Type /* Ty */, XmmRegister dst,
   1770                                           const Address &src) {
   1771   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1772   emitUint8(0x66);
   1773   emitAddrSizeOverridePrefix();
   1774   emitRex(RexTypeIrrelevant, src, dst);
   1775   emitUint8(0x0F);
   1776   emitUint8(0x38);
   1777   emitUint8(0x00);
   1778   emitOperand(gprEncoding(dst), src);
   1779 }
   1780 
   1781 template <typename TraitsType>
   1782 void AssemblerX86Base<TraitsType>::pshufd(Type /* Ty */, XmmRegister dst,
   1783                                           XmmRegister src,
   1784                                           const Immediate &imm) {
   1785   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1786   emitUint8(0x66);
   1787   emitRexRB(RexTypeIrrelevant, dst, src);
   1788   emitUint8(0x0F);
   1789   emitUint8(0x70);
   1790   emitXmmRegisterOperand(dst, src);
   1791   assert(imm.is_uint8());
   1792   emitUint8(imm.value());
   1793 }
   1794 
   1795 template <typename TraitsType>
   1796 void AssemblerX86Base<TraitsType>::pshufd(Type /* Ty */, XmmRegister dst,
   1797                                           const Address &src,
   1798                                           const Immediate &imm) {
   1799   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1800   emitUint8(0x66);
   1801   emitAddrSizeOverridePrefix();
   1802   emitRex(RexTypeIrrelevant, src, dst);
   1803   emitUint8(0x0F);
   1804   emitUint8(0x70);
   1805   static constexpr RelocOffsetT OffsetFromNextInstruction = 1;
   1806   emitOperand(gprEncoding(dst), src, OffsetFromNextInstruction);
   1807   assert(imm.is_uint8());
   1808   emitUint8(imm.value());
   1809 }
   1810 
   1811 template <typename TraitsType>
   1812 void AssemblerX86Base<TraitsType>::punpckl(Type Ty, XmmRegister Dst,
   1813                                            XmmRegister Src) {
   1814   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1815   emitUint8(0x66);
   1816   emitRexRB(RexTypeIrrelevant, Dst, Src);
   1817   emitUint8(0x0F);
   1818   if (Ty == IceType_v4i32 || Ty == IceType_v4f32) {
   1819     emitUint8(0x62);
   1820   } else if (Ty == IceType_v8i16) {
   1821     emitUint8(0x61);
   1822   } else if (Ty == IceType_v16i8) {
   1823     emitUint8(0x60);
   1824   } else {
   1825     assert(false && "Unexpected vector unpack operand type");
   1826   }
   1827   emitXmmRegisterOperand(Dst, Src);
   1828 }
   1829 
   1830 template <typename TraitsType>
   1831 void AssemblerX86Base<TraitsType>::punpckl(Type Ty, XmmRegister Dst,
   1832                                            const Address &Src) {
   1833   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1834   emitUint8(0x66);
   1835   emitAddrSizeOverridePrefix();
   1836   emitRex(RexTypeIrrelevant, Src, Dst);
   1837   emitUint8(0x0F);
   1838   if (Ty == IceType_v4i32 || Ty == IceType_v4f32) {
   1839     emitUint8(0x62);
   1840   } else if (Ty == IceType_v8i16) {
   1841     emitUint8(0x61);
   1842   } else if (Ty == IceType_v16i8) {
   1843     emitUint8(0x60);
   1844   } else {
   1845     assert(false && "Unexpected vector unpack operand type");
   1846   }
   1847   emitOperand(gprEncoding(Dst), Src);
   1848 }
   1849 
   1850 template <typename TraitsType>
   1851 void AssemblerX86Base<TraitsType>::punpckh(Type Ty, XmmRegister Dst,
   1852                                            XmmRegister Src) {
   1853   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1854   emitUint8(0x66);
   1855   emitRexRB(RexTypeIrrelevant, Dst, Src);
   1856   emitUint8(0x0F);
   1857   if (Ty == IceType_v4i32 || Ty == IceType_v4f32) {
   1858     emitUint8(0x6A);
   1859   } else if (Ty == IceType_v8i16) {
   1860     emitUint8(0x69);
   1861   } else if (Ty == IceType_v16i8) {
   1862     emitUint8(0x68);
   1863   } else {
   1864     assert(false && "Unexpected vector unpack operand type");
   1865   }
   1866   emitXmmRegisterOperand(Dst, Src);
   1867 }
   1868 
   1869 template <typename TraitsType>
   1870 void AssemblerX86Base<TraitsType>::punpckh(Type Ty, XmmRegister Dst,
   1871                                            const Address &Src) {
   1872   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1873   emitUint8(0x66);
   1874   emitAddrSizeOverridePrefix();
   1875   emitRex(RexTypeIrrelevant, Src, Dst);
   1876   emitUint8(0x0F);
   1877   if (Ty == IceType_v4i32 || Ty == IceType_v4f32) {
   1878     emitUint8(0x6A);
   1879   } else if (Ty == IceType_v8i16) {
   1880     emitUint8(0x69);
   1881   } else if (Ty == IceType_v16i8) {
   1882     emitUint8(0x68);
   1883   } else {
   1884     assert(false && "Unexpected vector unpack operand type");
   1885   }
   1886   emitOperand(gprEncoding(Dst), Src);
   1887 }
   1888 
   1889 template <typename TraitsType>
   1890 void AssemblerX86Base<TraitsType>::packss(Type Ty, XmmRegister Dst,
   1891                                           XmmRegister Src) {
   1892   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1893   emitUint8(0x66);
   1894   emitRexRB(RexTypeIrrelevant, Dst, Src);
   1895   emitUint8(0x0F);
   1896   if (Ty == IceType_v4i32 || Ty == IceType_v4f32) {
   1897     emitUint8(0x6B);
   1898   } else if (Ty == IceType_v8i16) {
   1899     emitUint8(0x63);
   1900   } else {
   1901     assert(false && "Unexpected vector pack operand type");
   1902   }
   1903   emitXmmRegisterOperand(Dst, Src);
   1904 }
   1905 
   1906 template <typename TraitsType>
   1907 void AssemblerX86Base<TraitsType>::packss(Type Ty, XmmRegister Dst,
   1908                                           const Address &Src) {
   1909   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1910   emitUint8(0x66);
   1911   emitAddrSizeOverridePrefix();
   1912   emitRex(RexTypeIrrelevant, Src, Dst);
   1913   emitUint8(0x0F);
   1914   if (Ty == IceType_v4i32 || Ty == IceType_v4f32) {
   1915     emitUint8(0x6B);
   1916   } else if (Ty == IceType_v8i16) {
   1917     emitUint8(0x63);
   1918   } else {
   1919     assert(false && "Unexpected vector pack operand type");
   1920   }
   1921   emitOperand(gprEncoding(Dst), Src);
   1922 }
   1923 
   1924 template <typename TraitsType>
   1925 void AssemblerX86Base<TraitsType>::packus(Type Ty, XmmRegister Dst,
   1926                                           XmmRegister Src) {
   1927   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1928   emitUint8(0x66);
   1929   emitRexRB(RexTypeIrrelevant, Dst, Src);
   1930   emitUint8(0x0F);
   1931   if (Ty == IceType_v4i32 || Ty == IceType_v4f32) {
   1932     emitUint8(0x38);
   1933     emitUint8(0x2B);
   1934   } else if (Ty == IceType_v8i16) {
   1935     emitUint8(0x67);
   1936   } else {
   1937     assert(false && "Unexpected vector pack operand type");
   1938   }
   1939   emitXmmRegisterOperand(Dst, Src);
   1940 }
   1941 
   1942 template <typename TraitsType>
   1943 void AssemblerX86Base<TraitsType>::packus(Type Ty, XmmRegister Dst,
   1944                                           const Address &Src) {
   1945   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1946   emitUint8(0x66);
   1947   emitAddrSizeOverridePrefix();
   1948   emitRex(RexTypeIrrelevant, Src, Dst);
   1949   emitUint8(0x0F);
   1950   if (Ty == IceType_v4i32 || Ty == IceType_v4f32) {
   1951     emitUint8(0x38);
   1952     emitUint8(0x2B);
   1953   } else if (Ty == IceType_v8i16) {
   1954     emitUint8(0x67);
   1955   } else {
   1956     assert(false && "Unexpected vector pack operand type");
   1957   }
   1958   emitOperand(gprEncoding(Dst), Src);
   1959 }
   1960 
   1961 template <typename TraitsType>
   1962 void AssemblerX86Base<TraitsType>::shufps(Type /* Ty */, XmmRegister dst,
   1963                                           XmmRegister src,
   1964                                           const Immediate &imm) {
   1965   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1966   emitRexRB(RexTypeIrrelevant, dst, src);
   1967   emitUint8(0x0F);
   1968   emitUint8(0xC6);
   1969   emitXmmRegisterOperand(dst, src);
   1970   assert(imm.is_uint8());
   1971   emitUint8(imm.value());
   1972 }
   1973 
   1974 template <typename TraitsType>
   1975 void AssemblerX86Base<TraitsType>::shufps(Type /* Ty */, XmmRegister dst,
   1976                                           const Address &src,
   1977                                           const Immediate &imm) {
   1978   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1979   emitAddrSizeOverridePrefix();
   1980   emitRex(RexTypeIrrelevant, src, dst);
   1981   emitUint8(0x0F);
   1982   emitUint8(0xC6);
   1983   static constexpr RelocOffsetT OffsetFromNextInstruction = 1;
   1984   emitOperand(gprEncoding(dst), src, OffsetFromNextInstruction);
   1985   assert(imm.is_uint8());
   1986   emitUint8(imm.value());
   1987 }
   1988 
   1989 template <typename TraitsType>
   1990 void AssemblerX86Base<TraitsType>::sqrtpd(XmmRegister dst) {
   1991   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   1992   emitUint8(0x66);
   1993   emitRexRB(RexTypeIrrelevant, dst, dst);
   1994   emitUint8(0x0F);
   1995   emitUint8(0x51);
   1996   emitXmmRegisterOperand(dst, dst);
   1997 }
   1998 
   1999 template <typename TraitsType>
   2000 void AssemblerX86Base<TraitsType>::cvtdq2ps(Type /* Ignore */, XmmRegister dst,
   2001                                             XmmRegister src) {
   2002   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2003   emitRexRB(RexTypeIrrelevant, dst, src);
   2004   emitUint8(0x0F);
   2005   emitUint8(0x5B);
   2006   emitXmmRegisterOperand(dst, src);
   2007 }
   2008 
   2009 template <typename TraitsType>
   2010 void AssemblerX86Base<TraitsType>::cvtdq2ps(Type /* Ignore */, XmmRegister dst,
   2011                                             const Address &src) {
   2012   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2013   emitAddrSizeOverridePrefix();
   2014   emitRex(RexTypeIrrelevant, src, dst);
   2015   emitUint8(0x0F);
   2016   emitUint8(0x5B);
   2017   emitOperand(gprEncoding(dst), src);
   2018 }
   2019 
   2020 template <typename TraitsType>
   2021 void AssemblerX86Base<TraitsType>::cvttps2dq(Type /* Ignore */, XmmRegister dst,
   2022                                              XmmRegister src) {
   2023   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2024   emitUint8(0xF3);
   2025   emitRexRB(RexTypeIrrelevant, dst, src);
   2026   emitUint8(0x0F);
   2027   emitUint8(0x5B);
   2028   emitXmmRegisterOperand(dst, src);
   2029 }
   2030 
   2031 template <typename TraitsType>
   2032 void AssemblerX86Base<TraitsType>::cvttps2dq(Type /* Ignore */, XmmRegister dst,
   2033                                              const Address &src) {
   2034   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2035   emitUint8(0xF3);
   2036   emitAddrSizeOverridePrefix();
   2037   emitRex(RexTypeIrrelevant, src, dst);
   2038   emitUint8(0x0F);
   2039   emitUint8(0x5B);
   2040   emitOperand(gprEncoding(dst), src);
   2041 }
   2042 
   2043 template <typename TraitsType>
   2044 void AssemblerX86Base<TraitsType>::cvtps2dq(Type /* Ignore */, XmmRegister dst,
   2045                                             XmmRegister src) {
   2046   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2047   emitUint8(0x66);
   2048   emitRexRB(RexTypeIrrelevant, dst, src);
   2049   emitUint8(0x0F);
   2050   emitUint8(0x5B);
   2051   emitXmmRegisterOperand(dst, src);
   2052 }
   2053 
   2054 template <typename TraitsType>
   2055 void AssemblerX86Base<TraitsType>::cvtps2dq(Type /* Ignore */, XmmRegister dst,
   2056                                             const Address &src) {
   2057   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2058   emitUint8(0x66);
   2059   emitAddrSizeOverridePrefix();
   2060   emitRex(RexTypeIrrelevant, src, dst);
   2061   emitUint8(0x0F);
   2062   emitUint8(0x5B);
   2063   emitOperand(gprEncoding(dst), src);
   2064 }
   2065 
   2066 template <typename TraitsType>
   2067 void AssemblerX86Base<TraitsType>::cvtsi2ss(Type DestTy, XmmRegister dst,
   2068                                             Type SrcTy, GPRRegister src) {
   2069   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2070   emitUint8(isFloat32Asserting32Or64(DestTy) ? 0xF3 : 0xF2);
   2071   emitRexRB(SrcTy, dst, src);
   2072   emitUint8(0x0F);
   2073   emitUint8(0x2A);
   2074   emitXmmRegisterOperand(dst, src);
   2075 }
   2076 
   2077 template <typename TraitsType>
   2078 void AssemblerX86Base<TraitsType>::cvtsi2ss(Type DestTy, XmmRegister dst,
   2079                                             Type SrcTy, const Address &src) {
   2080   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2081   emitUint8(isFloat32Asserting32Or64(DestTy) ? 0xF3 : 0xF2);
   2082   emitAddrSizeOverridePrefix();
   2083   emitRex(SrcTy, src, dst);
   2084   emitUint8(0x0F);
   2085   emitUint8(0x2A);
   2086   emitOperand(gprEncoding(dst), src);
   2087 }
   2088 
   2089 template <typename TraitsType>
   2090 void AssemblerX86Base<TraitsType>::cvtfloat2float(Type SrcTy, XmmRegister dst,
   2091                                                   XmmRegister src) {
   2092   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2093   // ss2sd or sd2ss
   2094   emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
   2095   emitRexRB(RexTypeIrrelevant, dst, src);
   2096   emitUint8(0x0F);
   2097   emitUint8(0x5A);
   2098   emitXmmRegisterOperand(dst, src);
   2099 }
   2100 
   2101 template <typename TraitsType>
   2102 void AssemblerX86Base<TraitsType>::cvtfloat2float(Type SrcTy, XmmRegister dst,
   2103                                                   const Address &src) {
   2104   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2105   emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
   2106   emitAddrSizeOverridePrefix();
   2107   emitRex(RexTypeIrrelevant, src, dst);
   2108   emitUint8(0x0F);
   2109   emitUint8(0x5A);
   2110   emitOperand(gprEncoding(dst), src);
   2111 }
   2112 
   2113 template <typename TraitsType>
   2114 void AssemblerX86Base<TraitsType>::cvttss2si(Type DestTy, GPRRegister dst,
   2115                                              Type SrcTy, XmmRegister src) {
   2116   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2117   emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
   2118   emitRexRB(DestTy, dst, src);
   2119   emitUint8(0x0F);
   2120   emitUint8(0x2C);
   2121   emitXmmRegisterOperand(dst, src);
   2122 }
   2123 
   2124 template <typename TraitsType>
   2125 void AssemblerX86Base<TraitsType>::cvttss2si(Type DestTy, GPRRegister dst,
   2126                                              Type SrcTy, const Address &src) {
   2127   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2128   emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
   2129   emitAddrSizeOverridePrefix();
   2130   emitRex(DestTy, src, dst);
   2131   emitUint8(0x0F);
   2132   emitUint8(0x2C);
   2133   emitOperand(gprEncoding(dst), src);
   2134 }
   2135 
   2136 template <typename TraitsType>
   2137 void AssemblerX86Base<TraitsType>::cvtss2si(Type DestTy, GPRRegister dst,
   2138                                             Type SrcTy, XmmRegister src) {
   2139   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2140   emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
   2141   emitRexRB(DestTy, dst, src);
   2142   emitUint8(0x0F);
   2143   emitUint8(0x2D);
   2144   emitXmmRegisterOperand(dst, src);
   2145 }
   2146 
   2147 template <typename TraitsType>
   2148 void AssemblerX86Base<TraitsType>::cvtss2si(Type DestTy, GPRRegister dst,
   2149                                             Type SrcTy, const Address &src) {
   2150   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2151   emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
   2152   emitAddrSizeOverridePrefix();
   2153   emitRex(DestTy, src, dst);
   2154   emitUint8(0x0F);
   2155   emitUint8(0x2D);
   2156   emitOperand(gprEncoding(dst), src);
   2157 }
   2158 
   2159 template <typename TraitsType>
   2160 void AssemblerX86Base<TraitsType>::ucomiss(Type Ty, XmmRegister a,
   2161                                            XmmRegister b) {
   2162   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2163   if (Ty == IceType_f64)
   2164     emitUint8(0x66);
   2165   emitRexRB(RexTypeIrrelevant, a, b);
   2166   emitUint8(0x0F);
   2167   emitUint8(0x2E);
   2168   emitXmmRegisterOperand(a, b);
   2169 }
   2170 
   2171 template <typename TraitsType>
   2172 void AssemblerX86Base<TraitsType>::ucomiss(Type Ty, XmmRegister a,
   2173                                            const Address &b) {
   2174   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2175   if (Ty == IceType_f64)
   2176     emitUint8(0x66);
   2177   emitAddrSizeOverridePrefix();
   2178   emitRex(RexTypeIrrelevant, b, a);
   2179   emitUint8(0x0F);
   2180   emitUint8(0x2E);
   2181   emitOperand(gprEncoding(a), b);
   2182 }
   2183 
   2184 template <typename TraitsType>
   2185 void AssemblerX86Base<TraitsType>::movmsk(Type Ty, GPRRegister dst,
   2186                                           XmmRegister src) {
   2187   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2188   if (Ty == IceType_v16i8) {
   2189     emitUint8(0x66);
   2190   } else if (Ty == IceType_v4f32 || Ty == IceType_v4i32) {
   2191     // No operand size prefix
   2192   } else {
   2193     assert(false && "Unexpected movmsk operand type");
   2194   }
   2195   emitRexRB(RexTypeIrrelevant, dst, src);
   2196   emitUint8(0x0F);
   2197   if (Ty == IceType_v16i8) {
   2198     emitUint8(0xD7);
   2199   } else if (Ty == IceType_v4f32 || Ty == IceType_v4i32) {
   2200     emitUint8(0x50);
   2201   } else {
   2202     assert(false && "Unexpected movmsk operand type");
   2203   }
   2204   emitXmmRegisterOperand(dst, src);
   2205 }
   2206 
   2207 template <typename TraitsType>
   2208 void AssemblerX86Base<TraitsType>::sqrt(Type Ty, XmmRegister dst,
   2209                                         const Address &src) {
   2210   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2211   if (isScalarFloatingType(Ty))
   2212     emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   2213   emitAddrSizeOverridePrefix();
   2214   emitRex(RexTypeIrrelevant, src, dst);
   2215   emitUint8(0x0F);
   2216   emitUint8(0x51);
   2217   emitOperand(gprEncoding(dst), src);
   2218 }
   2219 
   2220 template <typename TraitsType>
   2221 void AssemblerX86Base<TraitsType>::sqrt(Type Ty, XmmRegister dst,
   2222                                         XmmRegister src) {
   2223   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2224   if (isScalarFloatingType(Ty))
   2225     emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   2226   emitRexRB(RexTypeIrrelevant, dst, src);
   2227   emitUint8(0x0F);
   2228   emitUint8(0x51);
   2229   emitXmmRegisterOperand(dst, src);
   2230 }
   2231 
   2232 template <typename TraitsType>
   2233 void AssemblerX86Base<TraitsType>::xorps(Type Ty, XmmRegister dst,
   2234                                          const Address &src) {
   2235   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2236   if (!isFloat32Asserting32Or64(Ty))
   2237     emitUint8(0x66);
   2238   emitAddrSizeOverridePrefix();
   2239   emitRex(RexTypeIrrelevant, src, dst);
   2240   emitUint8(0x0F);
   2241   emitUint8(0x57);
   2242   emitOperand(gprEncoding(dst), src);
   2243 }
   2244 
   2245 template <typename TraitsType>
   2246 void AssemblerX86Base<TraitsType>::xorps(Type Ty, XmmRegister dst,
   2247                                          XmmRegister src) {
   2248   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2249   if (!isFloat32Asserting32Or64(Ty))
   2250     emitUint8(0x66);
   2251   emitRexRB(RexTypeIrrelevant, dst, src);
   2252   emitUint8(0x0F);
   2253   emitUint8(0x57);
   2254   emitXmmRegisterOperand(dst, src);
   2255 }
   2256 
   2257 template <typename TraitsType>
   2258 void AssemblerX86Base<TraitsType>::insertps(Type Ty, XmmRegister dst,
   2259                                             XmmRegister src,
   2260                                             const Immediate &imm) {
   2261   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2262   assert(imm.is_uint8());
   2263   assert(isVectorFloatingType(Ty));
   2264   (void)Ty;
   2265   emitUint8(0x66);
   2266   emitRexRB(RexTypeIrrelevant, dst, src);
   2267   emitUint8(0x0F);
   2268   emitUint8(0x3A);
   2269   emitUint8(0x21);
   2270   emitXmmRegisterOperand(dst, src);
   2271   emitUint8(imm.value());
   2272 }
   2273 
   2274 template <typename TraitsType>
   2275 void AssemblerX86Base<TraitsType>::insertps(Type Ty, XmmRegister dst,
   2276                                             const Address &src,
   2277                                             const Immediate &imm) {
   2278   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2279   assert(imm.is_uint8());
   2280   assert(isVectorFloatingType(Ty));
   2281   (void)Ty;
   2282   emitUint8(0x66);
   2283   emitAddrSizeOverridePrefix();
   2284   emitRex(RexTypeIrrelevant, src, dst);
   2285   emitUint8(0x0F);
   2286   emitUint8(0x3A);
   2287   emitUint8(0x21);
   2288   static constexpr RelocOffsetT OffsetFromNextInstruction = 1;
   2289   emitOperand(gprEncoding(dst), src, OffsetFromNextInstruction);
   2290   emitUint8(imm.value());
   2291 }
   2292 
   2293 template <typename TraitsType>
   2294 void AssemblerX86Base<TraitsType>::pinsr(Type Ty, XmmRegister dst,
   2295                                          GPRRegister src,
   2296                                          const Immediate &imm) {
   2297   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2298   assert(imm.is_uint8());
   2299   emitUint8(0x66);
   2300   emitRexRB(Ty, dst, src);
   2301   emitUint8(0x0F);
   2302   if (Ty == IceType_i16) {
   2303     emitUint8(0xC4);
   2304   } else {
   2305     emitUint8(0x3A);
   2306     emitUint8(isByteSizedType(Ty) ? 0x20 : 0x22);
   2307   }
   2308   emitXmmRegisterOperand(dst, src);
   2309   emitUint8(imm.value());
   2310 }
   2311 
   2312 template <typename TraitsType>
   2313 void AssemblerX86Base<TraitsType>::pinsr(Type Ty, XmmRegister dst,
   2314                                          const Address &src,
   2315                                          const Immediate &imm) {
   2316   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2317   assert(imm.is_uint8());
   2318   emitUint8(0x66);
   2319   emitAddrSizeOverridePrefix();
   2320   emitRex(RexTypeIrrelevant, src, dst);
   2321   emitUint8(0x0F);
   2322   if (Ty == IceType_i16) {
   2323     emitUint8(0xC4);
   2324   } else {
   2325     emitUint8(0x3A);
   2326     emitUint8(isByteSizedType(Ty) ? 0x20 : 0x22);
   2327   }
   2328   static constexpr RelocOffsetT OffsetFromNextInstruction = 1;
   2329   emitOperand(gprEncoding(dst), src, OffsetFromNextInstruction);
   2330   emitUint8(imm.value());
   2331 }
   2332 
   2333 template <typename TraitsType>
   2334 void AssemblerX86Base<TraitsType>::pextr(Type Ty, GPRRegister dst,
   2335                                          XmmRegister src,
   2336                                          const Immediate &imm) {
   2337   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2338   assert(imm.is_uint8());
   2339   if (Ty == IceType_i16) {
   2340     emitUint8(0x66);
   2341     emitRexRB(Ty, dst, src);
   2342     emitUint8(0x0F);
   2343     emitUint8(0xC5);
   2344     emitXmmRegisterOperand(dst, src);
   2345     emitUint8(imm.value());
   2346   } else {
   2347     emitUint8(0x66);
   2348     emitRexRB(Ty, src, dst);
   2349     emitUint8(0x0F);
   2350     emitUint8(0x3A);
   2351     emitUint8(isByteSizedType(Ty) ? 0x14 : 0x16);
   2352     // SSE 4.1 versions are "MRI" because dst can be mem, while pextrw (SSE2)
   2353     // is RMI because dst must be reg.
   2354     emitXmmRegisterOperand(src, dst);
   2355     emitUint8(imm.value());
   2356   }
   2357 }
   2358 
   2359 template <typename TraitsType>
   2360 void AssemblerX86Base<TraitsType>::pmovsxdq(XmmRegister dst, XmmRegister src) {
   2361   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2362   emitUint8(0x66);
   2363   emitRexRB(RexTypeIrrelevant, dst, src);
   2364   emitUint8(0x0F);
   2365   emitUint8(0x38);
   2366   emitUint8(0x25);
   2367   emitXmmRegisterOperand(dst, src);
   2368 }
   2369 
   2370 template <typename TraitsType>
   2371 void AssemblerX86Base<TraitsType>::pcmpeq(Type Ty, XmmRegister dst,
   2372                                           XmmRegister src) {
   2373   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2374   emitUint8(0x66);
   2375   emitRexRB(RexTypeIrrelevant, dst, src);
   2376   emitUint8(0x0F);
   2377   if (isByteSizedArithType(Ty)) {
   2378     emitUint8(0x74);
   2379   } else if (Ty == IceType_i16) {
   2380     emitUint8(0x75);
   2381   } else {
   2382     emitUint8(0x76);
   2383   }
   2384   emitXmmRegisterOperand(dst, src);
   2385 }
   2386 
   2387 template <typename TraitsType>
   2388 void AssemblerX86Base<TraitsType>::pcmpeq(Type Ty, XmmRegister dst,
   2389                                           const Address &src) {
   2390   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2391   emitUint8(0x66);
   2392   emitAddrSizeOverridePrefix();
   2393   emitRex(RexTypeIrrelevant, src, dst);
   2394   emitUint8(0x0F);
   2395   if (isByteSizedArithType(Ty)) {
   2396     emitUint8(0x74);
   2397   } else if (Ty == IceType_i16) {
   2398     emitUint8(0x75);
   2399   } else {
   2400     emitUint8(0x76);
   2401   }
   2402   emitOperand(gprEncoding(dst), src);
   2403 }
   2404 
   2405 template <typename TraitsType>
   2406 void AssemblerX86Base<TraitsType>::pcmpgt(Type Ty, XmmRegister dst,
   2407                                           XmmRegister src) {
   2408   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2409   emitUint8(0x66);
   2410   emitRexRB(RexTypeIrrelevant, dst, src);
   2411   emitUint8(0x0F);
   2412   if (isByteSizedArithType(Ty)) {
   2413     emitUint8(0x64);
   2414   } else if (Ty == IceType_i16) {
   2415     emitUint8(0x65);
   2416   } else {
   2417     emitUint8(0x66);
   2418   }
   2419   emitXmmRegisterOperand(dst, src);
   2420 }
   2421 
   2422 template <typename TraitsType>
   2423 void AssemblerX86Base<TraitsType>::pcmpgt(Type Ty, XmmRegister dst,
   2424                                           const Address &src) {
   2425   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2426   emitUint8(0x66);
   2427   emitAddrSizeOverridePrefix();
   2428   emitRex(RexTypeIrrelevant, src, dst);
   2429   emitUint8(0x0F);
   2430   if (isByteSizedArithType(Ty)) {
   2431     emitUint8(0x64);
   2432   } else if (Ty == IceType_i16) {
   2433     emitUint8(0x65);
   2434   } else {
   2435     emitUint8(0x66);
   2436   }
   2437   emitOperand(gprEncoding(dst), src);
   2438 }
   2439 
   2440 template <typename TraitsType>
   2441 void AssemblerX86Base<TraitsType>::round(Type Ty, XmmRegister dst,
   2442                                          XmmRegister src,
   2443                                          const Immediate &mode) {
   2444   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2445   emitUint8(0x66);
   2446   emitRexRB(RexTypeIrrelevant, dst, src);
   2447   emitUint8(0x0F);
   2448   emitUint8(0x3A);
   2449   switch (Ty) {
   2450   case IceType_v4f32:
   2451     emitUint8(0x08);
   2452     break;
   2453   case IceType_f32:
   2454     emitUint8(0x0A);
   2455     break;
   2456   case IceType_f64:
   2457     emitUint8(0x0B);
   2458     break;
   2459   default:
   2460     assert(false && "Unsupported round operand type");
   2461   }
   2462   emitXmmRegisterOperand(dst, src);
   2463   // Mask precision exeption.
   2464   emitUint8(static_cast<uint8_t>(mode.value()) | 0x8);
   2465 }
   2466 
   2467 template <typename TraitsType>
   2468 void AssemblerX86Base<TraitsType>::round(Type Ty, XmmRegister dst,
   2469                                          const Address &src,
   2470                                          const Immediate &mode) {
   2471   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2472   emitUint8(0x66);
   2473   emitAddrSizeOverridePrefix();
   2474   emitRex(RexTypeIrrelevant, src, dst);
   2475   emitUint8(0x0F);
   2476   emitUint8(0x3A);
   2477   switch (Ty) {
   2478   case IceType_v4f32:
   2479     emitUint8(0x08);
   2480     break;
   2481   case IceType_f32:
   2482     emitUint8(0x0A);
   2483     break;
   2484   case IceType_f64:
   2485     emitUint8(0x0B);
   2486     break;
   2487   default:
   2488     assert(false && "Unsupported round operand type");
   2489   }
   2490   emitOperand(gprEncoding(dst), src);
   2491   // Mask precision exeption.
   2492   emitUint8(static_cast<uint8_t>(mode.value()) | 0x8);
   2493 }
   2494 
   2495 template <typename TraitsType>
   2496 template <typename T, typename>
   2497 void AssemblerX86Base<TraitsType>::fnstcw(const typename T::Address &dst) {
   2498   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2499   emitAddrSizeOverridePrefix();
   2500   emitUint8(0xD9);
   2501   emitOperand(7, dst);
   2502 }
   2503 
   2504 template <typename TraitsType>
   2505 template <typename T, typename>
   2506 void AssemblerX86Base<TraitsType>::fldcw(const typename T::Address &src) {
   2507   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2508   emitAddrSizeOverridePrefix();
   2509   emitUint8(0xD9);
   2510   emitOperand(5, src);
   2511 }
   2512 
   2513 template <typename TraitsType>
   2514 template <typename T, typename>
   2515 void AssemblerX86Base<TraitsType>::fistpl(const typename T::Address &dst) {
   2516   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2517   emitAddrSizeOverridePrefix();
   2518   emitUint8(0xDF);
   2519   emitOperand(7, dst);
   2520 }
   2521 
   2522 template <typename TraitsType>
   2523 template <typename T, typename>
   2524 void AssemblerX86Base<TraitsType>::fistps(const typename T::Address &dst) {
   2525   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2526   emitAddrSizeOverridePrefix();
   2527   emitUint8(0xDB);
   2528   emitOperand(3, dst);
   2529 }
   2530 
   2531 template <typename TraitsType>
   2532 template <typename T, typename>
   2533 void AssemblerX86Base<TraitsType>::fildl(const typename T::Address &src) {
   2534   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2535   emitAddrSizeOverridePrefix();
   2536   emitUint8(0xDF);
   2537   emitOperand(5, src);
   2538 }
   2539 
   2540 template <typename TraitsType>
   2541 template <typename T, typename>
   2542 void AssemblerX86Base<TraitsType>::filds(const typename T::Address &src) {
   2543   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2544   emitAddrSizeOverridePrefix();
   2545   emitUint8(0xDB);
   2546   emitOperand(0, src);
   2547 }
   2548 
   2549 template <typename TraitsType>
   2550 template <typename, typename>
   2551 void AssemblerX86Base<TraitsType>::fincstp() {
   2552   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2553   emitUint8(0xD9);
   2554   emitUint8(0xF7);
   2555 }
   2556 
   2557 template <typename TraitsType>
   2558 template <uint32_t Tag>
   2559 void AssemblerX86Base<TraitsType>::arith_int(Type Ty, GPRRegister reg,
   2560                                              const Immediate &imm) {
   2561   static_assert(Tag < 8, "Tag must be between 0..7");
   2562   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2563   if (Ty == IceType_i16)
   2564     emitOperandSizeOverride();
   2565   emitRexB(Ty, reg);
   2566   if (isByteSizedType(Ty)) {
   2567     emitComplexI8(Tag, Operand(reg), imm);
   2568   } else {
   2569     emitComplex(Ty, Tag, Operand(reg), imm);
   2570   }
   2571 }
   2572 
   2573 template <typename TraitsType>
   2574 template <uint32_t Tag>
   2575 void AssemblerX86Base<TraitsType>::arith_int(Type Ty, GPRRegister reg0,
   2576                                              GPRRegister reg1) {
   2577   static_assert(Tag < 8, "Tag must be between 0..7");
   2578   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2579   if (Ty == IceType_i16)
   2580     emitOperandSizeOverride();
   2581   emitRexRB(Ty, reg0, reg1);
   2582   if (isByteSizedType(Ty))
   2583     emitUint8(Tag * 8 + 2);
   2584   else
   2585     emitUint8(Tag * 8 + 3);
   2586   emitRegisterOperand(gprEncoding(reg0), gprEncoding(reg1));
   2587 }
   2588 
   2589 template <typename TraitsType>
   2590 template <uint32_t Tag>
   2591 void AssemblerX86Base<TraitsType>::arith_int(Type Ty, GPRRegister reg,
   2592                                              const Address &address) {
   2593   static_assert(Tag < 8, "Tag must be between 0..7");
   2594   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2595   if (Ty == IceType_i16)
   2596     emitOperandSizeOverride();
   2597   emitAddrSizeOverridePrefix();
   2598   emitRex(Ty, address, reg);
   2599   if (isByteSizedType(Ty))
   2600     emitUint8(Tag * 8 + 2);
   2601   else
   2602     emitUint8(Tag * 8 + 3);
   2603   emitOperand(gprEncoding(reg), address);
   2604 }
   2605 
   2606 template <typename TraitsType>
   2607 template <uint32_t Tag>
   2608 void AssemblerX86Base<TraitsType>::arith_int(Type Ty, const Address &address,
   2609                                              GPRRegister reg) {
   2610   static_assert(Tag < 8, "Tag must be between 0..7");
   2611   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2612   if (Ty == IceType_i16)
   2613     emitOperandSizeOverride();
   2614   emitAddrSizeOverridePrefix();
   2615   emitRex(Ty, address, reg);
   2616   if (isByteSizedType(Ty))
   2617     emitUint8(Tag * 8 + 0);
   2618   else
   2619     emitUint8(Tag * 8 + 1);
   2620   emitOperand(gprEncoding(reg), address);
   2621 }
   2622 
   2623 template <typename TraitsType>
   2624 template <uint32_t Tag>
   2625 void AssemblerX86Base<TraitsType>::arith_int(Type Ty, const Address &address,
   2626                                              const Immediate &imm) {
   2627   static_assert(Tag < 8, "Tag must be between 0..7");
   2628   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2629   if (Ty == IceType_i16)
   2630     emitOperandSizeOverride();
   2631   emitAddrSizeOverridePrefix();
   2632   emitRex(Ty, address, RexRegIrrelevant);
   2633   if (isByteSizedType(Ty)) {
   2634     emitComplexI8(Tag, address, imm);
   2635   } else {
   2636     emitComplex(Ty, Tag, address, imm);
   2637   }
   2638 }
   2639 
   2640 template <typename TraitsType>
   2641 void AssemblerX86Base<TraitsType>::cmp(Type Ty, GPRRegister reg,
   2642                                        const Immediate &imm) {
   2643   arith_int<7>(Ty, reg, imm);
   2644 }
   2645 
   2646 template <typename TraitsType>
   2647 void AssemblerX86Base<TraitsType>::cmp(Type Ty, GPRRegister reg0,
   2648                                        GPRRegister reg1) {
   2649   arith_int<7>(Ty, reg0, reg1);
   2650 }
   2651 
   2652 template <typename TraitsType>
   2653 void AssemblerX86Base<TraitsType>::cmp(Type Ty, GPRRegister reg,
   2654                                        const Address &address) {
   2655   arith_int<7>(Ty, reg, address);
   2656 }
   2657 
   2658 template <typename TraitsType>
   2659 void AssemblerX86Base<TraitsType>::cmp(Type Ty, const Address &address,
   2660                                        GPRRegister reg) {
   2661   arith_int<7>(Ty, address, reg);
   2662 }
   2663 
   2664 template <typename TraitsType>
   2665 void AssemblerX86Base<TraitsType>::cmp(Type Ty, const Address &address,
   2666                                        const Immediate &imm) {
   2667   arith_int<7>(Ty, address, imm);
   2668 }
   2669 
   2670 template <typename TraitsType>
   2671 void AssemblerX86Base<TraitsType>::test(Type Ty, GPRRegister reg1,
   2672                                         GPRRegister reg2) {
   2673   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2674   if (Ty == IceType_i16)
   2675     emitOperandSizeOverride();
   2676   emitRexRB(Ty, reg1, reg2);
   2677   if (isByteSizedType(Ty))
   2678     emitUint8(0x84);
   2679   else
   2680     emitUint8(0x85);
   2681   emitRegisterOperand(gprEncoding(reg1), gprEncoding(reg2));
   2682 }
   2683 
   2684 template <typename TraitsType>
   2685 void AssemblerX86Base<TraitsType>::test(Type Ty, const Address &addr,
   2686                                         GPRRegister reg) {
   2687   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2688   if (Ty == IceType_i16)
   2689     emitOperandSizeOverride();
   2690   emitAddrSizeOverridePrefix();
   2691   emitRex(Ty, addr, reg);
   2692   if (isByteSizedType(Ty))
   2693     emitUint8(0x84);
   2694   else
   2695     emitUint8(0x85);
   2696   emitOperand(gprEncoding(reg), addr);
   2697 }
   2698 
   2699 template <typename TraitsType>
   2700 void AssemblerX86Base<TraitsType>::test(Type Ty, GPRRegister reg,
   2701                                         const Immediate &immediate) {
   2702   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2703   // For registers that have a byte variant (EAX, EBX, ECX, and EDX) we only
   2704   // test the byte register to keep the encoding short. This is legal even if
   2705   // the register had high bits set since this only sets flags registers based
   2706   // on the "AND" of the two operands, and the immediate had zeros at those
   2707   // high bits.
   2708   if (immediate.is_uint8() && reg <= Traits::Last8BitGPR) {
   2709     // Use zero-extended 8-bit immediate.
   2710     emitRexB(Ty, reg);
   2711     if (reg == Traits::Encoded_Reg_Accumulator) {
   2712       emitUint8(0xA8);
   2713     } else {
   2714       emitUint8(0xF6);
   2715       emitUint8(0xC0 + gprEncoding(reg));
   2716     }
   2717     emitUint8(immediate.value() & 0xFF);
   2718   } else if (reg == Traits::Encoded_Reg_Accumulator) {
   2719     // Use short form if the destination is EAX.
   2720     if (Ty == IceType_i16)
   2721       emitOperandSizeOverride();
   2722     emitUint8(0xA9);
   2723     emitImmediate(Ty, immediate);
   2724   } else {
   2725     if (Ty == IceType_i16)
   2726       emitOperandSizeOverride();
   2727     emitRexB(Ty, reg);
   2728     emitUint8(0xF7);
   2729     emitRegisterOperand(0, gprEncoding(reg));
   2730     emitImmediate(Ty, immediate);
   2731   }
   2732 }
   2733 
   2734 template <typename TraitsType>
   2735 void AssemblerX86Base<TraitsType>::test(Type Ty, const Address &addr,
   2736                                         const Immediate &immediate) {
   2737   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2738   // If the immediate is short, we only test the byte addr to keep the encoding
   2739   // short.
   2740   if (immediate.is_uint8()) {
   2741     // Use zero-extended 8-bit immediate.
   2742     emitAddrSizeOverridePrefix();
   2743     emitRex(Ty, addr, RexRegIrrelevant);
   2744     emitUint8(0xF6);
   2745     static constexpr RelocOffsetT OffsetFromNextInstruction = 1;
   2746     emitOperand(0, addr, OffsetFromNextInstruction);
   2747     emitUint8(immediate.value() & 0xFF);
   2748   } else {
   2749     if (Ty == IceType_i16)
   2750       emitOperandSizeOverride();
   2751     emitAddrSizeOverridePrefix();
   2752     emitRex(Ty, addr, RexRegIrrelevant);
   2753     emitUint8(0xF7);
   2754     const uint8_t OffsetFromNextInstruction = Ty == IceType_i16 ? 2 : 4;
   2755     emitOperand(0, addr, OffsetFromNextInstruction);
   2756     emitImmediate(Ty, immediate);
   2757   }
   2758 }
   2759 
   2760 template <typename TraitsType>
   2761 void AssemblerX86Base<TraitsType>::And(Type Ty, GPRRegister dst,
   2762                                        GPRRegister src) {
   2763   arith_int<4>(Ty, dst, src);
   2764 }
   2765 
   2766 template <typename TraitsType>
   2767 void AssemblerX86Base<TraitsType>::And(Type Ty, GPRRegister dst,
   2768                                        const Address &address) {
   2769   arith_int<4>(Ty, dst, address);
   2770 }
   2771 
   2772 template <typename TraitsType>
   2773 void AssemblerX86Base<TraitsType>::And(Type Ty, GPRRegister dst,
   2774                                        const Immediate &imm) {
   2775   arith_int<4>(Ty, dst, imm);
   2776 }
   2777 
   2778 template <typename TraitsType>
   2779 void AssemblerX86Base<TraitsType>::And(Type Ty, const Address &address,
   2780                                        GPRRegister reg) {
   2781   arith_int<4>(Ty, address, reg);
   2782 }
   2783 
   2784 template <typename TraitsType>
   2785 void AssemblerX86Base<TraitsType>::And(Type Ty, const Address &address,
   2786                                        const Immediate &imm) {
   2787   arith_int<4>(Ty, address, imm);
   2788 }
   2789 
   2790 template <typename TraitsType>
   2791 void AssemblerX86Base<TraitsType>::Or(Type Ty, GPRRegister dst,
   2792                                       GPRRegister src) {
   2793   arith_int<1>(Ty, dst, src);
   2794 }
   2795 
   2796 template <typename TraitsType>
   2797 void AssemblerX86Base<TraitsType>::Or(Type Ty, GPRRegister dst,
   2798                                       const Address &address) {
   2799   arith_int<1>(Ty, dst, address);
   2800 }
   2801 
   2802 template <typename TraitsType>
   2803 void AssemblerX86Base<TraitsType>::Or(Type Ty, GPRRegister dst,
   2804                                       const Immediate &imm) {
   2805   arith_int<1>(Ty, dst, imm);
   2806 }
   2807 
   2808 template <typename TraitsType>
   2809 void AssemblerX86Base<TraitsType>::Or(Type Ty, const Address &address,
   2810                                       GPRRegister reg) {
   2811   arith_int<1>(Ty, address, reg);
   2812 }
   2813 
   2814 template <typename TraitsType>
   2815 void AssemblerX86Base<TraitsType>::Or(Type Ty, const Address &address,
   2816                                       const Immediate &imm) {
   2817   arith_int<1>(Ty, address, imm);
   2818 }
   2819 
   2820 template <typename TraitsType>
   2821 void AssemblerX86Base<TraitsType>::Xor(Type Ty, GPRRegister dst,
   2822                                        GPRRegister src) {
   2823   arith_int<6>(Ty, dst, src);
   2824 }
   2825 
   2826 template <typename TraitsType>
   2827 void AssemblerX86Base<TraitsType>::Xor(Type Ty, GPRRegister dst,
   2828                                        const Address &address) {
   2829   arith_int<6>(Ty, dst, address);
   2830 }
   2831 
   2832 template <typename TraitsType>
   2833 void AssemblerX86Base<TraitsType>::Xor(Type Ty, GPRRegister dst,
   2834                                        const Immediate &imm) {
   2835   arith_int<6>(Ty, dst, imm);
   2836 }
   2837 
   2838 template <typename TraitsType>
   2839 void AssemblerX86Base<TraitsType>::Xor(Type Ty, const Address &address,
   2840                                        GPRRegister reg) {
   2841   arith_int<6>(Ty, address, reg);
   2842 }
   2843 
   2844 template <typename TraitsType>
   2845 void AssemblerX86Base<TraitsType>::Xor(Type Ty, const Address &address,
   2846                                        const Immediate &imm) {
   2847   arith_int<6>(Ty, address, imm);
   2848 }
   2849 
   2850 template <typename TraitsType>
   2851 void AssemblerX86Base<TraitsType>::add(Type Ty, GPRRegister dst,
   2852                                        GPRRegister src) {
   2853   arith_int<0>(Ty, dst, src);
   2854 }
   2855 
   2856 template <typename TraitsType>
   2857 void AssemblerX86Base<TraitsType>::add(Type Ty, GPRRegister reg,
   2858                                        const Address &address) {
   2859   arith_int<0>(Ty, reg, address);
   2860 }
   2861 
   2862 template <typename TraitsType>
   2863 void AssemblerX86Base<TraitsType>::add(Type Ty, GPRRegister reg,
   2864                                        const Immediate &imm) {
   2865   arith_int<0>(Ty, reg, imm);
   2866 }
   2867 
   2868 template <typename TraitsType>
   2869 void AssemblerX86Base<TraitsType>::add(Type Ty, const Address &address,
   2870                                        GPRRegister reg) {
   2871   arith_int<0>(Ty, address, reg);
   2872 }
   2873 
   2874 template <typename TraitsType>
   2875 void AssemblerX86Base<TraitsType>::add(Type Ty, const Address &address,
   2876                                        const Immediate &imm) {
   2877   arith_int<0>(Ty, address, imm);
   2878 }
   2879 
   2880 template <typename TraitsType>
   2881 void AssemblerX86Base<TraitsType>::adc(Type Ty, GPRRegister dst,
   2882                                        GPRRegister src) {
   2883   arith_int<2>(Ty, dst, src);
   2884 }
   2885 
   2886 template <typename TraitsType>
   2887 void AssemblerX86Base<TraitsType>::adc(Type Ty, GPRRegister dst,
   2888                                        const Address &address) {
   2889   arith_int<2>(Ty, dst, address);
   2890 }
   2891 
   2892 template <typename TraitsType>
   2893 void AssemblerX86Base<TraitsType>::adc(Type Ty, GPRRegister reg,
   2894                                        const Immediate &imm) {
   2895   arith_int<2>(Ty, reg, imm);
   2896 }
   2897 
   2898 template <typename TraitsType>
   2899 void AssemblerX86Base<TraitsType>::adc(Type Ty, const Address &address,
   2900                                        GPRRegister reg) {
   2901   arith_int<2>(Ty, address, reg);
   2902 }
   2903 
   2904 template <typename TraitsType>
   2905 void AssemblerX86Base<TraitsType>::adc(Type Ty, const Address &address,
   2906                                        const Immediate &imm) {
   2907   arith_int<2>(Ty, address, imm);
   2908 }
   2909 
   2910 template <typename TraitsType>
   2911 void AssemblerX86Base<TraitsType>::sub(Type Ty, GPRRegister dst,
   2912                                        GPRRegister src) {
   2913   arith_int<5>(Ty, dst, src);
   2914 }
   2915 
   2916 template <typename TraitsType>
   2917 void AssemblerX86Base<TraitsType>::sub(Type Ty, GPRRegister reg,
   2918                                        const Address &address) {
   2919   arith_int<5>(Ty, reg, address);
   2920 }
   2921 
   2922 template <typename TraitsType>
   2923 void AssemblerX86Base<TraitsType>::sub(Type Ty, GPRRegister reg,
   2924                                        const Immediate &imm) {
   2925   arith_int<5>(Ty, reg, imm);
   2926 }
   2927 
   2928 template <typename TraitsType>
   2929 void AssemblerX86Base<TraitsType>::sub(Type Ty, const Address &address,
   2930                                        GPRRegister reg) {
   2931   arith_int<5>(Ty, address, reg);
   2932 }
   2933 
   2934 template <typename TraitsType>
   2935 void AssemblerX86Base<TraitsType>::sub(Type Ty, const Address &address,
   2936                                        const Immediate &imm) {
   2937   arith_int<5>(Ty, address, imm);
   2938 }
   2939 
   2940 template <typename TraitsType>
   2941 void AssemblerX86Base<TraitsType>::sbb(Type Ty, GPRRegister dst,
   2942                                        GPRRegister src) {
   2943   arith_int<3>(Ty, dst, src);
   2944 }
   2945 
   2946 template <typename TraitsType>
   2947 void AssemblerX86Base<TraitsType>::sbb(Type Ty, GPRRegister dst,
   2948                                        const Address &address) {
   2949   arith_int<3>(Ty, dst, address);
   2950 }
   2951 
   2952 template <typename TraitsType>
   2953 void AssemblerX86Base<TraitsType>::sbb(Type Ty, GPRRegister reg,
   2954                                        const Immediate &imm) {
   2955   arith_int<3>(Ty, reg, imm);
   2956 }
   2957 
   2958 template <typename TraitsType>
   2959 void AssemblerX86Base<TraitsType>::sbb(Type Ty, const Address &address,
   2960                                        GPRRegister reg) {
   2961   arith_int<3>(Ty, address, reg);
   2962 }
   2963 
   2964 template <typename TraitsType>
   2965 void AssemblerX86Base<TraitsType>::sbb(Type Ty, const Address &address,
   2966                                        const Immediate &imm) {
   2967   arith_int<3>(Ty, address, imm);
   2968 }
   2969 
   2970 template <typename TraitsType> void AssemblerX86Base<TraitsType>::cbw() {
   2971   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2972   emitOperandSizeOverride();
   2973   emitUint8(0x98);
   2974 }
   2975 
   2976 template <typename TraitsType> void AssemblerX86Base<TraitsType>::cwd() {
   2977   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2978   emitOperandSizeOverride();
   2979   emitUint8(0x99);
   2980 }
   2981 
   2982 template <typename TraitsType> void AssemblerX86Base<TraitsType>::cdq() {
   2983   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2984   emitUint8(0x99);
   2985 }
   2986 
   2987 template <typename TraitsType>
   2988 template <typename T>
   2989 typename std::enable_if<T::Is64Bit, void>::type
   2990 AssemblerX86Base<TraitsType>::cqo() {
   2991   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2992   emitRexB(RexTypeForceRexW, RexRegIrrelevant);
   2993   emitUint8(0x99);
   2994 }
   2995 
   2996 template <typename TraitsType>
   2997 void AssemblerX86Base<TraitsType>::div(Type Ty, GPRRegister reg) {
   2998   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2999   if (Ty == IceType_i16)
   3000     emitOperandSizeOverride();
   3001   emitRexB(Ty, reg);
   3002   if (isByteSizedArithType(Ty))
   3003     emitUint8(0xF6);
   3004   else
   3005     emitUint8(0xF7);
   3006   emitRegisterOperand(6, gprEncoding(reg));
   3007 }
   3008 
   3009 template <typename TraitsType>
   3010 void AssemblerX86Base<TraitsType>::div(Type Ty, const Address &addr) {
   3011   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3012   if (Ty == IceType_i16)
   3013     emitOperandSizeOverride();
   3014   emitAddrSizeOverridePrefix();
   3015   emitRex(Ty, addr, RexRegIrrelevant);
   3016   if (isByteSizedArithType(Ty))
   3017     emitUint8(0xF6);
   3018   else
   3019     emitUint8(0xF7);
   3020   emitOperand(6, addr);
   3021 }
   3022 
   3023 template <typename TraitsType>
   3024 void AssemblerX86Base<TraitsType>::idiv(Type Ty, GPRRegister reg) {
   3025   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3026   if (Ty == IceType_i16)
   3027     emitOperandSizeOverride();
   3028   emitRexB(Ty, reg);
   3029   if (isByteSizedArithType(Ty))
   3030     emitUint8(0xF6);
   3031   else
   3032     emitUint8(0xF7);
   3033   emitRegisterOperand(7, gprEncoding(reg));
   3034 }
   3035 
   3036 template <typename TraitsType>
   3037 void AssemblerX86Base<TraitsType>::idiv(Type Ty, const Address &addr) {
   3038   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3039   if (Ty == IceType_i16)
   3040     emitOperandSizeOverride();
   3041   emitAddrSizeOverridePrefix();
   3042   emitRex(Ty, addr, RexRegIrrelevant);
   3043   if (isByteSizedArithType(Ty))
   3044     emitUint8(0xF6);
   3045   else
   3046     emitUint8(0xF7);
   3047   emitOperand(7, addr);
   3048 }
   3049 
   3050 template <typename TraitsType>
   3051 void AssemblerX86Base<TraitsType>::imul(Type Ty, GPRRegister dst,
   3052                                         GPRRegister src) {
   3053   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3054   assert(Ty == IceType_i16 || Ty == IceType_i32 ||
   3055          (Traits::Is64Bit && Ty == IceType_i64));
   3056   if (Ty == IceType_i16)
   3057     emitOperandSizeOverride();
   3058   emitRexRB(Ty, dst, src);
   3059   emitUint8(0x0F);
   3060   emitUint8(0xAF);
   3061   emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
   3062 }
   3063 
   3064 template <typename TraitsType>
   3065 void AssemblerX86Base<TraitsType>::imul(Type Ty, GPRRegister reg,
   3066                                         const Address &address) {
   3067   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3068   assert(Ty == IceType_i16 || Ty == IceType_i32 ||
   3069          (Traits::Is64Bit && Ty == IceType_i64));
   3070   if (Ty == IceType_i16)
   3071     emitOperandSizeOverride();
   3072   emitAddrSizeOverridePrefix();
   3073   emitRex(Ty, address, reg);
   3074   emitUint8(0x0F);
   3075   emitUint8(0xAF);
   3076   emitOperand(gprEncoding(reg), address);
   3077 }
   3078 
   3079 template <typename TraitsType>
   3080 void AssemblerX86Base<TraitsType>::imul(Type Ty, GPRRegister reg,
   3081                                         const Immediate &imm) {
   3082   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3083   assert(Ty == IceType_i16 || Ty == IceType_i32 || Ty == IceType_i64);
   3084   if (Ty == IceType_i16)
   3085     emitOperandSizeOverride();
   3086   emitRexRB(Ty, reg, reg);
   3087   if (imm.is_int8()) {
   3088     emitUint8(0x6B);
   3089     emitRegisterOperand(gprEncoding(reg), gprEncoding(reg));
   3090     emitUint8(imm.value() & 0xFF);
   3091   } else {
   3092     emitUint8(0x69);
   3093     emitRegisterOperand(gprEncoding(reg), gprEncoding(reg));
   3094     emitImmediate(Ty, imm);
   3095   }
   3096 }
   3097 
   3098 template <typename TraitsType>
   3099 void AssemblerX86Base<TraitsType>::imul(Type Ty, GPRRegister reg) {
   3100   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3101   if (Ty == IceType_i16)
   3102     emitOperandSizeOverride();
   3103   emitRexB(Ty, reg);
   3104   if (isByteSizedArithType(Ty))
   3105     emitUint8(0xF6);
   3106   else
   3107     emitUint8(0xF7);
   3108   emitRegisterOperand(5, gprEncoding(reg));
   3109 }
   3110 
   3111 template <typename TraitsType>
   3112 void AssemblerX86Base<TraitsType>::imul(Type Ty, const Address &address) {
   3113   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3114   if (Ty == IceType_i16)
   3115     emitOperandSizeOverride();
   3116   emitAddrSizeOverridePrefix();
   3117   emitRex(Ty, address, RexRegIrrelevant);
   3118   if (isByteSizedArithType(Ty))
   3119     emitUint8(0xF6);
   3120   else
   3121     emitUint8(0xF7);
   3122   emitOperand(5, address);
   3123 }
   3124 
   3125 template <typename TraitsType>
   3126 void AssemblerX86Base<TraitsType>::imul(Type Ty, GPRRegister dst,
   3127                                         GPRRegister src, const Immediate &imm) {
   3128   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3129   assert(Ty == IceType_i16 || Ty == IceType_i32);
   3130   if (Ty == IceType_i16)
   3131     emitOperandSizeOverride();
   3132   emitRexRB(Ty, dst, src);
   3133   if (imm.is_int8()) {
   3134     emitUint8(0x6B);
   3135     emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
   3136     emitUint8(imm.value() & 0xFF);
   3137   } else {
   3138     emitUint8(0x69);
   3139     emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
   3140     emitImmediate(Ty, imm);
   3141   }
   3142 }
   3143 
   3144 template <typename TraitsType>
   3145 void AssemblerX86Base<TraitsType>::imul(Type Ty, GPRRegister dst,
   3146                                         const Address &address,
   3147                                         const Immediate &imm) {
   3148   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3149   assert(Ty == IceType_i16 || Ty == IceType_i32);
   3150   if (Ty == IceType_i16)
   3151     emitOperandSizeOverride();
   3152   emitAddrSizeOverridePrefix();
   3153   emitRex(Ty, address, dst);
   3154   if (imm.is_int8()) {
   3155     emitUint8(0x6B);
   3156     static constexpr RelocOffsetT OffsetFromNextInstruction = 1;
   3157     emitOperand(gprEncoding(dst), address, OffsetFromNextInstruction);
   3158     emitUint8(imm.value() & 0xFF);
   3159   } else {
   3160     emitUint8(0x69);
   3161     const uint8_t OffsetFromNextInstruction = Ty == IceType_i16 ? 2 : 4;
   3162     emitOperand(gprEncoding(dst), address, OffsetFromNextInstruction);
   3163     emitImmediate(Ty, imm);
   3164   }
   3165 }
   3166 
   3167 template <typename TraitsType>
   3168 void AssemblerX86Base<TraitsType>::mul(Type Ty, GPRRegister reg) {
   3169   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3170   if (Ty == IceType_i16)
   3171     emitOperandSizeOverride();
   3172   emitRexB(Ty, reg);
   3173   if (isByteSizedArithType(Ty))
   3174     emitUint8(0xF6);
   3175   else
   3176     emitUint8(0xF7);
   3177   emitRegisterOperand(4, gprEncoding(reg));
   3178 }
   3179 
   3180 template <typename TraitsType>
   3181 void AssemblerX86Base<TraitsType>::mul(Type Ty, const Address &address) {
   3182   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3183   if (Ty == IceType_i16)
   3184     emitOperandSizeOverride();
   3185   emitAddrSizeOverridePrefix();
   3186   emitRex(Ty, address, RexRegIrrelevant);
   3187   if (isByteSizedArithType(Ty))
   3188     emitUint8(0xF6);
   3189   else
   3190     emitUint8(0xF7);
   3191   emitOperand(4, address);
   3192 }
   3193 
   3194 template <typename TraitsType>
   3195 template <typename, typename>
   3196 void AssemblerX86Base<TraitsType>::incl(GPRRegister reg) {
   3197   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3198   emitUint8(0x40 + reg);
   3199 }
   3200 
   3201 template <typename TraitsType>
   3202 void AssemblerX86Base<TraitsType>::incl(const Address &address) {
   3203   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3204   emitAddrSizeOverridePrefix();
   3205   emitRex(IceType_i32, address, RexRegIrrelevant);
   3206   emitUint8(0xFF);
   3207   emitOperand(0, address);
   3208 }
   3209 
   3210 template <typename TraitsType>
   3211 template <typename, typename>
   3212 void AssemblerX86Base<TraitsType>::decl(GPRRegister reg) {
   3213   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3214   emitUint8(0x48 + reg);
   3215 }
   3216 
   3217 template <typename TraitsType>
   3218 void AssemblerX86Base<TraitsType>::decl(const Address &address) {
   3219   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3220   emitAddrSizeOverridePrefix();
   3221   emitRex(IceType_i32, address, RexRegIrrelevant);
   3222   emitUint8(0xFF);
   3223   emitOperand(1, address);
   3224 }
   3225 
   3226 template <typename TraitsType>
   3227 void AssemblerX86Base<TraitsType>::rol(Type Ty, GPRRegister reg,
   3228                                        const Immediate &imm) {
   3229   emitGenericShift(0, Ty, reg, imm);
   3230 }
   3231 
   3232 template <typename TraitsType>
   3233 void AssemblerX86Base<TraitsType>::rol(Type Ty, GPRRegister operand,
   3234                                        GPRRegister shifter) {
   3235   emitGenericShift(0, Ty, Operand(operand), shifter);
   3236 }
   3237 
   3238 template <typename TraitsType>
   3239 void AssemblerX86Base<TraitsType>::rol(Type Ty, const Address &operand,
   3240                                        GPRRegister shifter) {
   3241   emitGenericShift(0, Ty, operand, shifter);
   3242 }
   3243 
   3244 template <typename TraitsType>
   3245 void AssemblerX86Base<TraitsType>::shl(Type Ty, GPRRegister reg,
   3246                                        const Immediate &imm) {
   3247   emitGenericShift(4, Ty, reg, imm);
   3248 }
   3249 
   3250 template <typename TraitsType>
   3251 void AssemblerX86Base<TraitsType>::shl(Type Ty, GPRRegister operand,
   3252                                        GPRRegister shifter) {
   3253   emitGenericShift(4, Ty, Operand(operand), shifter);
   3254 }
   3255 
   3256 template <typename TraitsType>
   3257 void AssemblerX86Base<TraitsType>::shl(Type Ty, const Address &operand,
   3258                                        GPRRegister shifter) {
   3259   emitGenericShift(4, Ty, operand, shifter);
   3260 }
   3261 
   3262 template <typename TraitsType>
   3263 void AssemblerX86Base<TraitsType>::shr(Type Ty, GPRRegister reg,
   3264                                        const Immediate &imm) {
   3265   emitGenericShift(5, Ty, reg, imm);
   3266 }
   3267 
   3268 template <typename TraitsType>
   3269 void AssemblerX86Base<TraitsType>::shr(Type Ty, GPRRegister operand,
   3270                                        GPRRegister shifter) {
   3271   emitGenericShift(5, Ty, Operand(operand), shifter);
   3272 }
   3273 
   3274 template <typename TraitsType>
   3275 void AssemblerX86Base<TraitsType>::shr(Type Ty, const Address &operand,
   3276                                        GPRRegister shifter) {
   3277   emitGenericShift(5, Ty, operand, shifter);
   3278 }
   3279 
   3280 template <typename TraitsType>
   3281 void AssemblerX86Base<TraitsType>::sar(Type Ty, GPRRegister reg,
   3282                                        const Immediate &imm) {
   3283   emitGenericShift(7, Ty, reg, imm);
   3284 }
   3285 
   3286 template <typename TraitsType>
   3287 void AssemblerX86Base<TraitsType>::sar(Type Ty, GPRRegister operand,
   3288                                        GPRRegister shifter) {
   3289   emitGenericShift(7, Ty, Operand(operand), shifter);
   3290 }
   3291 
   3292 template <typename TraitsType>
   3293 void AssemblerX86Base<TraitsType>::sar(Type Ty, const Address &address,
   3294                                        GPRRegister shifter) {
   3295   emitGenericShift(7, Ty, address, shifter);
   3296 }
   3297 
   3298 template <typename TraitsType>
   3299 void AssemblerX86Base<TraitsType>::shld(Type Ty, GPRRegister dst,
   3300                                         GPRRegister src) {
   3301   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3302   assert(Ty == IceType_i16 || Ty == IceType_i32);
   3303   if (Ty == IceType_i16)
   3304     emitOperandSizeOverride();
   3305   emitRexRB(Ty, src, dst);
   3306   emitUint8(0x0F);
   3307   emitUint8(0xA5);
   3308   emitRegisterOperand(gprEncoding(src), gprEncoding(dst));
   3309 }
   3310 
   3311 template <typename TraitsType>
   3312 void AssemblerX86Base<TraitsType>::shld(Type Ty, GPRRegister dst,
   3313                                         GPRRegister src, const Immediate &imm) {
   3314   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3315   assert(Ty == IceType_i16 || Ty == IceType_i32);
   3316   assert(imm.is_int8());
   3317   if (Ty == IceType_i16)
   3318     emitOperandSizeOverride();
   3319   emitRexRB(Ty, src, dst);
   3320   emitUint8(0x0F);
   3321   emitUint8(0xA4);
   3322   emitRegisterOperand(gprEncoding(src), gprEncoding(dst));
   3323   emitUint8(imm.value() & 0xFF);
   3324 }
   3325 
   3326 template <typename TraitsType>
   3327 void AssemblerX86Base<TraitsType>::shld(Type Ty, const Address &operand,
   3328                                         GPRRegister src) {
   3329   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3330   assert(Ty == IceType_i16 || Ty == IceType_i32);
   3331   if (Ty == IceType_i16)
   3332     emitOperandSizeOverride();
   3333   emitAddrSizeOverridePrefix();
   3334   emitRex(Ty, operand, src);
   3335   emitUint8(0x0F);
   3336   emitUint8(0xA5);
   3337   emitOperand(gprEncoding(src), operand);
   3338 }
   3339 
   3340 template <typename TraitsType>
   3341 void AssemblerX86Base<TraitsType>::shrd(Type Ty, GPRRegister dst,
   3342                                         GPRRegister src) {
   3343   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3344   assert(Ty == IceType_i16 || Ty == IceType_i32);
   3345   if (Ty == IceType_i16)
   3346     emitOperandSizeOverride();
   3347   emitRexRB(Ty, src, dst);
   3348   emitUint8(0x0F);
   3349   emitUint8(0xAD);
   3350   emitRegisterOperand(gprEncoding(src), gprEncoding(dst));
   3351 }
   3352 
   3353 template <typename TraitsType>
   3354 void AssemblerX86Base<TraitsType>::shrd(Type Ty, GPRRegister dst,
   3355                                         GPRRegister src, const Immediate &imm) {
   3356   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3357   assert(Ty == IceType_i16 || Ty == IceType_i32);
   3358   assert(imm.is_int8());
   3359   if (Ty == IceType_i16)
   3360     emitOperandSizeOverride();
   3361   emitRexRB(Ty, src, dst);
   3362   emitUint8(0x0F);
   3363   emitUint8(0xAC);
   3364   emitRegisterOperand(gprEncoding(src), gprEncoding(dst));
   3365   emitUint8(imm.value() & 0xFF);
   3366 }
   3367 
   3368 template <typename TraitsType>
   3369 void AssemblerX86Base<TraitsType>::shrd(Type Ty, const Address &dst,
   3370                                         GPRRegister src) {
   3371   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3372   assert(Ty == IceType_i16 || Ty == IceType_i32);
   3373   if (Ty == IceType_i16)
   3374     emitOperandSizeOverride();
   3375   emitAddrSizeOverridePrefix();
   3376   emitRex(Ty, dst, src);
   3377   emitUint8(0x0F);
   3378   emitUint8(0xAD);
   3379   emitOperand(gprEncoding(src), dst);
   3380 }
   3381 
   3382 template <typename TraitsType>
   3383 void AssemblerX86Base<TraitsType>::neg(Type Ty, GPRRegister reg) {
   3384   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3385   if (Ty == IceType_i16)
   3386     emitOperandSizeOverride();
   3387   emitRexB(Ty, reg);
   3388   if (isByteSizedArithType(Ty))
   3389     emitUint8(0xF6);
   3390   else
   3391     emitUint8(0xF7);
   3392   emitRegisterOperand(3, gprEncoding(reg));
   3393 }
   3394 
   3395 template <typename TraitsType>
   3396 void AssemblerX86Base<TraitsType>::neg(Type Ty, const Address &addr) {
   3397   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3398   if (Ty == IceType_i16)
   3399     emitOperandSizeOverride();
   3400   emitAddrSizeOverridePrefix();
   3401   emitRex(Ty, addr, RexRegIrrelevant);
   3402   if (isByteSizedArithType(Ty))
   3403     emitUint8(0xF6);
   3404   else
   3405     emitUint8(0xF7);
   3406   emitOperand(3, addr);
   3407 }
   3408 
   3409 template <typename TraitsType>
   3410 void AssemblerX86Base<TraitsType>::notl(GPRRegister reg) {
   3411   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3412   emitRexB(IceType_i32, reg);
   3413   emitUint8(0xF7);
   3414   emitUint8(0xD0 | gprEncoding(reg));
   3415 }
   3416 
   3417 template <typename TraitsType>
   3418 void AssemblerX86Base<TraitsType>::bswap(Type Ty, GPRRegister reg) {
   3419   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3420   assert(Ty == IceType_i32 || (Traits::Is64Bit && Ty == IceType_i64));
   3421   emitRexB(Ty, reg);
   3422   emitUint8(0x0F);
   3423   emitUint8(0xC8 | gprEncoding(reg));
   3424 }
   3425 
   3426 template <typename TraitsType>
   3427 void AssemblerX86Base<TraitsType>::bsf(Type Ty, GPRRegister dst,
   3428                                        GPRRegister src) {
   3429   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3430   assert(Ty == IceType_i16 || Ty == IceType_i32 ||
   3431          (Traits::Is64Bit && Ty == IceType_i64));
   3432   if (Ty == IceType_i16)
   3433     emitOperandSizeOverride();
   3434   emitRexRB(Ty, dst, src);
   3435   emitUint8(0x0F);
   3436   emitUint8(0xBC);
   3437   emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
   3438 }
   3439 
   3440 template <typename TraitsType>
   3441 void AssemblerX86Base<TraitsType>::bsf(Type Ty, GPRRegister dst,
   3442                                        const Address &src) {
   3443   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3444   assert(Ty == IceType_i16 || Ty == IceType_i32 ||
   3445          (Traits::Is64Bit && Ty == IceType_i64));
   3446   if (Ty == IceType_i16)
   3447     emitOperandSizeOverride();
   3448   emitAddrSizeOverridePrefix();
   3449   emitRex(Ty, src, dst);
   3450   emitUint8(0x0F);
   3451   emitUint8(0xBC);
   3452   emitOperand(gprEncoding(dst), src);
   3453 }
   3454 
   3455 template <typename TraitsType>
   3456 void AssemblerX86Base<TraitsType>::bsr(Type Ty, GPRRegister dst,
   3457                                        GPRRegister src) {
   3458   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3459   assert(Ty == IceType_i16 || Ty == IceType_i32 ||
   3460          (Traits::Is64Bit && Ty == IceType_i64));
   3461   if (Ty == IceType_i16)
   3462     emitOperandSizeOverride();
   3463   emitRexRB(Ty, dst, src);
   3464   emitUint8(0x0F);
   3465   emitUint8(0xBD);
   3466   emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
   3467 }
   3468 
   3469 template <typename TraitsType>
   3470 void AssemblerX86Base<TraitsType>::bsr(Type Ty, GPRRegister dst,
   3471                                        const Address &src) {
   3472   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3473   assert(Ty == IceType_i16 || Ty == IceType_i32 ||
   3474          (Traits::Is64Bit && Ty == IceType_i64));
   3475   if (Ty == IceType_i16)
   3476     emitOperandSizeOverride();
   3477   emitAddrSizeOverridePrefix();
   3478   emitRex(Ty, src, dst);
   3479   emitUint8(0x0F);
   3480   emitUint8(0xBD);
   3481   emitOperand(gprEncoding(dst), src);
   3482 }
   3483 
   3484 template <typename TraitsType>
   3485 void AssemblerX86Base<TraitsType>::bt(GPRRegister base, GPRRegister offset) {
   3486   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3487   emitRexRB(IceType_i32, offset, base);
   3488   emitUint8(0x0F);
   3489   emitUint8(0xA3);
   3490   emitRegisterOperand(gprEncoding(offset), gprEncoding(base));
   3491 }
   3492 
   3493 template <typename TraitsType> void AssemblerX86Base<TraitsType>::ret() {
   3494   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3495   emitUint8(0xC3);
   3496 }
   3497 
   3498 template <typename TraitsType>
   3499 void AssemblerX86Base<TraitsType>::ret(const Immediate &imm) {
   3500   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3501   emitUint8(0xC2);
   3502   assert(imm.is_uint16());
   3503   emitUint8(imm.value() & 0xFF);
   3504   emitUint8((imm.value() >> 8) & 0xFF);
   3505 }
   3506 
   3507 template <typename TraitsType>
   3508 void AssemblerX86Base<TraitsType>::nop(int size) {
   3509   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3510   // There are nops up to size 15, but for now just provide up to size 8.
   3511   assert(0 < size && size <= MAX_NOP_SIZE);
   3512   switch (size) {
   3513   case 1:
   3514     emitUint8(0x90);
   3515     break;
   3516   case 2:
   3517     emitUint8(0x66);
   3518     emitUint8(0x90);
   3519     break;
   3520   case 3:
   3521     emitUint8(0x0F);
   3522     emitUint8(0x1F);
   3523     emitUint8(0x00);
   3524     break;
   3525   case 4:
   3526     emitUint8(0x0F);
   3527     emitUint8(0x1F);
   3528     emitUint8(0x40);
   3529     emitUint8(0x00);
   3530     break;
   3531   case 5:
   3532     emitUint8(0x0F);
   3533     emitUint8(0x1F);
   3534     emitUint8(0x44);
   3535     emitUint8(0x00);
   3536     emitUint8(0x00);
   3537     break;
   3538   case 6:
   3539     emitUint8(0x66);
   3540     emitUint8(0x0F);
   3541     emitUint8(0x1F);
   3542     emitUint8(0x44);
   3543     emitUint8(0x00);
   3544     emitUint8(0x00);
   3545     break;
   3546   case 7:
   3547     emitUint8(0x0F);
   3548     emitUint8(0x1F);
   3549     emitUint8(0x80);
   3550     emitUint8(0x00);
   3551     emitUint8(0x00);
   3552     emitUint8(0x00);
   3553     emitUint8(0x00);
   3554     break;
   3555   case 8:
   3556     emitUint8(0x0F);
   3557     emitUint8(0x1F);
   3558     emitUint8(0x84);
   3559     emitUint8(0x00);
   3560     emitUint8(0x00);
   3561     emitUint8(0x00);
   3562     emitUint8(0x00);
   3563     emitUint8(0x00);
   3564     break;
   3565   default:
   3566     llvm_unreachable("Unimplemented");
   3567   }
   3568 }
   3569 
   3570 template <typename TraitsType> void AssemblerX86Base<TraitsType>::int3() {
   3571   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3572   emitUint8(0xCC);
   3573 }
   3574 
   3575 template <typename TraitsType> void AssemblerX86Base<TraitsType>::hlt() {
   3576   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3577   emitUint8(0xF4);
   3578 }
   3579 
   3580 template <typename TraitsType> void AssemblerX86Base<TraitsType>::ud2() {
   3581   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3582   emitUint8(0x0F);
   3583   emitUint8(0x0B);
   3584 }
   3585 
   3586 template <typename TraitsType>
   3587 void AssemblerX86Base<TraitsType>::j(BrCond condition, Label *label,
   3588                                      bool near) {
   3589   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3590   if (label->isBound()) {
   3591     static const int kShortSize = 2;
   3592     static const int kLongSize = 6;
   3593     intptr_t offset = label->getPosition() - Buffer.size();
   3594     assert(offset <= 0);
   3595     if (Utils::IsInt(8, offset - kShortSize)) {
   3596       // TODO(stichnot): Here and in jmp(), we may need to be more
   3597       // conservative about the backward branch distance if the branch
   3598       // instruction is within a bundle_lock sequence, because the
   3599       // distance may increase when padding is added. This isn't an issue for
   3600       // branches outside a bundle_lock, because if padding is added, the retry
   3601       // may change it to a long backward branch without affecting any of the
   3602       // bookkeeping.
   3603       emitUint8(0x70 + condition);
   3604       emitUint8((offset - kShortSize) & 0xFF);
   3605     } else {
   3606       emitUint8(0x0F);
   3607       emitUint8(0x80 + condition);
   3608       emitInt32(offset - kLongSize);
   3609     }
   3610   } else if (near) {
   3611     emitUint8(0x70 + condition);
   3612     emitNearLabelLink(label);
   3613   } else {
   3614     emitUint8(0x0F);
   3615     emitUint8(0x80 + condition);
   3616     emitLabelLink(label);
   3617   }
   3618 }
   3619 
   3620 template <typename TraitsType>
   3621 void AssemblerX86Base<TraitsType>::j(BrCond condition,
   3622                                      const ConstantRelocatable *label) {
   3623   llvm::report_fatal_error("Untested - please verify and then reenable.");
   3624   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3625   emitUint8(0x0F);
   3626   emitUint8(0x80 + condition);
   3627   auto *Fixup = this->createFixup(Traits::FK_PcRel, label);
   3628   Fixup->set_addend(-4);
   3629   emitFixup(Fixup);
   3630   emitInt32(0);
   3631 }
   3632 
   3633 template <typename TraitsType>
   3634 void AssemblerX86Base<TraitsType>::jmp(GPRRegister reg) {
   3635   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3636   emitRexB(RexTypeIrrelevant, reg);
   3637   emitUint8(0xFF);
   3638   emitRegisterOperand(4, gprEncoding(reg));
   3639 }
   3640 
   3641 template <typename TraitsType>
   3642 void AssemblerX86Base<TraitsType>::jmp(Label *label, bool near) {
   3643   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3644   if (label->isBound()) {
   3645     static const int kShortSize = 2;
   3646     static const int kLongSize = 5;
   3647     intptr_t offset = label->getPosition() - Buffer.size();
   3648     assert(offset <= 0);
   3649     if (Utils::IsInt(8, offset - kShortSize)) {
   3650       emitUint8(0xEB);
   3651       emitUint8((offset - kShortSize) & 0xFF);
   3652     } else {
   3653       emitUint8(0xE9);
   3654       emitInt32(offset - kLongSize);
   3655     }
   3656   } else if (near) {
   3657     emitUint8(0xEB);
   3658     emitNearLabelLink(label);
   3659   } else {
   3660     emitUint8(0xE9);
   3661     emitLabelLink(label);
   3662   }
   3663 }
   3664 
   3665 template <typename TraitsType>
   3666 void AssemblerX86Base<TraitsType>::jmp(const ConstantRelocatable *label) {
   3667   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3668   emitUint8(0xE9);
   3669   auto *Fixup = this->createFixup(Traits::FK_PcRel, label);
   3670   Fixup->set_addend(-4);
   3671   emitFixup(Fixup);
   3672   emitInt32(0);
   3673 }
   3674 
   3675 template <typename TraitsType>
   3676 void AssemblerX86Base<TraitsType>::jmp(const Immediate &abs_address) {
   3677   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3678   emitUint8(0xE9);
   3679   AssemblerFixup *Fixup =
   3680       createFixup(Traits::FK_PcRel, AssemblerFixup::NullSymbol);
   3681   Fixup->set_addend(abs_address.value() - 4);
   3682   emitFixup(Fixup);
   3683   emitInt32(0);
   3684 }
   3685 
   3686 template <typename TraitsType> void AssemblerX86Base<TraitsType>::mfence() {
   3687   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3688   emitUint8(0x0F);
   3689   emitUint8(0xAE);
   3690   emitUint8(0xF0);
   3691 }
   3692 
   3693 template <typename TraitsType> void AssemblerX86Base<TraitsType>::lock() {
   3694   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3695   emitUint8(0xF0);
   3696 }
   3697 
   3698 template <typename TraitsType>
   3699 void AssemblerX86Base<TraitsType>::cmpxchg(Type Ty, const Address &address,
   3700                                            GPRRegister reg, bool Locked) {
   3701   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3702   if (Ty == IceType_i16)
   3703     emitOperandSizeOverride();
   3704   if (Locked)
   3705     emitUint8(0xF0);
   3706   emitAddrSizeOverridePrefix();
   3707   emitRex(Ty, address, reg);
   3708   emitUint8(0x0F);
   3709   if (isByteSizedArithType(Ty))
   3710     emitUint8(0xB0);
   3711   else
   3712     emitUint8(0xB1);
   3713   emitOperand(gprEncoding(reg), address);
   3714 }
   3715 
   3716 template <typename TraitsType>
   3717 void AssemblerX86Base<TraitsType>::cmpxchg8b(const Address &address,
   3718                                              bool Locked) {
   3719   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3720   if (Locked)
   3721     emitUint8(0xF0);
   3722   emitAddrSizeOverridePrefix();
   3723   emitRex(IceType_i32, address, RexRegIrrelevant);
   3724   emitUint8(0x0F);
   3725   emitUint8(0xC7);
   3726   emitOperand(1, address);
   3727 }
   3728 
   3729 template <typename TraitsType>
   3730 void AssemblerX86Base<TraitsType>::xadd(Type Ty, const Address &addr,
   3731                                         GPRRegister reg, bool Locked) {
   3732   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3733   if (Ty == IceType_i16)
   3734     emitOperandSizeOverride();
   3735   if (Locked)
   3736     emitUint8(0xF0);
   3737   emitAddrSizeOverridePrefix();
   3738   emitRex(Ty, addr, reg);
   3739   emitUint8(0x0F);
   3740   if (isByteSizedArithType(Ty))
   3741     emitUint8(0xC0);
   3742   else
   3743     emitUint8(0xC1);
   3744   emitOperand(gprEncoding(reg), addr);
   3745 }
   3746 
   3747 template <typename TraitsType>
   3748 void AssemblerX86Base<TraitsType>::xchg(Type Ty, GPRRegister reg0,
   3749                                         GPRRegister reg1) {
   3750   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3751   if (Ty == IceType_i16)
   3752     emitOperandSizeOverride();
   3753   // Use short form if either register is EAX.
   3754   if (reg0 == Traits::Encoded_Reg_Accumulator) {
   3755     emitRexB(Ty, reg1);
   3756     emitUint8(0x90 + gprEncoding(reg1));
   3757   } else if (reg1 == Traits::Encoded_Reg_Accumulator) {
   3758     emitRexB(Ty, reg0);
   3759     emitUint8(0x90 + gprEncoding(reg0));
   3760   } else {
   3761     emitRexRB(Ty, reg0, reg1);
   3762     if (isByteSizedArithType(Ty))
   3763       emitUint8(0x86);
   3764     else
   3765       emitUint8(0x87);
   3766     emitRegisterOperand(gprEncoding(reg0), gprEncoding(reg1));
   3767   }
   3768 }
   3769 
   3770 template <typename TraitsType>
   3771 void AssemblerX86Base<TraitsType>::xchg(Type Ty, const Address &addr,
   3772                                         GPRRegister reg) {
   3773   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3774   if (Ty == IceType_i16)
   3775     emitOperandSizeOverride();
   3776   emitAddrSizeOverridePrefix();
   3777   emitRex(Ty, addr, reg);
   3778   if (isByteSizedArithType(Ty))
   3779     emitUint8(0x86);
   3780   else
   3781     emitUint8(0x87);
   3782   emitOperand(gprEncoding(reg), addr);
   3783 }
   3784 
   3785 template <typename TraitsType> void AssemblerX86Base<TraitsType>::iaca_start() {
   3786   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3787   emitUint8(0x0F);
   3788   emitUint8(0x0B);
   3789 
   3790   // mov $111, ebx
   3791   constexpr GPRRegister dst = Traits::GPRRegister::Encoded_Reg_ebx;
   3792   constexpr Type Ty = IceType_i32;
   3793   emitRexB(Ty, dst);
   3794   emitUint8(0xB8 + gprEncoding(dst));
   3795   emitImmediate(Ty, Immediate(111));
   3796 
   3797   emitUint8(0x64);
   3798   emitUint8(0x67);
   3799   emitUint8(0x90);
   3800 }
   3801 
   3802 template <typename TraitsType> void AssemblerX86Base<TraitsType>::iaca_end() {
   3803   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3804 
   3805   // mov $222, ebx
   3806   constexpr GPRRegister dst = Traits::GPRRegister::Encoded_Reg_ebx;
   3807   constexpr Type Ty = IceType_i32;
   3808   emitRexB(Ty, dst);
   3809   emitUint8(0xB8 + gprEncoding(dst));
   3810   emitImmediate(Ty, Immediate(222));
   3811 
   3812   emitUint8(0x64);
   3813   emitUint8(0x67);
   3814   emitUint8(0x90);
   3815 
   3816   emitUint8(0x0F);
   3817   emitUint8(0x0B);
   3818 }
   3819 
   3820 template <typename TraitsType>
   3821 void AssemblerX86Base<TraitsType>::emitSegmentOverride(uint8_t prefix) {
   3822   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3823   emitUint8(prefix);
   3824 }
   3825 
   3826 template <typename TraitsType>
   3827 void AssemblerX86Base<TraitsType>::align(intptr_t alignment, intptr_t offset) {
   3828   assert(llvm::isPowerOf2_32(alignment));
   3829   intptr_t pos = offset + Buffer.getPosition();
   3830   intptr_t mod = pos & (alignment - 1);
   3831   if (mod == 0) {
   3832     return;
   3833   }
   3834   intptr_t bytes_needed = alignment - mod;
   3835   while (bytes_needed > MAX_NOP_SIZE) {
   3836     nop(MAX_NOP_SIZE);
   3837     bytes_needed -= MAX_NOP_SIZE;
   3838   }
   3839   if (bytes_needed) {
   3840     nop(bytes_needed);
   3841   }
   3842   assert(((offset + Buffer.getPosition()) & (alignment - 1)) == 0);
   3843 }
   3844 
   3845 template <typename TraitsType>
   3846 void AssemblerX86Base<TraitsType>::bind(Label *L) {
   3847   const intptr_t Bound = Buffer.size();
   3848   assert(!L->isBound()); // Labels can only be bound once.
   3849   while (L->isLinked()) {
   3850     const intptr_t Position = L->getLinkPosition();
   3851     const intptr_t Next = Buffer.load<int32_t>(Position);
   3852     const intptr_t Offset = Bound - (Position + 4);
   3853     Buffer.store<int32_t>(Position, Offset);
   3854     L->Position = Next;
   3855   }
   3856   while (L->hasNear()) {
   3857     intptr_t Position = L->getNearPosition();
   3858     const intptr_t Offset = Bound - (Position + 1);
   3859     assert(Utils::IsInt(8, Offset));
   3860     Buffer.store<int8_t>(Position, Offset);
   3861   }
   3862   L->bindTo(Bound);
   3863 }
   3864 
   3865 template <typename TraitsType>
   3866 void AssemblerX86Base<TraitsType>::emitOperand(int rm, const Operand &operand,
   3867                                                RelocOffsetT Addend) {
   3868   assert(rm >= 0 && rm < 8);
   3869   const intptr_t length = operand.length_;
   3870   assert(length > 0);
   3871   intptr_t displacement_start = 1;
   3872   // Emit the ModRM byte updated with the given RM value.
   3873   assert((operand.encoding_[0] & 0x38) == 0);
   3874   emitUint8(operand.encoding_[0] + (rm << 3));
   3875   // Whenever the addressing mode is not register indirect, using esp == 0x4
   3876   // as the register operation indicates an SIB byte follows.
   3877   if (((operand.encoding_[0] & 0xc0) != 0xc0) &&
   3878       ((operand.encoding_[0] & 0x07) == 0x04)) {
   3879     emitUint8(operand.encoding_[1]);
   3880     displacement_start = 2;
   3881   }
   3882 
   3883   AssemblerFixup *Fixup = operand.fixup();
   3884   if (Fixup == nullptr) {
   3885     for (intptr_t i = displacement_start; i < length; i++) {
   3886       emitUint8(operand.encoding_[i]);
   3887     }
   3888     return;
   3889   }
   3890 
   3891   // Emit the fixup, and a dummy 4-byte immediate. Note that the Disp32 in
   3892   // operand.encoding_[i, i+1, i+2, i+3] is part of the constant relocatable
   3893   // used to create the fixup, so there's no need to add it to the addend.
   3894   assert(length - displacement_start == 4);
   3895   if (fixupIsPCRel(Fixup->kind())) {
   3896     Fixup->set_addend(Fixup->get_addend() - Addend);
   3897   } else {
   3898     Fixup->set_addend(Fixup->get_addend());
   3899   }
   3900   emitFixup(Fixup);
   3901   emitInt32(0);
   3902 }
   3903 
   3904 template <typename TraitsType>
   3905 void AssemblerX86Base<TraitsType>::emitImmediate(Type Ty,
   3906                                                  const Immediate &imm) {
   3907   auto *const Fixup = imm.fixup();
   3908   if (Ty == IceType_i16) {
   3909     assert(Fixup == nullptr);
   3910     emitInt16(imm.value());
   3911     return;
   3912   }
   3913 
   3914   if (Fixup == nullptr) {
   3915     emitInt32(imm.value());
   3916     return;
   3917   }
   3918 
   3919   Fixup->set_addend(Fixup->get_addend() + imm.value());
   3920   emitFixup(Fixup);
   3921   emitInt32(0);
   3922 }
   3923 
   3924 template <typename TraitsType>
   3925 void AssemblerX86Base<TraitsType>::emitComplexI8(int rm, const Operand &operand,
   3926                                                  const Immediate &immediate) {
   3927   assert(rm >= 0 && rm < 8);
   3928   assert(immediate.is_int8());
   3929   if (operand.IsRegister(Traits::Encoded_Reg_Accumulator)) {
   3930     // Use short form if the destination is al.
   3931     emitUint8(0x04 + (rm << 3));
   3932     emitUint8(immediate.value() & 0xFF);
   3933   } else {
   3934     // Use sign-extended 8-bit immediate.
   3935     emitUint8(0x80);
   3936     static constexpr RelocOffsetT OffsetFromNextInstruction = 1;
   3937     emitOperand(rm, operand, OffsetFromNextInstruction);
   3938     emitUint8(immediate.value() & 0xFF);
   3939   }
   3940 }
   3941 
   3942 template <typename TraitsType>
   3943 void AssemblerX86Base<TraitsType>::emitComplex(Type Ty, int rm,
   3944                                                const Operand &operand,
   3945                                                const Immediate &immediate) {
   3946   assert(rm >= 0 && rm < 8);
   3947   if (immediate.is_int8()) {
   3948     // Use sign-extended 8-bit immediate.
   3949     emitUint8(0x83);
   3950     static constexpr RelocOffsetT OffsetFromNextInstruction = 1;
   3951     emitOperand(rm, operand, OffsetFromNextInstruction);
   3952     emitUint8(immediate.value() & 0xFF);
   3953   } else if (operand.IsRegister(Traits::Encoded_Reg_Accumulator)) {
   3954     // Use short form if the destination is eax.
   3955     emitUint8(0x05 + (rm << 3));
   3956     emitImmediate(Ty, immediate);
   3957   } else {
   3958     emitUint8(0x81);
   3959     const uint8_t OffsetFromNextInstruction = Ty == IceType_i16 ? 2 : 4;
   3960     emitOperand(rm, operand, OffsetFromNextInstruction);
   3961     emitImmediate(Ty, immediate);
   3962   }
   3963 }
   3964 
   3965 template <typename TraitsType>
   3966 void AssemblerX86Base<TraitsType>::emitLabel(Label *label,
   3967                                              intptr_t instruction_size) {
   3968   if (label->isBound()) {
   3969     intptr_t offset = label->getPosition() - Buffer.size();
   3970     assert(offset <= 0);
   3971     emitInt32(offset - instruction_size);
   3972   } else {
   3973     emitLabelLink(label);
   3974   }
   3975 }
   3976 
   3977 template <typename TraitsType>
   3978 void AssemblerX86Base<TraitsType>::emitLabelLink(Label *Label) {
   3979   assert(!Label->isBound());
   3980   intptr_t Position = Buffer.size();
   3981   emitInt32(Label->Position);
   3982   Label->linkTo(*this, Position);
   3983 }
   3984 
   3985 template <typename TraitsType>
   3986 void AssemblerX86Base<TraitsType>::emitNearLabelLink(Label *Label) {
   3987   assert(!Label->isBound());
   3988   intptr_t Position = Buffer.size();
   3989   emitUint8(0);
   3990   Label->nearLinkTo(*this, Position);
   3991 }
   3992 
   3993 template <typename TraitsType>
   3994 void AssemblerX86Base<TraitsType>::emitGenericShift(int rm, Type Ty,
   3995                                                     GPRRegister reg,
   3996                                                     const Immediate &imm) {
   3997   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   3998   // We don't assert that imm fits into 8 bits; instead, it gets masked below.
   3999   // Note that we don't mask it further (e.g. to 5 bits) because we want the
   4000   // same processor behavior regardless of whether it's an immediate (masked to
   4001   // 8 bits) or in register cl (essentially ecx masked to 8 bits).
   4002   if (Ty == IceType_i16)
   4003     emitOperandSizeOverride();
   4004   emitRexB(Ty, reg);
   4005   if (imm.value() == 1) {
   4006     emitUint8(isByteSizedArithType(Ty) ? 0xD0 : 0xD1);
   4007     emitOperand(rm, Operand(reg));
   4008   } else {
   4009     emitUint8(isByteSizedArithType(Ty) ? 0xC0 : 0xC1);
   4010     static constexpr RelocOffsetT OffsetFromNextInstruction = 1;
   4011     emitOperand(rm, Operand(reg), OffsetFromNextInstruction);
   4012     emitUint8(imm.value() & 0xFF);
   4013   }
   4014 }
   4015 
   4016 template <typename TraitsType>
   4017 void AssemblerX86Base<TraitsType>::emitGenericShift(int rm, Type Ty,
   4018                                                     const Operand &operand,
   4019                                                     GPRRegister shifter) {
   4020   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   4021   assert(shifter == Traits::Encoded_Reg_Counter);
   4022   (void)shifter;
   4023   if (Ty == IceType_i16)
   4024     emitOperandSizeOverride();
   4025   emitRexB(Ty, operand.rm());
   4026   emitUint8(isByteSizedArithType(Ty) ? 0xD2 : 0xD3);
   4027   emitOperand(rm, operand);
   4028 }
   4029 
   4030 } // end of namespace X86NAMESPACE
   4031 } // end of namespace Ice
   4032