Home | History | Annotate | Download | only in src
      1 //===- subzero/src/IceAssemblerX86Base.h - base x86 assembler -*- C++ -*---===//
      2 //
      3 // Copyright (c) 2013, the Dart project authors.  Please see the AUTHORS file
      4 // for details. All rights reserved. Use of this source code is governed by a
      5 // BSD-style license that can be found in the LICENSE file.
      6 //
      7 // Modified by the Subzero authors.
      8 //
      9 //===----------------------------------------------------------------------===//
     10 //
     11 //                        The Subzero Code Generator
     12 //
     13 // This file is distributed under the University of Illinois Open Source
     14 // License. See LICENSE.TXT for details.
     15 //
     16 //===----------------------------------------------------------------------===//
     17 //
     18 /// \file
     19 /// \brief Defines the AssemblerX86 template class for x86, the base of all X86
     20 /// assemblers.
     21 //
     22 //===----------------------------------------------------------------------===//
     23 
     24 #ifndef SUBZERO_SRC_ICEASSEMBLERX86BASE_H
     25 #define SUBZERO_SRC_ICEASSEMBLERX86BASE_H
     26 
     27 #include "IceAssembler.h"
     28 #include "IceDefs.h"
     29 #include "IceOperand.h"
     30 #include "IceTypes.h"
     31 #include "IceUtils.h"
     32 
     33 namespace Ice {
     34 
     35 #ifndef X86NAMESPACE
     36 #error "You must define the X86 Target namespace."
     37 #endif
     38 
     39 namespace X86NAMESPACE {
     40 
     41 template <typename TraitsType>
     42 class AssemblerX86Base : public ::Ice::Assembler {
     43   AssemblerX86Base(const AssemblerX86Base &) = delete;
     44   AssemblerX86Base &operator=(const AssemblerX86Base &) = delete;
     45 
     46 protected:
  /// Constructs the assembler base, passing Traits::AsmKind to the
  /// ::Ice::Assembler base class.
  ///
  /// \param EmitAddrSizeOverridePrefix initializes the member of the same
  /// name; defaults to TraitsType::Is64Bit.
  ///
  /// The constructor is protected, so only derived assemblers can create
  /// instances.
  explicit AssemblerX86Base(
      bool EmitAddrSizeOverridePrefix = TraitsType::Is64Bit)
      : Assembler(Traits::AsmKind),
        EmitAddrSizeOverridePrefix(EmitAddrSizeOverridePrefix) {
    // The flag may only be set when Traits::Is64Bit is true.
    assert(Traits::Is64Bit || !EmitAddrSizeOverridePrefix);
  }
     53 
     54 public:
     55   using Traits = TraitsType;
     56   using Address = typename Traits::Address;
     57   using ByteRegister = typename Traits::ByteRegister;
     58   using BrCond = typename Traits::Cond::BrCond;
     59   using CmppsCond = typename Traits::Cond::CmppsCond;
     60   using GPRRegister = typename Traits::GPRRegister;
     61   using Operand = typename Traits::Operand;
     62   using XmmRegister = typename Traits::XmmRegister;
     63 
     64   static constexpr int MAX_NOP_SIZE = 8;
     65 
     66   static bool classof(const Assembler *Asm) {
     67     return Asm->getKind() == Traits::AsmKind;
     68   }
     69 
     70   class Immediate {
     71     Immediate(const Immediate &) = delete;
     72     Immediate &operator=(const Immediate &) = delete;
     73 
     74   public:
     75     explicit Immediate(int32_t value) : value_(value) {}
     76 
     77     explicit Immediate(AssemblerFixup *fixup) : fixup_(fixup) {}
     78 
     79     int32_t value() const { return value_; }
     80     AssemblerFixup *fixup() const { return fixup_; }
     81 
     82     bool is_int8() const {
     83       // We currently only allow 32-bit fixups, and they usually have value = 0,
     84       // so if fixup_ != nullptr, it shouldn't be classified as int8/16.
     85       return fixup_ == nullptr && Utils::IsInt(8, value_);
     86     }
     87     bool is_uint8() const {
     88       return fixup_ == nullptr && Utils::IsUint(8, value_);
     89     }
     90     bool is_uint16() const {
     91       return fixup_ == nullptr && Utils::IsUint(16, value_);
     92     }
     93 
     94   private:
     95     const int32_t value_ = 0;
     96     AssemblerFixup *fixup_ = nullptr;
     97   };
     98 
     99   /// X86 allows near and far jumps.
    100   class Label final : public Ice::Label {
    101     Label(const Label &) = delete;
    102     Label &operator=(const Label &) = delete;
    103 
    104   public:
    105     Label() = default;
    106     ~Label() = default;
    107 
    108     void finalCheck() const override {
    109       Ice::Label::finalCheck();
    110       assert(!hasNear());
    111     }
    112 
    113     /// Returns the position of an earlier branch instruction which assumes that
    114     /// this label is "near", and bumps iterator to the next near position.
    115     intptr_t getNearPosition() {
    116       assert(hasNear());
    117       intptr_t Pos = UnresolvedNearPositions.back();
    118       UnresolvedNearPositions.pop_back();
    119       return Pos;
    120     }
    121 
    122     bool hasNear() const { return !UnresolvedNearPositions.empty(); }
    123     bool isUnused() const override {
    124       return Ice::Label::isUnused() && !hasNear();
    125     }
    126 
    127   private:
    128     friend class AssemblerX86Base<TraitsType>;
    129 
    130     void nearLinkTo(const Assembler &Asm, intptr_t position) {
    131       if (Asm.getPreliminary())
    132         return;
    133       assert(!isBound());
    134       UnresolvedNearPositions.push_back(position);
    135     }
    136 
    137     llvm::SmallVector<intptr_t, 20> UnresolvedNearPositions;
    138   };
    139 
    140 public:
    141   ~AssemblerX86Base() override;
    142 
    143   static const bool kNearJump = true;
    144   static const bool kFarJump = false;
    145 
    146   void alignFunction() override;
    147 
  /// Bundle alignment expressed as a base-2 logarithm of the byte count: 5,
  /// i.e. 32-byte bundles.
  SizeT getBundleAlignLog2Bytes() const override { return 5; }
    149 
  /// Returns the alignment directive string for this assembler, ".p2align".
  const char *getAlignDirective() const override { return ".p2align"; }
    151 
    152   llvm::ArrayRef<uint8_t> getNonExecBundlePadding() const override {
    153     static const uint8_t Padding[] = {0xF4};
    154     return llvm::ArrayRef<uint8_t>(Padding, 1);
    155   }
    156 
    157   void padWithNop(intptr_t Padding) override {
    158     while (Padding > MAX_NOP_SIZE) {
    159       nop(MAX_NOP_SIZE);
    160       Padding -= MAX_NOP_SIZE;
    161     }
    162     if (Padding)
    163       nop(Padding);
    164   }
    165 
    166   Ice::Label *getCfgNodeLabel(SizeT NodeNumber) override;
    167   void bindCfgNodeLabel(const CfgNode *Node) override;
    168   Label *getOrCreateCfgNodeLabel(SizeT Number);
    169   Label *getOrCreateLocalLabel(SizeT Number);
    170   void bindLocalLabel(SizeT Number);
    171 
    172   bool fixupIsPCRel(FixupKind Kind) const override {
    173     // Currently assuming this is the only PC-rel relocation type used.
    174     // TODO(jpp): Traits.PcRelTypes.count(Kind) != 0
    175     return Kind == Traits::FK_PcRel;
    176   }
    177 
  // Operations to emit GPR instructions (and dispatch on operand type).
  //
  // Each TypedEmit* alias is a pointer to an AssemblerX86Base member function
  // returning void; the alias suffix spells the operand kinds that follow the
  // leading Type argument.
  using TypedEmitGPR = void (AssemblerX86Base::*)(Type, GPRRegister);
  using TypedEmitAddr = void (AssemblerX86Base::*)(Type, const Address &);
  /// Emitters for a one-operand GPR instruction, one per operand form.
  struct GPREmitterOneOp {
    TypedEmitGPR Reg;   // Operand is a GPRRegister.
    TypedEmitAddr Addr; // Operand is an Address.
  };
    185 
  using TypedEmitGPRGPR = void (AssemblerX86Base::*)(Type, GPRRegister,
                                                     GPRRegister);
  using TypedEmitGPRAddr = void (AssemblerX86Base::*)(Type, GPRRegister,
                                                      const Address &);
  using TypedEmitGPRImm = void (AssemblerX86Base::*)(Type, GPRRegister,
                                                     const Immediate &);
  /// Emitters for a two-operand GPR instruction whose first operand is a
  /// register; one member per kind of second operand.
  struct GPREmitterRegOp {
    TypedEmitGPRGPR GPRGPR;   // Second operand is a GPRRegister.
    TypedEmitGPRAddr GPRAddr; // Second operand is an Address.
    TypedEmitGPRImm GPRImm;   // Second operand is an Immediate.
  };
    197 
  /// Emitters for GPR shift instructions: register operand with either a
  /// register or an immediate as the second operand.
  struct GPREmitterShiftOp {
    // Technically, Addr/GPR and Addr/Imm are also allowed, but */Addr are
    // not. In practice, we always normalize the Dest to a Register first.
    TypedEmitGPRGPR GPRGPR;
    TypedEmitGPRImm GPRImm;
  };
    204 
  using TypedEmitGPRGPRImm = void (AssemblerX86Base::*)(Type, GPRRegister,
                                                        GPRRegister,
                                                        const Immediate &);
  /// Emitters for instructions taking two registers, with or without a
  /// trailing immediate.
  struct GPREmitterShiftD {
    // Technically AddrGPR and AddrGPRImm are also allowed, but in practice we
    // always normalize Dest to a Register first.
    TypedEmitGPRGPR GPRGPR;
    TypedEmitGPRGPRImm GPRGPRImm;
  };
    214 
  using TypedEmitAddrGPR = void (AssemblerX86Base::*)(Type, const Address &,
                                                      GPRRegister);
  using TypedEmitAddrImm = void (AssemblerX86Base::*)(Type, const Address &,
                                                      const Immediate &);
  /// Emitters for a two-operand instruction whose first operand is an
  /// Address; one member per kind of second operand.
  struct GPREmitterAddrOp {
    TypedEmitAddrGPR AddrGPR; // Second operand is a GPRRegister.
    TypedEmitAddrImm AddrImm; // Second operand is an Immediate.
  };
    223 
  // Operations to emit XMM instructions (and dispatch on operand type).
  using TypedEmitXmmXmm = void (AssemblerX86Base::*)(Type, XmmRegister,
                                                     XmmRegister);
  using TypedEmitXmmAddr = void (AssemblerX86Base::*)(Type, XmmRegister,
                                                      const Address &);
  /// Emitters for a two-operand XMM instruction whose first operand is an
  /// XmmRegister; one member per kind of second operand.
  struct XmmEmitterRegOp {
    TypedEmitXmmXmm XmmXmm;   // Second operand is an XmmRegister.
    TypedEmitXmmAddr XmmAddr; // Second operand is an Address.
  };
    233 
  // Untyped variants: these member-function pointers take no leading Type
  // argument.
  using EmitXmmXmm = void (AssemblerX86Base::*)(XmmRegister, XmmRegister);
  using EmitXmmAddr = void (AssemblerX86Base::*)(XmmRegister, const Address &);
  using EmitAddrXmm = void (AssemblerX86Base::*)(const Address &, XmmRegister);
  /// Emitters for XMM moves, one per (first operand, second operand)
  /// combination of XmmRegister and Address that is supported.
  struct XmmEmitterMovOps {
    EmitXmmXmm XmmXmm;   // XmmRegister, XmmRegister.
    EmitXmmAddr XmmAddr; // XmmRegister, Address.
    EmitAddrXmm AddrXmm; // Address, XmmRegister.
  };
    242 
  using TypedEmitXmmImm = void (AssemblerX86Base::*)(Type, XmmRegister,
                                                     const Immediate &);

  /// Emitters for XMM shift instructions whose first operand is an
  /// XmmRegister; one member per kind of second operand.
  struct XmmEmitterShiftOp {
    TypedEmitXmmXmm XmmXmm;   // Second operand is an XmmRegister.
    TypedEmitXmmAddr XmmAddr; // Second operand is an Address.
    TypedEmitXmmImm XmmImm;   // Second operand is an Immediate.
  };
    251 
  // Cross Xmm/GPR cast instructions.
  //
  // DReg_t and SReg_t are the register types of the first and second operand.
  // Both emitter signatures carry two Type arguments, one preceding each
  // operand.
  template <typename DReg_t, typename SReg_t> struct CastEmitterRegOp {
    using TypedEmitRegs = void (AssemblerX86Base::*)(Type, DReg_t, Type,
                                                     SReg_t);
    using TypedEmitAddr = void (AssemblerX86Base::*)(Type, DReg_t, Type,
                                                     const Address &);

    TypedEmitRegs RegReg;  // Second operand is an SReg_t register.
    TypedEmitAddr RegAddr; // Second operand is an Address.
  };
    262 
  // Three operand (potentially) cross Xmm/GPR instructions. The last operand
  // must be an immediate.
  //
  // DReg_t is the register type of the first operand; the second operand is
  // either an SReg_t register or an Address.
  template <typename DReg_t, typename SReg_t> struct ThreeOpImmEmitter {
    using TypedEmitRegRegImm = void (AssemblerX86Base::*)(Type, DReg_t, SReg_t,
                                                          const Immediate &);
    using TypedEmitRegAddrImm = void (AssemblerX86Base::*)(Type, DReg_t,
                                                           const Address &,
                                                           const Immediate &);

    TypedEmitRegRegImm RegRegImm;   // Second operand is an SReg_t register.
    TypedEmitRegAddrImm RegAddrImm; // Second operand is an Address.
  };
    275 
    276   /*
    277    * Emit Machine Instructions.
    278    */
    279   void call(GPRRegister reg);
    280   void call(const Address &address);
    281   void call(const ConstantRelocatable *label); // not testable.
    282   void call(const Immediate &abs_address);
    283 
    284   static const intptr_t kCallExternalLabelSize = 5;
    285 
    286   void pushl(GPRRegister reg);
    287   void pushl(const Immediate &Imm);
    288   void pushl(const ConstantRelocatable *Label);
    289 
    290   void popl(GPRRegister reg);
    291   void popl(const Address &address);
    292 
    293   template <typename T = Traits,
    294             typename = typename std::enable_if<T::HasPusha>::type>
    295   void pushal();
    296   template <typename T = Traits,
    297             typename = typename std::enable_if<T::HasPopa>::type>
    298   void popal();
    299 
    300   void setcc(BrCond condition, ByteRegister dst);
    301   void setcc(BrCond condition, const Address &address);
    302 
    303   void mov(Type Ty, GPRRegister dst, const Immediate &src);
    304   void mov(Type Ty, GPRRegister dst, GPRRegister src);
    305   void mov(Type Ty, GPRRegister dst, const Address &src);
    306   void mov(Type Ty, const Address &dst, GPRRegister src);
    307   void mov(Type Ty, const Address &dst, const Immediate &imm);
    308 
  // movabs comes as two mutually exclusive overloads selected by
  // std::enable_if on T::Is64Bit. The 64-bit overload is only declared here;
  // the other one, defined inline below, reports a fatal error when called.
  template <typename T = Traits>
  typename std::enable_if<T::Is64Bit, void>::type movabs(const GPRRegister Dst,
                                                         uint64_t Imm64);
  template <typename T = Traits>
  typename std::enable_if<!T::Is64Bit, void>::type movabs(const GPRRegister,
                                                          uint64_t) {
    llvm::report_fatal_error("movabs is only supported in 64-bit x86 targets.");
  }
    317 
    318   void movzx(Type Ty, GPRRegister dst, GPRRegister src);
    319   void movzx(Type Ty, GPRRegister dst, const Address &src);
    320   void movsx(Type Ty, GPRRegister dst, GPRRegister src);
    321   void movsx(Type Ty, GPRRegister dst, const Address &src);
    322 
    323   void lea(Type Ty, GPRRegister dst, const Address &src);
    324 
    325   void cmov(Type Ty, BrCond cond, GPRRegister dst, GPRRegister src);
    326   void cmov(Type Ty, BrCond cond, GPRRegister dst, const Address &src);
    327 
    328   void rep_movsb();
    329 
    330   void movss(Type Ty, XmmRegister dst, const Address &src);
    331   void movss(Type Ty, const Address &dst, XmmRegister src);
    332   void movss(Type Ty, XmmRegister dst, XmmRegister src);
    333 
    334   void movd(Type SrcTy, XmmRegister dst, GPRRegister src);
    335   void movd(Type SrcTy, XmmRegister dst, const Address &src);
    336   void movd(Type DestTy, GPRRegister dst, XmmRegister src);
    337   void movd(Type DestTy, const Address &dst, XmmRegister src);
    338 
    339   void movq(XmmRegister dst, XmmRegister src);
    340   void movq(const Address &dst, XmmRegister src);
    341   void movq(XmmRegister dst, const Address &src);
    342 
    343   void addss(Type Ty, XmmRegister dst, XmmRegister src);
    344   void addss(Type Ty, XmmRegister dst, const Address &src);
    345   void subss(Type Ty, XmmRegister dst, XmmRegister src);
    346   void subss(Type Ty, XmmRegister dst, const Address &src);
    347   void mulss(Type Ty, XmmRegister dst, XmmRegister src);
    348   void mulss(Type Ty, XmmRegister dst, const Address &src);
    349   void divss(Type Ty, XmmRegister dst, XmmRegister src);
    350   void divss(Type Ty, XmmRegister dst, const Address &src);
    351 
    352   void movaps(XmmRegister dst, XmmRegister src);
    353 
    354   void movups(XmmRegister dst, XmmRegister src);
    355   void movups(XmmRegister dst, const Address &src);
    356   void movups(const Address &dst, XmmRegister src);
    357 
    358   void padd(Type Ty, XmmRegister dst, XmmRegister src);
    359   void padd(Type Ty, XmmRegister dst, const Address &src);
    360   void padds(Type Ty, XmmRegister dst, XmmRegister src);
    361   void padds(Type Ty, XmmRegister dst, const Address &src);
    362   void paddus(Type Ty, XmmRegister dst, XmmRegister src);
    363   void paddus(Type Ty, XmmRegister dst, const Address &src);
    364   void pand(Type Ty, XmmRegister dst, XmmRegister src);
    365   void pand(Type Ty, XmmRegister dst, const Address &src);
    366   void pandn(Type Ty, XmmRegister dst, XmmRegister src);
    367   void pandn(Type Ty, XmmRegister dst, const Address &src);
    368   void pmull(Type Ty, XmmRegister dst, XmmRegister src);
    369   void pmull(Type Ty, XmmRegister dst, const Address &src);
    370   void pmulhw(Type Ty, XmmRegister dst, XmmRegister src);
    371   void pmulhw(Type Ty, XmmRegister dst, const Address &src);
    372   void pmulhuw(Type Ty, XmmRegister dst, XmmRegister src);
    373   void pmulhuw(Type Ty, XmmRegister dst, const Address &src);
    374   void pmaddwd(Type Ty, XmmRegister dst, XmmRegister src);
    375   void pmaddwd(Type Ty, XmmRegister dst, const Address &src);
    376   void pmuludq(Type Ty, XmmRegister dst, XmmRegister src);
    377   void pmuludq(Type Ty, XmmRegister dst, const Address &src);
    378   void por(Type Ty, XmmRegister dst, XmmRegister src);
    379   void por(Type Ty, XmmRegister dst, const Address &src);
    380   void psub(Type Ty, XmmRegister dst, XmmRegister src);
    381   void psub(Type Ty, XmmRegister dst, const Address &src);
    382   void psubs(Type Ty, XmmRegister dst, XmmRegister src);
    383   void psubs(Type Ty, XmmRegister dst, const Address &src);
    384   void psubus(Type Ty, XmmRegister dst, XmmRegister src);
    385   void psubus(Type Ty, XmmRegister dst, const Address &src);
    386   void pxor(Type Ty, XmmRegister dst, XmmRegister src);
    387   void pxor(Type Ty, XmmRegister dst, const Address &src);
    388 
    389   void psll(Type Ty, XmmRegister dst, XmmRegister src);
    390   void psll(Type Ty, XmmRegister dst, const Address &src);
    391   void psll(Type Ty, XmmRegister dst, const Immediate &src);
    392 
    393   void psra(Type Ty, XmmRegister dst, XmmRegister src);
    394   void psra(Type Ty, XmmRegister dst, const Address &src);
    395   void psra(Type Ty, XmmRegister dst, const Immediate &src);
    396   void psrl(Type Ty, XmmRegister dst, XmmRegister src);
    397   void psrl(Type Ty, XmmRegister dst, const Address &src);
    398   void psrl(Type Ty, XmmRegister dst, const Immediate &src);
    399 
    400   void addps(Type Ty, XmmRegister dst, XmmRegister src);
    401   void addps(Type Ty, XmmRegister dst, const Address &src);
    402   void subps(Type Ty, XmmRegister dst, XmmRegister src);
    403   void subps(Type Ty, XmmRegister dst, const Address &src);
    404   void divps(Type Ty, XmmRegister dst, XmmRegister src);
    405   void divps(Type Ty, XmmRegister dst, const Address &src);
    406   void mulps(Type Ty, XmmRegister dst, XmmRegister src);
    407   void mulps(Type Ty, XmmRegister dst, const Address &src);
    408   void minps(Type Ty, XmmRegister dst, const Address &src);
    409   void minps(Type Ty, XmmRegister dst, XmmRegister src);
    410   void minss(Type Ty, XmmRegister dst, const Address &src);
    411   void minss(Type Ty, XmmRegister dst, XmmRegister src);
    412   void maxps(Type Ty, XmmRegister dst, const Address &src);
    413   void maxps(Type Ty, XmmRegister dst, XmmRegister src);
    414   void maxss(Type Ty, XmmRegister dst, const Address &src);
    415   void maxss(Type Ty, XmmRegister dst, XmmRegister src);
    416   void andnps(Type Ty, XmmRegister dst, const Address &src);
    417   void andnps(Type Ty, XmmRegister dst, XmmRegister src);
    418   void andps(Type Ty, XmmRegister dst, const Address &src);
    419   void andps(Type Ty, XmmRegister dst, XmmRegister src);
    420   void orps(Type Ty, XmmRegister dst, const Address &src);
    421   void orps(Type Ty, XmmRegister dst, XmmRegister src);
    422 
    423   void blendvps(Type Ty, XmmRegister dst, XmmRegister src);
    424   void blendvps(Type Ty, XmmRegister dst, const Address &src);
    425   void pblendvb(Type Ty, XmmRegister dst, XmmRegister src);
    426   void pblendvb(Type Ty, XmmRegister dst, const Address &src);
    427 
    428   void cmpps(Type Ty, XmmRegister dst, XmmRegister src, CmppsCond CmpCondition);
    429   void cmpps(Type Ty, XmmRegister dst, const Address &src,
    430              CmppsCond CmpCondition);
    431 
    432   void sqrtps(XmmRegister dst);
    433   void rsqrtps(XmmRegister dst);
    434   void reciprocalps(XmmRegister dst);
    435 
    436   void movhlps(XmmRegister dst, XmmRegister src);
    437   void movlhps(XmmRegister dst, XmmRegister src);
    438   void unpcklps(XmmRegister dst, XmmRegister src);
    439   void unpckhps(XmmRegister dst, XmmRegister src);
    440   void unpcklpd(XmmRegister dst, XmmRegister src);
    441   void unpckhpd(XmmRegister dst, XmmRegister src);
    442 
    443   void set1ps(XmmRegister dst, GPRRegister tmp, const Immediate &imm);
    444 
    445   void sqrtpd(XmmRegister dst);
    446 
    447   void pshufb(Type Ty, XmmRegister dst, XmmRegister src);
    448   void pshufb(Type Ty, XmmRegister dst, const Address &src);
    449   void pshufd(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask);
    450   void pshufd(Type Ty, XmmRegister dst, const Address &src,
    451               const Immediate &mask);
    452   void punpckl(Type Ty, XmmRegister Dst, XmmRegister Src);
    453   void punpckl(Type Ty, XmmRegister Dst, const Address &Src);
    454   void punpckh(Type Ty, XmmRegister Dst, XmmRegister Src);
    455   void punpckh(Type Ty, XmmRegister Dst, const Address &Src);
    456   void packss(Type Ty, XmmRegister Dst, XmmRegister Src);
    457   void packss(Type Ty, XmmRegister Dst, const Address &Src);
    458   void packus(Type Ty, XmmRegister Dst, XmmRegister Src);
    459   void packus(Type Ty, XmmRegister Dst, const Address &Src);
    460   void shufps(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask);
    461   void shufps(Type Ty, XmmRegister dst, const Address &src,
    462               const Immediate &mask);
    463 
    464   void cvtdq2ps(Type, XmmRegister dst, XmmRegister src);
    465   void cvtdq2ps(Type, XmmRegister dst, const Address &src);
    466 
    467   void cvttps2dq(Type, XmmRegister dst, XmmRegister src);
    468   void cvttps2dq(Type, XmmRegister dst, const Address &src);
    469 
    470   void cvtps2dq(Type, XmmRegister dst, XmmRegister src);
    471   void cvtps2dq(Type, XmmRegister dst, const Address &src);
    472 
    473   void cvtsi2ss(Type DestTy, XmmRegister dst, Type SrcTy, GPRRegister src);
    474   void cvtsi2ss(Type DestTy, XmmRegister dst, Type SrcTy, const Address &src);
    475 
    476   void cvtfloat2float(Type SrcTy, XmmRegister dst, XmmRegister src);
    477   void cvtfloat2float(Type SrcTy, XmmRegister dst, const Address &src);
    478 
    479   void cvttss2si(Type DestTy, GPRRegister dst, Type SrcTy, XmmRegister src);
    480   void cvttss2si(Type DestTy, GPRRegister dst, Type SrcTy, const Address &src);
    481 
    482   void cvtss2si(Type DestTy, GPRRegister dst, Type SrcTy, XmmRegister src);
    483   void cvtss2si(Type DestTy, GPRRegister dst, Type SrcTy, const Address &src);
    484 
    485   void ucomiss(Type Ty, XmmRegister a, XmmRegister b);
    486   void ucomiss(Type Ty, XmmRegister a, const Address &b);
    487 
    488   void movmsk(Type Ty, GPRRegister dst, XmmRegister src);
    489 
    490   void sqrt(Type Ty, XmmRegister dst, const Address &src);
    491   void sqrt(Type Ty, XmmRegister dst, XmmRegister src);
    492 
    493   void xorps(Type Ty, XmmRegister dst, const Address &src);
    494   void xorps(Type Ty, XmmRegister dst, XmmRegister src);
    495 
    496   void insertps(Type Ty, XmmRegister dst, XmmRegister src,
    497                 const Immediate &imm);
    498   void insertps(Type Ty, XmmRegister dst, const Address &src,
    499                 const Immediate &imm);
    500 
    501   void pinsr(Type Ty, XmmRegister dst, GPRRegister src, const Immediate &imm);
    502   void pinsr(Type Ty, XmmRegister dst, const Address &src,
    503              const Immediate &imm);
    504 
    505   void pextr(Type Ty, GPRRegister dst, XmmRegister src, const Immediate &imm);
    506 
    507   void pmovsxdq(XmmRegister dst, XmmRegister src);
    508 
    509   void pcmpeq(Type Ty, XmmRegister dst, XmmRegister src);
    510   void pcmpeq(Type Ty, XmmRegister dst, const Address &src);
    511   void pcmpgt(Type Ty, XmmRegister dst, XmmRegister src);
    512   void pcmpgt(Type Ty, XmmRegister dst, const Address &src);
    513 
  /// Rounding-mode selectors with fixed encodings 0x0 through 0x3.
  /// NOTE(review): presumably these are the values callers wrap in the
  /// Immediate `mode` argument of round(), and they appear to follow the
  /// SSE4.1 ROUND* imm8 rounding-control encoding -- confirm at the call
  /// sites.
  enum RoundingMode {
    kRoundToNearest = 0x0,
    kRoundDown = 0x1,
    kRoundUp = 0x2,
    kRoundToZero = 0x3
  };
    520   void round(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mode);
    521   void round(Type Ty, XmmRegister dst, const Address &src,
    522              const Immediate &mode);
    523 
    524   //----------------------------------------------------------------------------
    525   //
    526   // Begin: X87 instructions. Only available when Traits::UsesX87.
    527   //
    528   //----------------------------------------------------------------------------
    529   template <typename T = Traits,
    530             typename = typename std::enable_if<T::UsesX87>::type>
    531   void fld(Type Ty, const typename T::Address &src);
    532   template <typename T = Traits,
    533             typename = typename std::enable_if<T::UsesX87>::type>
    534   void fstp(Type Ty, const typename T::Address &dst);
    535   template <typename T = Traits,
    536             typename = typename std::enable_if<T::UsesX87>::type>
    537   void fstp(typename T::X87STRegister st);
    538 
    539   template <typename T = Traits,
    540             typename = typename std::enable_if<T::UsesX87>::type>
    541   void fnstcw(const typename T::Address &dst);
    542   template <typename T = Traits,
    543             typename = typename std::enable_if<T::UsesX87>::type>
    544   void fldcw(const typename T::Address &src);
    545 
    546   template <typename T = Traits,
    547             typename = typename std::enable_if<T::UsesX87>::type>
    548   void fistpl(const typename T::Address &dst);
    549   template <typename T = Traits,
    550             typename = typename std::enable_if<T::UsesX87>::type>
    551   void fistps(const typename T::Address &dst);
    552   template <typename T = Traits,
    553             typename = typename std::enable_if<T::UsesX87>::type>
    554   void fildl(const typename T::Address &src);
    555   template <typename T = Traits,
    556             typename = typename std::enable_if<T::UsesX87>::type>
    557   void filds(const typename T::Address &src);
    558 
    559   template <typename T = Traits,
    560             typename = typename std::enable_if<T::UsesX87>::type>
    561   void fincstp();
    562   //----------------------------------------------------------------------------
    563   //
    564   // End: X87 instructions.
    565   //
    566   //----------------------------------------------------------------------------
    567 
    568   void cmp(Type Ty, GPRRegister reg0, GPRRegister reg1);
    569   void cmp(Type Ty, GPRRegister reg, const Address &address);
    570   void cmp(Type Ty, GPRRegister reg, const Immediate &imm);
    571   void cmp(Type Ty, const Address &address, GPRRegister reg);
    572   void cmp(Type Ty, const Address &address, const Immediate &imm);
    573 
    574   void test(Type Ty, GPRRegister reg0, GPRRegister reg1);
    575   void test(Type Ty, GPRRegister reg, const Immediate &imm);
    576   void test(Type Ty, const Address &address, GPRRegister reg);
    577   void test(Type Ty, const Address &address, const Immediate &imm);
    578 
    579   void And(Type Ty, GPRRegister dst, GPRRegister src);
    580   void And(Type Ty, GPRRegister dst, const Address &address);
    581   void And(Type Ty, GPRRegister dst, const Immediate &imm);
    582   void And(Type Ty, const Address &address, GPRRegister reg);
    583   void And(Type Ty, const Address &address, const Immediate &imm);
    584 
    585   void Or(Type Ty, GPRRegister dst, GPRRegister src);
    586   void Or(Type Ty, GPRRegister dst, const Address &address);
    587   void Or(Type Ty, GPRRegister dst, const Immediate &imm);
    588   void Or(Type Ty, const Address &address, GPRRegister reg);
    589   void Or(Type Ty, const Address &address, const Immediate &imm);
    590 
    591   void Xor(Type Ty, GPRRegister dst, GPRRegister src);
    592   void Xor(Type Ty, GPRRegister dst, const Address &address);
    593   void Xor(Type Ty, GPRRegister dst, const Immediate &imm);
    594   void Xor(Type Ty, const Address &address, GPRRegister reg);
    595   void Xor(Type Ty, const Address &address, const Immediate &imm);
    596 
    597   void add(Type Ty, GPRRegister dst, GPRRegister src);
    598   void add(Type Ty, GPRRegister reg, const Address &address);
    599   void add(Type Ty, GPRRegister reg, const Immediate &imm);
    600   void add(Type Ty, const Address &address, GPRRegister reg);
    601   void add(Type Ty, const Address &address, const Immediate &imm);
    602 
    603   void adc(Type Ty, GPRRegister dst, GPRRegister src);
    604   void adc(Type Ty, GPRRegister dst, const Address &address);
    605   void adc(Type Ty, GPRRegister reg, const Immediate &imm);
    606   void adc(Type Ty, const Address &address, GPRRegister reg);
    607   void adc(Type Ty, const Address &address, const Immediate &imm);
    608 
    609   void sub(Type Ty, GPRRegister dst, GPRRegister src);
    610   void sub(Type Ty, GPRRegister reg, const Address &address);
    611   void sub(Type Ty, GPRRegister reg, const Immediate &imm);
    612   void sub(Type Ty, const Address &address, GPRRegister reg);
    613   void sub(Type Ty, const Address &address, const Immediate &imm);
    614 
    615   void sbb(Type Ty, GPRRegister dst, GPRRegister src);
    616   void sbb(Type Ty, GPRRegister reg, const Address &address);
    617   void sbb(Type Ty, GPRRegister reg, const Immediate &imm);
    618   void sbb(Type Ty, const Address &address, GPRRegister reg);
    619   void sbb(Type Ty, const Address &address, const Immediate &imm);
    620 
    621   void cbw();
    622   void cwd();
    623   void cdq();
  // cqo comes as two mutually exclusive overloads selected by std::enable_if
  // on T::Is64Bit. The 64-bit overload is only declared here; the other one,
  // defined inline below, reports a fatal error when called.
  template <typename T = Traits>
  typename std::enable_if<T::Is64Bit, void>::type cqo();
  template <typename T = Traits>
  typename std::enable_if<!T::Is64Bit, void>::type cqo() {
    llvm::report_fatal_error("CQO is only available in 64-bit x86 backends.");
  }
    630 
    631   void div(Type Ty, GPRRegister reg);
    632   void div(Type Ty, const Address &address);
    633 
    634   void idiv(Type Ty, GPRRegister reg);
    635   void idiv(Type Ty, const Address &address);
    636 
    637   void imul(Type Ty, GPRRegister dst, GPRRegister src);
    638   void imul(Type Ty, GPRRegister reg, const Immediate &imm);
    639   void imul(Type Ty, GPRRegister reg, const Address &address);
    640 
    641   void imul(Type Ty, GPRRegister reg);
    642   void imul(Type Ty, const Address &address);
    643 
    644   void imul(Type Ty, GPRRegister dst, GPRRegister src, const Immediate &imm);
    645   void imul(Type Ty, GPRRegister dst, const Address &address,
    646             const Immediate &imm);
    647 
    648   void mul(Type Ty, GPRRegister reg);
    649   void mul(Type Ty, const Address &address);
    650 
    651   template <class T = Traits,
    652             typename = typename std::enable_if<!T::Is64Bit>::type>
    653   void incl(GPRRegister reg);
    654   void incl(const Address &address);
    655 
    656   template <class T = Traits,
    657             typename = typename std::enable_if<!T::Is64Bit>::type>
    658   void decl(GPRRegister reg);
    659   void decl(const Address &address);
    660 
    661   void rol(Type Ty, GPRRegister reg, const Immediate &imm); // rol, shl, shr and sar share one overload pattern.
          // Each of the four is declared as (register, Immediate), (register,
          // shifter register) and (Address, shifter register).
          // NOTE(review): emitGenericShift(), declared further down in this class,
          // looks like the shared backend for these -- confirm in the implementation.
    662   void rol(Type Ty, GPRRegister operand, GPRRegister shifter);
    663   void rol(Type Ty, const Address &operand, GPRRegister shifter);
    664 
    665   void shl(Type Ty, GPRRegister reg, const Immediate &imm);
    666   void shl(Type Ty, GPRRegister operand, GPRRegister shifter);
    667   void shl(Type Ty, const Address &operand, GPRRegister shifter);
    668 
    669   void shr(Type Ty, GPRRegister reg, const Immediate &imm);
    670   void shr(Type Ty, GPRRegister operand, GPRRegister shifter);
    671   void shr(Type Ty, const Address &operand, GPRRegister shifter);
    672 
    673   void sar(Type Ty, GPRRegister reg, const Immediate &imm);
    674   void sar(Type Ty, GPRRegister operand, GPRRegister shifter);
    675   void sar(Type Ty, const Address &address, GPRRegister shifter);
    676 
    677   void shld(Type Ty, GPRRegister dst, GPRRegister src); // shld/shrd: two-register form without an Immediate.
          // The remaining shld forms add an Immediate operand, or use an Address as
          // the first operand. shrd is declared with the same three shapes.
          // NOTE(review): the forms without an Immediate presumably take the shift
          // count from CL -- confirm in the implementation.
    678   void shld(Type Ty, GPRRegister dst, GPRRegister src, const Immediate &imm);
    679   void shld(Type Ty, const Address &operand, GPRRegister src);
    680   void shrd(Type Ty, GPRRegister dst, GPRRegister src);
    681   void shrd(Type Ty, GPRRegister dst, GPRRegister src, const Immediate &imm);
    682   void shrd(Type Ty, const Address &dst, GPRRegister src);
    683 
    684   void neg(Type Ty, GPRRegister reg); // neg: register form, typed by Ty.
    685   void neg(Type Ty, const Address &addr); // neg: Address form.
    686   void notl(GPRRegister reg); // notl: register only; it takes no Type parameter.
    687 
    688   void bsf(Type Ty, GPRRegister dst, GPRRegister src); // bsf/bsr: register destination with a register or Address source.
    689   void bsf(Type Ty, GPRRegister dst, const Address &src);
    690   void bsr(Type Ty, GPRRegister dst, GPRRegister src);
    691   void bsr(Type Ty, GPRRegister dst, const Address &src);
    692 
          // bswap operates on a single register, typed by Ty.
    693   void bswap(Type Ty, GPRRegister reg);
    694 
          // bt takes two registers named base and offset and has no Type parameter.
    695   void bt(GPRRegister base, GPRRegister offset);
    696 
    697   void ret(); // ret: form without operands.
    698   void ret(const Immediate &imm); // ret: form taking an Immediate.
    699 
    700   // 'size' indicates size in bytes and must be in the range 1..8.
          // It defaults to 1 when omitted.
    701   void nop(int size = 1);
          // int3(), hlt() and ud2() take no operands and are only declared here.
    702   void int3();
    703   void hlt();
    704   void ud2();
    705 
    706   // j(Label) is fully tested. The `near` argument defaults to kFarJump.
    707   void j(BrCond condition, Label *label, bool near = kFarJump);
    708   void j(BrCond condition, const ConstantRelocatable *label); // not testable.
    709 
          // jmp overloads: a register target, a Label (near defaults to kFarJump), a
          // ConstantRelocatable, and an Immediate whose parameter is named
          // abs_address.
    710   void jmp(GPRRegister reg);
    711   void jmp(Label *label, bool near = kFarJump);
    712   void jmp(const ConstantRelocatable *label); // not testable.
    713   void jmp(const Immediate &abs_address);
    714 
    715   void mfence(); // mfence: no operands; declared here only.
    716 
          // NOTE(review): lock() presumably emits a standalone LOCK prefix byte --
          // confirm in the implementation.
    717   void lock();
          // cmpxchg, cmpxchg8b and xadd all take a trailing `Locked` flag.
          // NOTE(review): presumably it requests a LOCK prefix on the emitted
          // instruction -- confirm in the implementation.
    718   void cmpxchg(Type Ty, const Address &address, GPRRegister reg, bool Locked);
    719   void cmpxchg8b(const Address &address, bool Locked);
    720   void xadd(Type Ty, const Address &address, GPRRegister reg, bool Locked);
          // xchg: register/register and Address/register forms; neither takes a
          // Locked flag.
    721   void xchg(Type Ty, GPRRegister reg0, GPRRegister reg1);
    722   void xchg(Type Ty, const Address &address, GPRRegister reg);
    723 
    724   /// \name Intel Architecture Code Analyzer markers.
    725   /// @{
          /// Takes no arguments. NOTE(review): presumably emits the IACA start
          /// marker byte sequence -- confirm in the implementation.
    726   void iaca_start();
          /// Takes no arguments. NOTE(review): presumably emits the IACA end marker
          /// byte sequence -- confirm in the implementation.
    727   void iaca_end();
    728   /// @}
    729 
    730   void emitSegmentOverride(uint8_t prefix); // Takes the prefix as a raw byte value.
    731 
          // Always returns the constant 16.
    732   intptr_t preferredLoopAlignment() { return 16; }
          // align() and bind() are only declared here.
    733   void align(intptr_t alignment, intptr_t offset);
    734   void bind(Label *label);
    735 
          // Returns Buffer.size(), converted to intptr_t.
    736   intptr_t CodeSize() const { return Buffer.size(); }
    737 
    738 protected:
          // Appends one byte to Buffer; the inline definition follows the class.
          // It is the only emit primitive declared protected rather than private.
    739   inline void emitUint8(uint8_t value);
    740 
    741 private:
          // NOTE(review): ENABLE_MAKE_UNIQUE is a macro defined outside this file;
          // presumably it grants the project's makeUnique helper access to the
          // non-public constructor -- confirm where the macro is defined.
    742   ENABLE_MAKE_UNIQUE;
    743 
    744   // EmitAddrSizeOverridePrefix directs the emission of the 0x67 prefix to
    745   // force 32-bit registers when accessing memory. This is only used in native
    746   // 64-bit.
          // The flag is set once from the constructor argument (which defaults to
          // TraitsType::Is64Bit) and is read by emitAddrSizeOverridePrefix().
    747   const bool EmitAddrSizeOverridePrefix;
    748 
    749   static constexpr Type RexTypeIrrelevant = IceType_i32; // Type to pass when the operand type should not set Rex.W.
          // assembleAndEmitRex() sets the W bit only when one of its type arguments
          // is IceType_i64, so passing RexTypeForceRexW always sets it.
    750   static constexpr Type RexTypeForceRexW = IceType_i64;
          // Placeholder register (the eax encoding) for Rex helpers whose reg or rm
          // slot is unused, as in emitRexB() and emitRex().
    751   static constexpr GPRRegister RexRegIrrelevant =
    752       Traits::GPRRegister::Encoded_Reg_eax;
    753 
    754   inline void emitInt16(int16_t value); // Appends a 16-bit value to Buffer.
    755   inline void emitInt32(int32_t value); // Appends a 32-bit value to Buffer.
          // Parameter order is (reg, rm), matching the inline definition after the
          // class, which emits the single byte 0xC0 + (reg << 3) + rm.
    756   inline void emitRegisterOperand(int reg, int rm);
          // Same (reg, rm) order; both arguments are passed through gprEncoding()
          // before being forwarded to emitRegisterOperand().
    757   template <typename RegType, typename RmType>
    758   inline void emitXmmRegisterOperand(RegType reg, RmType rm);
          // Emits the byte 0x66.
    759   inline void emitOperandSizeOverride();
    760 
    761   void emitOperand(int rm, const Operand &operand, RelocOffsetT Addend = 0); // Addend defaults to 0.
          // NOTE(review): in emitOperand, emitComplexI8, emitComplex and
          // emitGenericShift the int parameter named `rm` presumably supplies the
          // ModRM reg/opcode-extension field rather than the r/m field -- confirm in
          // the implementation.
    762   void emitImmediate(Type ty, const Immediate &imm);
    763   void emitComplexI8(int rm, const Operand &operand,
    764                      const Immediate &immediate);
    765   void emitComplex(Type Ty, int rm, const Operand &operand,
    766                    const Immediate &immediate);
          // Label helpers; only declared here.
    767   void emitLabel(Label *label, intptr_t instruction_size);
    768   void emitLabelLink(Label *label);
    769   void emitNearLabelLink(Label *label);
    770 
          // Two shapes: (register, Immediate) and (Operand, shifter register).
    771   void emitGenericShift(int rm, Type Ty, GPRRegister reg, const Immediate &imm);
    772   void emitGenericShift(int rm, Type Ty, const Operand &operand,
    773                         GPRRegister shifter);
    774 
    775   using LabelVector = std::vector<Label *>; // Container type for the two label tables below.
    776   // A vector of pool-allocated x86 labels for CFG nodes.
    777   LabelVector CfgNodeLabels;
    778   // A vector of pool-allocated x86 labels for Local labels.
    779   LabelVector LocalLabels;
    780 
          // Takes a label number and the table to use, and returns a Label pointer.
          // NOTE(review): presumably Number indexes into Labels and a missing entry
          // is created on demand -- confirm in the implementation.
    781   Label *getOrCreateLabel(SizeT Number, LabelVector &Labels);
    782 
    783   void emitAddrSizeOverridePrefix() { // Emits 0x67 only when Traits::Is64Bit and the EmitAddrSizeOverridePrefix flag are both true.
            // Otherwise return without emitting anything.
    784     if (!Traits::Is64Bit || !EmitAddrSizeOverridePrefix) {
    785       return;
    786     }
    787     static constexpr uint8_t AddrSizeOverridePrefix = 0x67;
    788     emitUint8(AddrSizeOverridePrefix);
    789   }
    790 
    791   // The arith_int() methods factor out the commonality between the encodings
    792   // of add(), Or(), adc(), sbb(), And(), sub(), Xor(), and cmp(). The Tag
    793   // parameter is statically asserted to be less than 8.
          // Tag is a compile-time template argument. Five operand shapes are
          // declared: (register, Immediate), (register, register),
          // (register, Address), (Address, register) and (Address, Immediate).
    794   template <uint32_t Tag>
    795   void arith_int(Type Ty, GPRRegister reg, const Immediate &imm);
    796 
    797   template <uint32_t Tag>
    798   void arith_int(Type Ty, GPRRegister reg0, GPRRegister reg1);
    799 
    800   template <uint32_t Tag>
    801   void arith_int(Type Ty, GPRRegister reg, const Address &address);
    802 
    803   template <uint32_t Tag>
    804   void arith_int(Type Ty, const Address &address, GPRRegister reg);
    805 
    806   template <uint32_t Tag>
    807   void arith_int(Type Ty, const Address &address, const Immediate &imm);
    808 
    809   // gprEncoding returns Reg encoding for operand emission. For x86-64 we mask
    810   // out the 4th bit (mask 0x08) as it is encoded in the REX.[RXB] bits. No
    811   // other bits are touched because we don't want to mask errors.
          // This overload is selected when T::Is64Bit is true. Reg is narrowed to
          // uint8_t before the 0x08 bit is cleared.
    812   template <typename RegType, typename T = Traits>
    813   typename std::enable_if<T::Is64Bit, typename T::GPRRegister>::type
    814   gprEncoding(const RegType Reg) {
    815     return static_cast<GPRRegister>(static_cast<uint8_t>(Reg) & ~0x08);
    816   }
    817 
          // This overload is selected when T::Is64Bit is false. Reg is converted to
          // T::GPRRegister with no masking.
    818   template <typename RegType, typename T = Traits>
    819   typename std::enable_if<!T::Is64Bit, typename T::GPRRegister>::type
    820   gprEncoding(const RegType Reg) {
    821     return static_cast<typename T::GPRRegister>(Reg);
    822   }
    823 
    824   template <typename RegType>
          // Returns true only when all of the following hold: RegType decays to
          // ByteRegister or GPRRegister, Reg has bit 0x04 set and bit 0x08 clear
          // (encodings 4 through 7), Ty is byte sized, and Reg is not the ah
          // encoding.
    825   bool is8BitRegisterRequiringRex(const Type Ty, const RegType Reg) {
    826     static constexpr bool IsGPR =
    827         std::is_same<typename std::decay<RegType>::type, ByteRegister>::value ||
    828         std::is_same<typename std::decay<RegType>::type, GPRRegister>::value;
    829 
    830     // At this point in the assembler, we have encoded regs, so it is not
    831     // possible to distinguish between the "new" low byte registers introduced
    832     // in x86-64 and the legacy [abcd]h registers. Because x86, we may still
    833     // see ah (div) in the assembler, so we whitelist it here.
    834     //
    835     // The "local" uint32_t Encoded_Reg_ah is needed because RegType is an
    836     // enum that is not necessarily the same type of
    837     // Traits::RegisterSet::Encoded_Reg_ah.
    838     constexpr uint32_t Encoded_Reg_ah = Traits::RegisterSet::Encoded_Reg_ah;
    839     return IsGPR && (Reg & 0x04) != 0 && (Reg & 0x08) == 0 &&
    840            isByteSizedType(Ty) && (Reg != Encoded_Reg_ah);
    841   }
    842 
    843   // assembleAndEmitRex is used for determining which (if any) rex prefix
    844   // should be emitted for the current instruction. It allows different types
    845   // for Reg and Rm because they could be of different types (e.g., in
    846   // mov[sz]x instructions.) If Addr is not nullptr, then Rm is ignored, and
    847   // Rex.B is determined by Addr instead. TyRm is still used to determine
    848   // Addr's size.
          // This overload is selected when T::Is64Bit is true. It emits at most one
          // byte.
    849   template <typename RegType, typename RmType, typename T = Traits>
    850   typename std::enable_if<T::Is64Bit, void>::type
    851   assembleAndEmitRex(const Type TyReg, const RegType Reg, const Type TyRm,
    852                      const RmType Rm,
    853                      const typename T::Address *Addr = nullptr) {
            // W is set when either operand type is IceType_i64.
    854     const uint8_t W = (TyReg == IceType_i64 || TyRm == IceType_i64)
    855                           ? T::Operand::RexW
    856                           : T::Operand::RexNone;
            // R is set when Reg has bit 0x08 set.
    857     const uint8_t R = (Reg & 0x08) ? T::Operand::RexR : T::Operand::RexNone;
            // X comes from Addr->rexX() and is RexNone when there is no Addr.
    858     const uint8_t X = (Addr != nullptr)
    859                           ? (typename T::Operand::RexBits)Addr->rexX()
    860                           : T::Operand::RexNone;
            // B comes from Addr->rexB() when Addr is given; otherwise it is set when
            // Rm has bit 0x08 set.
    861     const uint8_t B =
    862         (Addr != nullptr)
    863             ? (typename T::Operand::RexBits)Addr->rexB()
    864             : (Rm & 0x08) ? T::Operand::RexB : T::Operand::RexNone;
    865     const uint8_t Prefix = W | R | X | B;
            // Emit the combined prefix if any bit is set. Otherwise emit a bare
            // RexBase only when a byte register needs it; Rm is checked only when no
            // Addr was supplied.
    866     if (Prefix != T::Operand::RexNone) {
    867       emitUint8(Prefix);
    868     } else if (is8BitRegisterRequiringRex(TyReg, Reg) ||
    869                (Addr == nullptr && is8BitRegisterRequiringRex(TyRm, Rm))) {
    870       emitUint8(T::Operand::RexBase);
    871     }
    872   }
    873 
          // This overload is selected when T::Is64Bit is false. Its body is empty,
          // so nothing is emitted.
    874   template <typename RegType, typename RmType, typename T = Traits>
    875   typename std::enable_if<!T::Is64Bit, void>::type
    876   assembleAndEmitRex(const Type, const RegType, const Type, const RmType,
    877                      const typename T::Address * = nullptr) {}
    878 
    879   // emitRexRB is used for emitting a Rex prefix for instructions with two
    880   // explicit register operands in their mod-rm byte.
          // This overload uses the same type Ty for both Reg and Rm.
    881   template <typename RegType, typename RmType>
    882   void emitRexRB(const Type Ty, const RegType Reg, const RmType Rm) {
    883     assembleAndEmitRex(Ty, Reg, Ty, Rm);
    884   }
    885 
          // This overload takes separate types for Reg and Rm and forwards all four
          // arguments unchanged to assembleAndEmitRex().
    886   template <typename RegType, typename RmType>
    887   void emitRexRB(const Type TyReg, const RegType Reg, const Type TyRm,
    888                  const RmType Rm) {
    889     assembleAndEmitRex(TyReg, Reg, TyRm, Rm);
    890   }
    891 
    892   // emitRexB is used for emitting a Rex prefix if one is needed on encoding
    893   // the single register operand of an x86 instruction (e.g., push Reg.) The
    894   // register is passed as Rm, and RexRegIrrelevant fills the unused Reg slot.
    895   template <typename RmType> void emitRexB(const Type Ty, const RmType Rm) {
    896     emitRexRB(Ty, RexRegIrrelevant, Ty, Rm);
    897   }
    898 
    899   // emitRex is used for emitting a Rex prefix for an address and a GPR. The
    900   // address may contain zero, one, or two registers.
          // This overload uses Ty for both the register and the address.
          // RexRegIrrelevant is passed as Rm, which assembleAndEmitRex() ignores
          // when an address is supplied.
    901   template <typename RegType>
    902   void emitRex(const Type Ty, const Address &Addr, const RegType Reg) {
    903     assembleAndEmitRex(Ty, Reg, Ty, RexRegIrrelevant, &Addr);
    904   }
    905 
          // This overload takes separate types: AddrTy for the address and TyReg
          // for the register.
    906   template <typename RegType>
    907   void emitRex(const Type AddrTy, const Address &Addr, const Type TyReg,
    908                const RegType Reg) {
    909     assembleAndEmitRex(TyReg, Reg, AddrTy, RexRegIrrelevant, &Addr);
    910   }
    911 };
    912 
    913 template <typename TraitsType>
        // Appends the single byte `value` to Buffer.
    914 inline void AssemblerX86Base<TraitsType>::emitUint8(uint8_t value) {
    915   Buffer.emit<uint8_t>(value);
    916 }
    917 
    918 template <typename TraitsType>
        // Appends `value` to Buffer as an int16_t.
    919 inline void AssemblerX86Base<TraitsType>::emitInt16(int16_t value) {
    920   Buffer.emit<int16_t>(value);
    921 }
    922 
    923 template <typename TraitsType>
        // Appends `value` to Buffer as an int32_t.
    924 inline void AssemblerX86Base<TraitsType>::emitInt32(int32_t value) {
    925   Buffer.emit<int32_t>(value);
    926 }
    927 
    928 template <typename TraitsType>
        // Emits one byte computed as 0xC0 + (reg << 3) + rm. Both arguments must
        // already be in the range [0, 8); this is checked with assert only.
        // NOTE(review): this looks like the register-direct ModRM form, so on
        // x86-64 callers are expected to strip the 0x08 register bit first (see
        // gprEncoding) -- confirm at call sites.
    929 inline void AssemblerX86Base<TraitsType>::emitRegisterOperand(int reg, int rm) {
    930   assert(reg >= 0 && reg < 8);
    931   assert(rm >= 0 && rm < 8);
    932   Buffer.emit<uint8_t>(0xC0 + (reg << 3) + rm);
    933 }
    934 
    935 template <typename TraitsType>
        // Passes reg and rm through gprEncoding() and forwards the results to
        // emitRegisterOperand() in the same (reg, rm) order.
    936 template <typename RegType, typename RmType>
    937 inline void AssemblerX86Base<TraitsType>::emitXmmRegisterOperand(RegType reg,
    938                                                                  RmType rm) {
    939   emitRegisterOperand(gprEncoding(reg), gprEncoding(rm));
    940 }
    941 
    942 template <typename TraitsType>
        // Emits the single byte 0x66.
    943 inline void AssemblerX86Base<TraitsType>::emitOperandSizeOverride() {
    944   emitUint8(0x66);
    945 }
    946 
    947 } // end of namespace X86NAMESPACE
    948 
    949 } // end of namespace Ice
    950 
    951 #include "IceAssemblerX86BaseImpl.h"
    952 
    953 #endif // SUBZERO_SRC_ICEASSEMBLERX86BASE_H
    954