Home | History | Annotate | Download | only in X86
      1 //===-- X86Subtarget.h - Define Subtarget for the X86 ----------*- C++ -*--===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file declares the X86 specific subclass of TargetSubtargetInfo.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #ifndef LLVM_LIB_TARGET_X86_X86SUBTARGET_H
     15 #define LLVM_LIB_TARGET_X86_X86SUBTARGET_H
     16 
     17 #include "X86FrameLowering.h"
     18 #include "X86ISelLowering.h"
     19 #include "X86InstrInfo.h"
     20 #include "X86SelectionDAGInfo.h"
     21 #include "llvm/ADT/StringRef.h"
     22 #include "llvm/ADT/Triple.h"
     23 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
     24 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
     25 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
     26 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
     27 #include "llvm/CodeGen/TargetSubtargetInfo.h"
     28 #include "llvm/IR/CallingConv.h"
     29 #include "llvm/Target/TargetMachine.h"
     30 #include <climits>
     31 #include <memory>
     32 
     33 #define GET_SUBTARGETINFO_HEADER
     34 #include "X86GenSubtargetInfo.inc"
     35 
     36 namespace llvm {
     37 
     38 class GlobalValue;
     39 
     40 /// The X86 backend supports a number of different styles of PIC.
     41 ///
     42 namespace PICStyles {
     43 
     44 enum Style {
     45   StubPIC,          // Used on i386-darwin in pic mode.
     46   GOT,              // Used on 32 bit elf on when in pic mode.
     47   RIPRel,           // Used on X86-64 when in pic mode.
     48   None              // Set when not in pic mode.
     49 };
     50 
     51 } // end namespace PICStyles
     52 
     53 class X86Subtarget final : public X86GenSubtargetInfo {
     54 public:
     55   enum X86ProcFamilyEnum {
     56     Others,
     57     IntelAtom,
     58     IntelSLM,
     59     IntelGLM,
     60     IntelGLP,
     61     IntelTRM,
     62     IntelHaswell,
     63     IntelBroadwell,
     64     IntelSkylake,
     65     IntelKNL,
     66     IntelSKX,
     67     IntelCannonlake,
     68     IntelIcelakeClient,
     69     IntelIcelakeServer,
     70   };
     71 
     72 protected:
     73   enum X86SSEEnum {
     74     NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F
     75   };
     76 
     77   enum X863DNowEnum {
     78     NoThreeDNow, MMX, ThreeDNow, ThreeDNowA
     79   };
     80 
     81   /// X86 processor family: Intel Atom, and others
     82   X86ProcFamilyEnum X86ProcFamily = Others;
     83 
     84   /// Which PIC style to use
     85   PICStyles::Style PICStyle;
     86 
     87   const TargetMachine &TM;
     88 
     89   /// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported.
     90   X86SSEEnum X86SSELevel = NoSSE;
     91 
     92   /// MMX, 3DNow, 3DNow Athlon, or none supported.
     93   X863DNowEnum X863DNowLevel = NoThreeDNow;
     94 
     95   /// True if the processor supports X87 instructions.
     96   bool HasX87 = false;
     97 
     98   /// True if this processor has NOPL instruction
     99   /// (generally pentium pro+).
    100   bool HasNOPL = false;
    101 
    102   /// True if this processor has conditional move instructions
    103   /// (generally pentium pro+).
    104   bool HasCMov = false;
    105 
    106   /// True if the processor supports X86-64 instructions.
    107   bool HasX86_64 = false;
    108 
    109   /// True if the processor supports POPCNT.
    110   bool HasPOPCNT = false;
    111 
    112   /// True if the processor supports SSE4A instructions.
    113   bool HasSSE4A = false;
    114 
    115   /// Target has AES instructions
    116   bool HasAES = false;
    117   bool HasVAES = false;
    118 
    119   /// Target has FXSAVE/FXRESTOR instructions
    120   bool HasFXSR = false;
    121 
    122   /// Target has XSAVE instructions
    123   bool HasXSAVE = false;
    124 
    125   /// Target has XSAVEOPT instructions
    126   bool HasXSAVEOPT = false;
    127 
    128   /// Target has XSAVEC instructions
    129   bool HasXSAVEC = false;
    130 
    131   /// Target has XSAVES instructions
    132   bool HasXSAVES = false;
    133 
    134   /// Target has carry-less multiplication
    135   bool HasPCLMUL = false;
    136   bool HasVPCLMULQDQ = false;
    137 
    138   /// Target has Galois Field Arithmetic instructions
    139   bool HasGFNI = false;
    140 
    141   /// Target has 3-operand fused multiply-add
    142   bool HasFMA = false;
    143 
    144   /// Target has 4-operand fused multiply-add
    145   bool HasFMA4 = false;
    146 
    147   /// Target has XOP instructions
    148   bool HasXOP = false;
    149 
    150   /// Target has TBM instructions.
    151   bool HasTBM = false;
    152 
    153   /// Target has LWP instructions
    154   bool HasLWP = false;
    155 
    156   /// True if the processor has the MOVBE instruction.
    157   bool HasMOVBE = false;
    158 
    159   /// True if the processor has the RDRAND instruction.
    160   bool HasRDRAND = false;
    161 
    162   /// Processor has 16-bit floating point conversion instructions.
    163   bool HasF16C = false;
    164 
    165   /// Processor has FS/GS base insturctions.
    166   bool HasFSGSBase = false;
    167 
    168   /// Processor has LZCNT instruction.
    169   bool HasLZCNT = false;
    170 
    171   /// Processor has BMI1 instructions.
    172   bool HasBMI = false;
    173 
    174   /// Processor has BMI2 instructions.
    175   bool HasBMI2 = false;
    176 
    177   /// Processor has VBMI instructions.
    178   bool HasVBMI = false;
    179 
    180   /// Processor has VBMI2 instructions.
    181   bool HasVBMI2 = false;
    182 
    183   /// Processor has Integer Fused Multiply Add
    184   bool HasIFMA = false;
    185 
    186   /// Processor has RTM instructions.
    187   bool HasRTM = false;
    188 
    189   /// Processor has ADX instructions.
    190   bool HasADX = false;
    191 
    192   /// Processor has SHA instructions.
    193   bool HasSHA = false;
    194 
    195   /// Processor has PRFCHW instructions.
    196   bool HasPRFCHW = false;
    197 
    198   /// Processor has RDSEED instructions.
    199   bool HasRDSEED = false;
    200 
    201   /// Processor has LAHF/SAHF instructions.
    202   bool HasLAHFSAHF = false;
    203 
    204   /// Processor has MONITORX/MWAITX instructions.
    205   bool HasMWAITX = false;
    206 
    207   /// Processor has Cache Line Zero instruction
    208   bool HasCLZERO = false;
    209 
    210   /// Processor has Cache Line Demote instruction
    211   bool HasCLDEMOTE = false;
    212 
    213   /// Processor has MOVDIRI instruction (direct store integer).
    214   bool HasMOVDIRI = false;
    215 
    216   /// Processor has MOVDIR64B instruction (direct store 64 bytes).
    217   bool HasMOVDIR64B = false;
    218 
    219   /// Processor has ptwrite instruction.
    220   bool HasPTWRITE = false;
    221 
    222   /// Processor has Prefetch with intent to Write instruction
    223   bool HasPREFETCHWT1 = false;
    224 
    225   /// True if SHLD instructions are slow.
    226   bool IsSHLDSlow = false;
    227 
    228   /// True if the PMULLD instruction is slow compared to PMULLW/PMULHW and
    229   //  PMULUDQ.
    230   bool IsPMULLDSlow = false;
    231 
    232   /// True if unaligned memory accesses of 16-bytes are slow.
    233   bool IsUAMem16Slow = false;
    234 
    235   /// True if unaligned memory accesses of 32-bytes are slow.
    236   bool IsUAMem32Slow = false;
    237 
    238   /// True if SSE operations can have unaligned memory operands.
    239   /// This may require setting a configuration bit in the processor.
    240   bool HasSSEUnalignedMem = false;
    241 
    242   /// True if this processor has the CMPXCHG16B instruction;
    243   /// this is true for most x86-64 chips, but not the first AMD chips.
    244   bool HasCmpxchg16b = false;
    245 
    246   /// True if the LEA instruction should be used for adjusting
    247   /// the stack pointer. This is an optimization for Intel Atom processors.
    248   bool UseLeaForSP = false;
    249 
    250   /// True if POPCNT instruction has a false dependency on the destination register.
    251   bool HasPOPCNTFalseDeps = false;
    252 
    253   /// True if LZCNT/TZCNT instructions have a false dependency on the destination register.
    254   bool HasLZCNTFalseDeps = false;
    255 
    256   /// True if its preferable to combine to a single shuffle using a variable
    257   /// mask over multiple fixed shuffles.
    258   bool HasFastVariableShuffle = false;
    259 
    260   /// True if there is no performance penalty to writing only the lower parts
    261   /// of a YMM or ZMM register without clearing the upper part.
    262   bool HasFastPartialYMMorZMMWrite = false;
    263 
    264   /// True if there is no performance penalty for writing NOPs with up to
    265   /// 11 bytes.
    266   bool HasFast11ByteNOP = false;
    267 
    268   /// True if there is no performance penalty for writing NOPs with up to
    269   /// 15 bytes.
    270   bool HasFast15ByteNOP = false;
    271 
    272   /// True if gather is reasonably fast. This is true for Skylake client and
    273   /// all AVX-512 CPUs.
    274   bool HasFastGather = false;
    275 
    276   /// True if hardware SQRTSS instruction is at least as fast (latency) as
    277   /// RSQRTSS followed by a Newton-Raphson iteration.
    278   bool HasFastScalarFSQRT = false;
    279 
    280   /// True if hardware SQRTPS/VSQRTPS instructions are at least as fast
    281   /// (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.
    282   bool HasFastVectorFSQRT = false;
    283 
    284   /// True if 8-bit divisions are significantly faster than
    285   /// 32-bit divisions and should be used when possible.
    286   bool HasSlowDivide32 = false;
    287 
    288   /// True if 32-bit divides are significantly faster than
    289   /// 64-bit divisions and should be used when possible.
    290   bool HasSlowDivide64 = false;
    291 
    292   /// True if LZCNT instruction is fast.
    293   bool HasFastLZCNT = false;
    294 
    295   /// True if SHLD based rotate is fast.
    296   bool HasFastSHLDRotate = false;
    297 
    298   /// True if the processor supports macrofusion.
    299   bool HasMacroFusion = false;
    300 
    301   /// True if the processor has enhanced REP MOVSB/STOSB.
    302   bool HasERMSB = false;
    303 
    304   /// True if the short functions should be padded to prevent
    305   /// a stall when returning too early.
    306   bool PadShortFunctions = false;
    307 
    308   /// True if two memory operand instructions should use a temporary register
    309   /// instead.
    310   bool SlowTwoMemOps = false;
    311 
    312   /// True if the LEA instruction inputs have to be ready at address generation
    313   /// (AG) time.
    314   bool LEAUsesAG = false;
    315 
    316   /// True if the LEA instruction with certain arguments is slow
    317   bool SlowLEA = false;
    318 
    319   /// True if the LEA instruction has all three source operands: base, index,
    320   /// and offset or if the LEA instruction uses base and index registers where
    321   /// the base is EBP, RBP,or R13
    322   bool Slow3OpsLEA = false;
    323 
    324   /// True if INC and DEC instructions are slow when writing to flags
    325   bool SlowIncDec = false;
    326 
    327   /// Processor has AVX-512 PreFetch Instructions
    328   bool HasPFI = false;
    329 
    330   /// Processor has AVX-512 Exponential and Reciprocal Instructions
    331   bool HasERI = false;
    332 
    333   /// Processor has AVX-512 Conflict Detection Instructions
    334   bool HasCDI = false;
    335 
    336   /// Processor has AVX-512 population count Instructions
    337   bool HasVPOPCNTDQ = false;
    338 
    339   /// Processor has AVX-512 Doubleword and Quadword instructions
    340   bool HasDQI = false;
    341 
    342   /// Processor has AVX-512 Byte and Word instructions
    343   bool HasBWI = false;
    344 
    345   /// Processor has AVX-512 Vector Length eXtenstions
    346   bool HasVLX = false;
    347 
    348   /// Processor has PKU extenstions
    349   bool HasPKU = false;
    350 
    351   /// Processor has AVX-512 Vector Neural Network Instructions
    352   bool HasVNNI = false;
    353 
    354   /// Processor has AVX-512 Bit Algorithms instructions
    355   bool HasBITALG = false;
    356 
    357   /// Processor supports MPX - Memory Protection Extensions
    358   bool HasMPX = false;
    359 
    360   /// Processor supports CET SHSTK - Control-Flow Enforcement Technology
    361   /// using Shadow Stack
    362   bool HasSHSTK = false;
    363 
    364   /// Processor supports Invalidate Process-Context Identifier
    365   bool HasINVPCID = false;
    366 
    367   /// Processor has Software Guard Extensions
    368   bool HasSGX = false;
    369 
    370   /// Processor supports Flush Cache Line instruction
    371   bool HasCLFLUSHOPT = false;
    372 
    373   /// Processor supports Cache Line Write Back instruction
    374   bool HasCLWB = false;
    375 
    376   /// Processor supports Write Back No Invalidate instruction
    377   bool HasWBNOINVD = false;
    378 
    379   /// Processor support RDPID instruction
    380   bool HasRDPID = false;
    381 
    382   /// Processor supports WaitPKG instructions
    383   bool HasWAITPKG = false;
    384 
    385   /// Processor supports PCONFIG instruction
    386   bool HasPCONFIG = false;
    387 
    388   /// Use a retpoline thunk rather than indirect calls to block speculative
    389   /// execution.
    390   bool UseRetpoline = false;
    391 
    392   /// When using a retpoline thunk, call an externally provided thunk rather
    393   /// than emitting one inside the compiler.
    394   bool UseRetpolineExternalThunk = false;
    395 
    396   /// Use software floating point for code generation.
    397   bool UseSoftFloat = false;
    398 
    399   /// The minimum alignment known to hold of the stack frame on
    400   /// entry to the function and which must be maintained by every function.
    401   unsigned stackAlignment = 4;
    402 
    403   /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
    404   ///
    405   // FIXME: this is a known good value for Yonah. How about others?
    406   unsigned MaxInlineSizeThreshold = 128;
    407 
    408   /// Indicates target prefers 256 bit instructions.
    409   bool Prefer256Bit = false;
    410 
    411   /// What processor and OS we're targeting.
    412   Triple TargetTriple;
    413 
    414   /// GlobalISel related APIs.
    415   std::unique_ptr<CallLowering> CallLoweringInfo;
    416   std::unique_ptr<LegalizerInfo> Legalizer;
    417   std::unique_ptr<RegisterBankInfo> RegBankInfo;
    418   std::unique_ptr<InstructionSelector> InstSelector;
    419 
    420 private:
    421   /// Override the stack alignment.
    422   unsigned StackAlignOverride;
    423 
    424   /// Preferred vector width from function attribute.
    425   unsigned PreferVectorWidthOverride;
    426 
    427   /// Resolved preferred vector width from function attribute and subtarget
    428   /// features.
    429   unsigned PreferVectorWidth = UINT32_MAX;
    430 
    431   /// Required vector width from function attribute.
    432   unsigned RequiredVectorWidth;
    433 
    434   /// True if compiling for 64-bit, false for 16-bit or 32-bit.
    435   bool In64BitMode;
    436 
    437   /// True if compiling for 32-bit, false for 16-bit or 64-bit.
    438   bool In32BitMode;
    439 
    440   /// True if compiling for 16-bit, false for 32-bit or 64-bit.
    441   bool In16BitMode;
    442 
    443   /// Contains the Overhead of gather\scatter instructions
    444   int GatherOverhead = 1024;
    445   int ScatterOverhead = 1024;
    446 
    447   X86SelectionDAGInfo TSInfo;
    448   // Ordering here is important. X86InstrInfo initializes X86RegisterInfo which
    449   // X86TargetLowering needs.
    450   X86InstrInfo InstrInfo;
    451   X86TargetLowering TLInfo;
    452   X86FrameLowering FrameLowering;
    453 
    454 public:
    455   /// This constructor initializes the data members to match that
    456   /// of the specified triple.
    457   ///
    458   X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
    459                const X86TargetMachine &TM, unsigned StackAlignOverride,
    460                unsigned PreferVectorWidthOverride,
    461                unsigned RequiredVectorWidth);
    462 
    463   const X86TargetLowering *getTargetLowering() const override {
    464     return &TLInfo;
    465   }
    466 
    467   const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; }
    468 
    469   const X86FrameLowering *getFrameLowering() const override {
    470     return &FrameLowering;
    471   }
    472 
    473   const X86SelectionDAGInfo *getSelectionDAGInfo() const override {
    474     return &TSInfo;
    475   }
    476 
    477   const X86RegisterInfo *getRegisterInfo() const override {
    478     return &getInstrInfo()->getRegisterInfo();
    479   }
    480 
    481   /// Returns the minimum alignment known to hold of the
    482   /// stack frame on entry to the function and which must be maintained by every
    483   /// function for this subtarget.
    484   unsigned getStackAlignment() const { return stackAlignment; }
    485 
    486   /// Returns the maximum memset / memcpy size
    487   /// that still makes it profitable to inline the call.
    488   unsigned getMaxInlineSizeThreshold() const { return MaxInlineSizeThreshold; }
    489 
    490   /// ParseSubtargetFeatures - Parses features string setting specified
    491   /// subtarget options.  Definition of function is auto generated by tblgen.
    492   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
    493 
    494   /// Methods used by Global ISel
    495   const CallLowering *getCallLowering() const override;
    496   const InstructionSelector *getInstructionSelector() const override;
    497   const LegalizerInfo *getLegalizerInfo() const override;
    498   const RegisterBankInfo *getRegBankInfo() const override;
    499 
    500 private:
    501   /// Initialize the full set of dependencies so we can use an initializer
    502   /// list for X86Subtarget.
    503   X86Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
    504   void initSubtargetFeatures(StringRef CPU, StringRef FS);
    505 
    506 public:
    507   /// Is this x86_64? (disregarding specific ABI / programming model)
    508   bool is64Bit() const {
    509     return In64BitMode;
    510   }
    511 
    512   bool is32Bit() const {
    513     return In32BitMode;
    514   }
    515 
    516   bool is16Bit() const {
    517     return In16BitMode;
    518   }
    519 
    520   /// Is this x86_64 with the ILP32 programming model (x32 ABI)?
    521   bool isTarget64BitILP32() const {
    522     return In64BitMode && (TargetTriple.getEnvironment() == Triple::GNUX32 ||
    523                            TargetTriple.isOSNaCl());
    524   }
    525 
    526   /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
    527   bool isTarget64BitLP64() const {
    528     return In64BitMode && (TargetTriple.getEnvironment() != Triple::GNUX32 &&
    529                            !TargetTriple.isOSNaCl());
    530   }
    531 
    532   PICStyles::Style getPICStyle() const { return PICStyle; }
    533   void setPICStyle(PICStyles::Style Style)  { PICStyle = Style; }
    534 
    535   bool hasX87() const { return HasX87; }
    536   bool hasNOPL() const { return HasNOPL; }
    537   bool hasCMov() const { return HasCMov; }
    538   bool hasSSE1() const { return X86SSELevel >= SSE1; }
    539   bool hasSSE2() const { return X86SSELevel >= SSE2; }
    540   bool hasSSE3() const { return X86SSELevel >= SSE3; }
    541   bool hasSSSE3() const { return X86SSELevel >= SSSE3; }
    542   bool hasSSE41() const { return X86SSELevel >= SSE41; }
    543   bool hasSSE42() const { return X86SSELevel >= SSE42; }
    544   bool hasAVX() const { return X86SSELevel >= AVX; }
    545   bool hasAVX2() const { return X86SSELevel >= AVX2; }
    546   bool hasAVX512() const { return X86SSELevel >= AVX512F; }
    547   bool hasInt256() const { return hasAVX2(); }
    548   bool hasSSE4A() const { return HasSSE4A; }
    549   bool hasMMX() const { return X863DNowLevel >= MMX; }
    550   bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
    551   bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
    552   bool hasPOPCNT() const { return HasPOPCNT; }
    553   bool hasAES() const { return HasAES; }
    554   bool hasVAES() const { return HasVAES; }
    555   bool hasFXSR() const { return HasFXSR; }
    556   bool hasXSAVE() const { return HasXSAVE; }
    557   bool hasXSAVEOPT() const { return HasXSAVEOPT; }
    558   bool hasXSAVEC() const { return HasXSAVEC; }
    559   bool hasXSAVES() const { return HasXSAVES; }
    560   bool hasPCLMUL() const { return HasPCLMUL; }
    561   bool hasVPCLMULQDQ() const { return HasVPCLMULQDQ; }
    562   bool hasGFNI() const { return HasGFNI; }
    563   // Prefer FMA4 to FMA - its better for commutation/memory folding and
    564   // has equal or better performance on all supported targets.
    565   bool hasFMA() const { return HasFMA; }
    566   bool hasFMA4() const { return HasFMA4; }
    567   bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
    568   bool hasXOP() const { return HasXOP; }
    569   bool hasTBM() const { return HasTBM; }
    570   bool hasLWP() const { return HasLWP; }
    571   bool hasMOVBE() const { return HasMOVBE; }
    572   bool hasRDRAND() const { return HasRDRAND; }
    573   bool hasF16C() const { return HasF16C; }
    574   bool hasFSGSBase() const { return HasFSGSBase; }
    575   bool hasLZCNT() const { return HasLZCNT; }
    576   bool hasBMI() const { return HasBMI; }
    577   bool hasBMI2() const { return HasBMI2; }
    578   bool hasVBMI() const { return HasVBMI; }
    579   bool hasVBMI2() const { return HasVBMI2; }
    580   bool hasIFMA() const { return HasIFMA; }
    581   bool hasRTM() const { return HasRTM; }
    582   bool hasADX() const { return HasADX; }
    583   bool hasSHA() const { return HasSHA; }
    584   bool hasPRFCHW() const { return HasPRFCHW || HasPREFETCHWT1; }
    585   bool hasPREFETCHWT1() const { return HasPREFETCHWT1; }
    586   bool hasSSEPrefetch() const {
    587     // We implicitly enable these when we have a write prefix supporting cache
    588     // level OR if we have prfchw, but don't already have a read prefetch from
    589     // 3dnow.
    590     return hasSSE1() || (hasPRFCHW() && !has3DNow()) || hasPREFETCHWT1();
    591   }
    592   bool hasRDSEED() const { return HasRDSEED; }
    593   bool hasLAHFSAHF() const { return HasLAHFSAHF; }
    594   bool hasMWAITX() const { return HasMWAITX; }
    595   bool hasCLZERO() const { return HasCLZERO; }
    596   bool hasCLDEMOTE() const { return HasCLDEMOTE; }
    597   bool hasMOVDIRI() const { return HasMOVDIRI; }
    598   bool hasMOVDIR64B() const { return HasMOVDIR64B; }
    599   bool hasPTWRITE() const { return HasPTWRITE; }
    600   bool isSHLDSlow() const { return IsSHLDSlow; }
    601   bool isPMULLDSlow() const { return IsPMULLDSlow; }
    602   bool isUnalignedMem16Slow() const { return IsUAMem16Slow; }
    603   bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
    604   int getGatherOverhead() const { return GatherOverhead; }
    605   int getScatterOverhead() const { return ScatterOverhead; }
    606   bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
    607   bool hasCmpxchg16b() const { return HasCmpxchg16b; }
    608   bool useLeaForSP() const { return UseLeaForSP; }
    609   bool hasPOPCNTFalseDeps() const { return HasPOPCNTFalseDeps; }
    610   bool hasLZCNTFalseDeps() const { return HasLZCNTFalseDeps; }
    611   bool hasFastVariableShuffle() const {
    612     return HasFastVariableShuffle;
    613   }
    614   bool hasFastPartialYMMorZMMWrite() const {
    615     return HasFastPartialYMMorZMMWrite;
    616   }
    617   bool hasFastGather() const { return HasFastGather; }
    618   bool hasFastScalarFSQRT() const { return HasFastScalarFSQRT; }
    619   bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }
    620   bool hasFastLZCNT() const { return HasFastLZCNT; }
    621   bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
    622   bool hasMacroFusion() const { return HasMacroFusion; }
    623   bool hasERMSB() const { return HasERMSB; }
    624   bool hasSlowDivide32() const { return HasSlowDivide32; }
    625   bool hasSlowDivide64() const { return HasSlowDivide64; }
    626   bool padShortFunctions() const { return PadShortFunctions; }
    627   bool slowTwoMemOps() const { return SlowTwoMemOps; }
    628   bool LEAusesAG() const { return LEAUsesAG; }
    629   bool slowLEA() const { return SlowLEA; }
    630   bool slow3OpsLEA() const { return Slow3OpsLEA; }
    631   bool slowIncDec() const { return SlowIncDec; }
    632   bool hasCDI() const { return HasCDI; }
    633   bool hasVPOPCNTDQ() const { return HasVPOPCNTDQ; }
    634   bool hasPFI() const { return HasPFI; }
    635   bool hasERI() const { return HasERI; }
    636   bool hasDQI() const { return HasDQI; }
    637   bool hasBWI() const { return HasBWI; }
    638   bool hasVLX() const { return HasVLX; }
    639   bool hasPKU() const { return HasPKU; }
    640   bool hasVNNI() const { return HasVNNI; }
    641   bool hasBITALG() const { return HasBITALG; }
    642   bool hasMPX() const { return HasMPX; }
    643   bool hasSHSTK() const { return HasSHSTK; }
    644   bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; }
    645   bool hasCLWB() const { return HasCLWB; }
    646   bool hasWBNOINVD() const { return HasWBNOINVD; }
    647   bool hasRDPID() const { return HasRDPID; }
    648   bool hasWAITPKG() const { return HasWAITPKG; }
    649   bool hasPCONFIG() const { return HasPCONFIG; }
    650   bool hasSGX() const { return HasSGX; }
    651   bool hasINVPCID() const { return HasINVPCID; }
    652   bool useRetpoline() const { return UseRetpoline; }
    653   bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
    654 
    655   unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
    656   unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }
    657 
    658   // Helper functions to determine when we should allow widening to 512-bit
    659   // during codegen.
    660   // TODO: Currently we're always allowing widening on CPUs without VLX,
    661   // because for many cases we don't have a better option.
    662   bool canExtendTo512DQ() const {
    663     return hasAVX512() && (!hasVLX() || getPreferVectorWidth() >= 512);
    664   }
    665   bool canExtendTo512BW() const  {
    666     return hasBWI() && canExtendTo512DQ();
    667   }
    668 
    669   // If there are no 512-bit vectors and we prefer not to use 512-bit registers,
    670   // disable them in the legalizer.
    671   bool useAVX512Regs() const {
    672     return hasAVX512() && (canExtendTo512DQ() || RequiredVectorWidth > 256);
    673   }
    674 
    675   bool useBWIRegs() const {
    676     return hasBWI() && useAVX512Regs();
    677   }
    678 
    679   bool isXRaySupported() const override { return is64Bit(); }
    680 
    681   X86ProcFamilyEnum getProcFamily() const { return X86ProcFamily; }
    682 
    683   /// TODO: to be removed later and replaced with suitable properties
    684   bool isAtom() const { return X86ProcFamily == IntelAtom; }
    685   bool isSLM() const { return X86ProcFamily == IntelSLM; }
    686   bool isGLM() const {
    687     return X86ProcFamily == IntelGLM ||
    688            X86ProcFamily == IntelGLP ||
    689            X86ProcFamily == IntelTRM;
    690   }
    691   bool useSoftFloat() const { return UseSoftFloat; }
    692 
    693   /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
    694   /// no-sse2). There isn't any reason to disable it if the target processor
    695   /// supports it.
    696   bool hasMFence() const { return hasSSE2() || is64Bit(); }
    697 
    698   const Triple &getTargetTriple() const { return TargetTriple; }
    699 
    700   bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
    701   bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); }
    702   bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); }
    703   bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); }
    704   bool isTargetPS4() const { return TargetTriple.isPS4CPU(); }
    705 
    706   bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
    707   bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
    708   bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
    709 
    710   bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
    711   bool isTargetKFreeBSD() const { return TargetTriple.isOSKFreeBSD(); }
    712   bool isTargetGlibc() const { return TargetTriple.isOSGlibc(); }
    713   bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
    714   bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
    715   bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
    716   bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
    717   bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); }
    718   bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
    719 
    720   bool isTargetWindowsMSVC() const {
    721     return TargetTriple.isWindowsMSVCEnvironment();
    722   }
    723 
    724   bool isTargetKnownWindowsMSVC() const {
    725     return TargetTriple.isKnownWindowsMSVCEnvironment();
    726   }
    727 
    728   bool isTargetWindowsCoreCLR() const {
    729     return TargetTriple.isWindowsCoreCLREnvironment();
    730   }
    731 
    732   bool isTargetWindowsCygwin() const {
    733     return TargetTriple.isWindowsCygwinEnvironment();
    734   }
    735 
    736   bool isTargetWindowsGNU() const {
    737     return TargetTriple.isWindowsGNUEnvironment();
    738   }
    739 
    740   bool isTargetWindowsItanium() const {
    741     return TargetTriple.isWindowsItaniumEnvironment();
    742   }
    743 
    744   bool isTargetCygMing() const { return TargetTriple.isOSCygMing(); }
    745 
    746   bool isOSWindows() const { return TargetTriple.isOSWindows(); }
    747 
    748   bool isTargetWin64() const { return In64BitMode && isOSWindows(); }
    749 
    750   bool isTargetWin32() const { return !In64BitMode && isOSWindows(); }
    751 
    752   bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; }
    753   bool isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; }
    754 
    755   bool isPICStyleStubPIC() const {
    756     return PICStyle == PICStyles::StubPIC;
    757   }
    758 
    759   bool isPositionIndependent() const { return TM.isPositionIndependent(); }
    760 
    761   bool isCallingConvWin64(CallingConv::ID CC) const {
    762     switch (CC) {
    763     // On Win64, all these conventions just use the default convention.
    764     case CallingConv::C:
    765     case CallingConv::Fast:
    766     case CallingConv::Swift:
    767     case CallingConv::X86_FastCall:
    768     case CallingConv::X86_StdCall:
    769     case CallingConv::X86_ThisCall:
    770     case CallingConv::X86_VectorCall:
    771     case CallingConv::Intel_OCL_BI:
    772       return isTargetWin64();
    773     // This convention allows using the Win64 convention on other targets.
    774     case CallingConv::Win64:
    775       return true;
    776     // This convention allows using the SysV convention on Windows targets.
    777     case CallingConv::X86_64_SysV:
    778       return false;
    779     // Otherwise, who knows what this is.
    780     default:
    781       return false;
    782     }
    783   }
    784 
    785   /// Classify a global variable reference for the current subtarget according
    786   /// to how we should reference it in a non-pcrel context.
    787   unsigned char classifyLocalReference(const GlobalValue *GV) const;
    788 
    789   unsigned char classifyGlobalReference(const GlobalValue *GV,
    790                                         const Module &M) const;
    791   unsigned char classifyGlobalReference(const GlobalValue *GV) const;
    792 
    793   /// Classify a global function reference for the current subtarget.
    794   unsigned char classifyGlobalFunctionReference(const GlobalValue *GV,
    795                                                 const Module &M) const;
    796   unsigned char classifyGlobalFunctionReference(const GlobalValue *GV) const;
    797 
    798   /// Classify a blockaddress reference for the current subtarget according to
    799   /// how we should reference it in a non-pcrel context.
    800   unsigned char classifyBlockAddressReference() const;
    801 
    802   /// Return true if the subtarget allows calls to immediate address.
    803   bool isLegalToCallImmediateAddr() const;
    804 
    805   /// If we are using retpolines, we need to expand indirectbr to avoid it
    806   /// lowering to an actual indirect jump.
    807   bool enableIndirectBrExpand() const override { return useRetpoline(); }
    808 
    809   /// Enable the MachineScheduler pass for all X86 subtargets.
    810   bool enableMachineScheduler() const override { return true; }
    811 
    812   // TODO: Update the regression tests and return true.
    813   bool supportPrintSchedInfo() const override { return false; }
    814 
    815   bool enableEarlyIfConversion() const override;
    816 
    817   AntiDepBreakMode getAntiDepBreakMode() const override {
    818     return TargetSubtargetInfo::ANTIDEP_CRITICAL;
    819   }
    820 
    821   bool enableAdvancedRASplitCost() const override { return true; }
    822 };
    823 
    824 } // end namespace llvm
    825 
    826 #endif // LLVM_LIB_TARGET_X86_X86SUBTARGET_H
    827