Home | History | Annotate | Download | only in AMDGPU
      1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //==-----------------------------------------------------------------------===//
      9 //
     10 /// \file
     11 /// AMDGPU specific subclass of TargetSubtarget.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
     16 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
     17 
     18 #include "AMDGPU.h"
     19 #include "AMDGPUCallLowering.h"
     20 #include "R600FrameLowering.h"
     21 #include "R600ISelLowering.h"
     22 #include "R600InstrInfo.h"
     23 #include "SIFrameLowering.h"
     24 #include "SIISelLowering.h"
     25 #include "SIInstrInfo.h"
     26 #include "Utils/AMDGPUBaseInfo.h"
     27 #include "llvm/ADT/Triple.h"
     28 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
     29 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
     30 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
     31 #include "llvm/CodeGen/MachineFunction.h"
     32 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
     33 #include "llvm/MC/MCInstrItineraries.h"
     34 #include "llvm/Support/MathExtras.h"
     35 #include <cassert>
     36 #include <cstdint>
     37 #include <memory>
     38 #include <utility>
     39 
     40 #define GET_SUBTARGETINFO_HEADER
     41 #include "AMDGPUGenSubtargetInfo.inc"
     42 #define GET_SUBTARGETINFO_HEADER
     43 #include "R600GenSubtargetInfo.inc"
     44 
     45 namespace llvm {
     46 
     47 class StringRef;
     48 
     49 class AMDGPUSubtarget {
     50 public:
     51   enum Generation {
     52     R600 = 0,
     53     R700 = 1,
     54     EVERGREEN = 2,
     55     NORTHERN_ISLANDS = 3,
     56     SOUTHERN_ISLANDS = 4,
     57     SEA_ISLANDS = 5,
     58     VOLCANIC_ISLANDS = 6,
     59     GFX9 = 7
     60   };
     61 
     62 private:
     63   Triple TargetTriple;
     64 
     65 protected:
     66   const FeatureBitset &SubtargetFeatureBits;
     67   bool Has16BitInsts;
     68   bool HasMadMixInsts;
     69   bool FP32Denormals;
     70   bool FPExceptions;
     71   bool HasSDWA;
     72   bool HasVOP3PInsts;
     73   bool HasMulI24;
     74   bool HasMulU24;
     75   bool HasFminFmaxLegacy;
     76   bool EnablePromoteAlloca;
     77   int LocalMemorySize;
     78   unsigned WavefrontSize;
     79 
     80 public:
     81   AMDGPUSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
     82 
     83   static const AMDGPUSubtarget &get(const MachineFunction &MF);
     84   static const AMDGPUSubtarget &get(const TargetMachine &TM,
     85                                     const Function &F);
     86 
     87   /// \returns Default range flat work group size for a calling convention.
     88   std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
     89 
     90   /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
     91   /// for function \p F, or minimum/maximum flat work group sizes explicitly
     92   /// requested using "amdgpu-flat-work-group-size" attribute attached to
     93   /// function \p F.
     94   ///
     95   /// \returns Subtarget's default values if explicitly requested values cannot
     96   /// be converted to integer, or violate subtarget's specifications.
     97   std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
     98 
     99   /// \returns Subtarget's default pair of minimum/maximum number of waves per
    100   /// execution unit for function \p F, or minimum/maximum number of waves per
    101   /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
    102   /// attached to function \p F.
    103   ///
    104   /// \returns Subtarget's default values if explicitly requested values cannot
    105   /// be converted to integer, violate subtarget's specifications, or are not
    106   /// compatible with minimum/maximum number of waves limited by flat work group
    107   /// size, register usage, and/or lds usage.
    108   std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
    109 
    110   /// Return the amount of LDS that can be used that will not restrict the
    111   /// occupancy lower than WaveCount.
    112   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
    113                                            const Function &) const;
    114 
    115   /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if
    116   /// the given LDS memory size is the only constraint.
    117   unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
    118 
    119   unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
    120 
    121   bool isAmdHsaOS() const {
    122     return TargetTriple.getOS() == Triple::AMDHSA;
    123   }
    124 
    125   bool isAmdPalOS() const {
    126     return TargetTriple.getOS() == Triple::AMDPAL;
    127   }
    128 
    129   bool isMesa3DOS() const {
    130     return TargetTriple.getOS() == Triple::Mesa3D;
    131   }
    132 
    133   bool isMesaKernel(const Function &F) const {
    134     return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
    135   }
    136 
    137   bool isAmdCodeObjectV2(const Function &F) const {
    138     return isAmdHsaOS() || isMesaKernel(F);
    139   }
    140 
    141   bool has16BitInsts() const {
    142     return Has16BitInsts;
    143   }
    144 
    145   bool hasMadMixInsts() const {
    146     return HasMadMixInsts;
    147   }
    148 
    149   bool hasFP32Denormals() const {
    150     return FP32Denormals;
    151   }
    152 
    153   bool hasFPExceptions() const {
    154     return FPExceptions;
    155   }
    156 
    157   bool hasSDWA() const {
    158     return HasSDWA;
    159   }
    160 
    161   bool hasVOP3PInsts() const {
    162     return HasVOP3PInsts;
    163   }
    164 
    165   bool hasMulI24() const {
    166     return HasMulI24;
    167   }
    168 
    169   bool hasMulU24() const {
    170     return HasMulU24;
    171   }
    172 
    173   bool hasFminFmaxLegacy() const {
    174     return HasFminFmaxLegacy;
    175   }
    176 
    177   bool isPromoteAllocaEnabled() const {
    178     return EnablePromoteAlloca;
    179   }
    180 
    181   unsigned getWavefrontSize() const {
    182     return WavefrontSize;
    183   }
    184 
    185   int getLocalMemorySize() const {
    186     return LocalMemorySize;
    187   }
    188 
    189   unsigned getAlignmentForImplicitArgPtr() const {
    190     return isAmdHsaOS() ? 8 : 4;
    191   }
    192 
    193   /// Returns the offset in bytes from the start of the input buffer
    194   ///        of the first explicit kernel argument.
    195   unsigned getExplicitKernelArgOffset(const Function &F) const {
    196     return isAmdCodeObjectV2(F) ? 0 : 36;
    197   }
    198 
    199   /// \returns Maximum number of work groups per compute unit supported by the
    200   /// subtarget and limited by given \p FlatWorkGroupSize.
    201   unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
    202     return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits,
    203                                                   FlatWorkGroupSize);
    204   }
    205 
    206   /// \returns Minimum flat work group size supported by the subtarget.
    207   unsigned getMinFlatWorkGroupSize() const {
    208     return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits);
    209   }
    210 
    211   /// \returns Maximum flat work group size supported by the subtarget.
    212   unsigned getMaxFlatWorkGroupSize() const {
    213     return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits);
    214   }
    215 
    216   /// \returns Maximum number of waves per execution unit supported by the
    217   /// subtarget and limited by given \p FlatWorkGroupSize.
    218   unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
    219     return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits,
    220                                              FlatWorkGroupSize);
    221   }
    222 
    223   /// \returns Minimum number of waves per execution unit supported by the
    224   /// subtarget.
    225   unsigned getMinWavesPerEU() const {
    226     return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits);
    227   }
    228 
    229   unsigned getMaxWavesPerEU() const { return 10; }
    230 
    231   /// Creates value range metadata on an workitemid.* inrinsic call or load.
    232   bool makeLIDRangeMetadata(Instruction *I) const;
    233 
    234   /// \returns Number of bytes of arguments that are passed to a shader or
    235   /// kernel in addition to the explicit ones declared for the function.
    236   unsigned getImplicitArgNumBytes(const Function &F) const {
    237     if (isMesaKernel(F))
    238       return 16;
    239     return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0);
    240   }
    241   uint64_t getExplicitKernArgSize(const Function &F,
    242                                   unsigned &MaxAlign) const;
    243   unsigned getKernArgSegmentSize(const Function &F,
    244                                  unsigned &MaxAlign) const;
    245 
    246   virtual ~AMDGPUSubtarget() {}
    247 };
    248 
    249 class GCNSubtarget : public AMDGPUGenSubtargetInfo,
    250                      public AMDGPUSubtarget {
    251 public:
    252   enum {
    253     ISAVersion0_0_0,
    254     ISAVersion6_0_0,
    255     ISAVersion6_0_1,
    256     ISAVersion7_0_0,
    257     ISAVersion7_0_1,
    258     ISAVersion7_0_2,
    259     ISAVersion7_0_3,
    260     ISAVersion7_0_4,
    261     ISAVersion8_0_1,
    262     ISAVersion8_0_2,
    263     ISAVersion8_0_3,
    264     ISAVersion8_1_0,
    265     ISAVersion9_0_0,
    266     ISAVersion9_0_2,
    267     ISAVersion9_0_4,
    268     ISAVersion9_0_6,
    269   };
    270 
    271   enum TrapHandlerAbi {
    272     TrapHandlerAbiNone = 0,
    273     TrapHandlerAbiHsa = 1
    274   };
    275 
    276   enum TrapID {
    277     TrapIDHardwareReserved = 0,
    278     TrapIDHSADebugTrap = 1,
    279     TrapIDLLVMTrap = 2,
    280     TrapIDLLVMDebugTrap = 3,
    281     TrapIDDebugBreakpoint = 7,
    282     TrapIDDebugReserved8 = 8,
    283     TrapIDDebugReservedFE = 0xfe,
    284     TrapIDDebugReservedFF = 0xff
    285   };
    286 
    287   enum TrapRegValues {
    288     LLVMTrapHandlerRegValue = 1
    289   };
    290 
    291 private:
    292   /// GlobalISel related APIs.
    293   std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
    294   std::unique_ptr<InstructionSelector> InstSelector;
    295   std::unique_ptr<LegalizerInfo> Legalizer;
    296   std::unique_ptr<RegisterBankInfo> RegBankInfo;
    297 
    298 protected:
    299   // Basic subtarget description.
    300   Triple TargetTriple;
    301   unsigned Gen;
    302   unsigned IsaVersion;
    303   int LDSBankCount;
    304   unsigned MaxPrivateElementSize;
    305 
    306   // Possibly statically set by tablegen, but may want to be overridden.
    307   bool FastFMAF32;
    308   bool HalfRate64Ops;
    309 
    310   // Dynamially set bits that enable features.
    311   bool FP64FP16Denormals;
    312   bool DX10Clamp;
    313   bool FlatForGlobal;
    314   bool AutoWaitcntBeforeBarrier;
    315   bool CodeObjectV3;
    316   bool UnalignedScratchAccess;
    317   bool UnalignedBufferAccess;
    318   bool HasApertureRegs;
    319   bool EnableXNACK;
    320   bool TrapHandler;
    321   bool DebuggerInsertNops;
    322   bool DebuggerEmitPrologue;
    323 
    324   // Used as options.
    325   bool EnableHugePrivateBuffer;
    326   bool EnableVGPRSpilling;
    327   bool EnableLoadStoreOpt;
    328   bool EnableUnsafeDSOffsetFolding;
    329   bool EnableSIScheduler;
    330   bool EnableDS128;
    331   bool DumpCode;
    332 
    333   // Subtarget statically properties set by tablegen
    334   bool FP64;
    335   bool FMA;
    336   bool MIMG_R128;
    337   bool IsGCN;
    338   bool GCN3Encoding;
    339   bool CIInsts;
    340   bool GFX9Insts;
    341   bool SGPRInitBug;
    342   bool HasSMemRealTime;
    343   bool HasIntClamp;
    344   bool HasFmaMixInsts;
    345   bool HasMovrel;
    346   bool HasVGPRIndexMode;
    347   bool HasScalarStores;
    348   bool HasScalarAtomics;
    349   bool HasInv2PiInlineImm;
    350   bool HasSDWAOmod;
    351   bool HasSDWAScalar;
    352   bool HasSDWASdst;
    353   bool HasSDWAMac;
    354   bool HasSDWAOutModsVOPC;
    355   bool HasDPP;
    356   bool HasDLInsts;
    357   bool D16PreservesUnusedBits;
    358   bool FlatAddressSpace;
    359   bool FlatInstOffsets;
    360   bool FlatGlobalInsts;
    361   bool FlatScratchInsts;
    362   bool AddNoCarryInsts;
    363   bool HasUnpackedD16VMem;
    364   bool R600ALUInst;
    365   bool CaymanISA;
    366   bool CFALUBug;
    367   bool HasVertexCache;
    368   short TexVTXClauseSize;
    369   bool ScalarizeGlobal;
    370 
    371   // Dummy feature to use for assembler in tablegen.
    372   bool FeatureDisable;
    373 
    374   SelectionDAGTargetInfo TSInfo;
    375   AMDGPUAS AS;
    376 private:
    377   SIInstrInfo InstrInfo;
    378   SITargetLowering TLInfo;
    379   SIFrameLowering FrameLowering;
    380 
    381 public:
    382   GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
    383                const GCNTargetMachine &TM);
    384   ~GCNSubtarget() override;
    385 
    386   GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
    387                                                    StringRef GPU, StringRef FS);
    388 
    389   const SIInstrInfo *getInstrInfo() const override {
    390     return &InstrInfo;
    391   }
    392 
    393   const SIFrameLowering *getFrameLowering() const override {
    394     return &FrameLowering;
    395   }
    396 
    397   const SITargetLowering *getTargetLowering() const override {
    398     return &TLInfo;
    399   }
    400 
    401   const SIRegisterInfo *getRegisterInfo() const override {
    402     return &InstrInfo.getRegisterInfo();
    403   }
    404 
    405   const CallLowering *getCallLowering() const override {
    406     return CallLoweringInfo.get();
    407   }
    408 
    409   const InstructionSelector *getInstructionSelector() const override {
    410     return InstSelector.get();
    411   }
    412 
    413   const LegalizerInfo *getLegalizerInfo() const override {
    414     return Legalizer.get();
    415   }
    416 
    417   const RegisterBankInfo *getRegBankInfo() const override {
    418     return RegBankInfo.get();
    419   }
    420 
    421   // Nothing implemented, just prevent crashes on use.
    422   const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
    423     return &TSInfo;
    424   }
    425 
    426   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
    427 
    428   Generation getGeneration() const {
    429     return (Generation)Gen;
    430   }
    431 
    432   unsigned getWavefrontSizeLog2() const {
    433     return Log2_32(WavefrontSize);
    434   }
    435 
    436   int getLDSBankCount() const {
    437     return LDSBankCount;
    438   }
    439 
    440   unsigned getMaxPrivateElementSize() const {
    441     return MaxPrivateElementSize;
    442   }
    443 
    444   AMDGPUAS getAMDGPUAS() const {
    445     return AS;
    446   }
    447 
    448   bool hasIntClamp() const {
    449     return HasIntClamp;
    450   }
    451 
    452   bool hasFP64() const {
    453     return FP64;
    454   }
    455 
    456   bool hasMIMG_R128() const {
    457     return MIMG_R128;
    458   }
    459 
    460   bool hasHWFP64() const {
    461     return FP64;
    462   }
    463 
    464   bool hasFastFMAF32() const {
    465     return FastFMAF32;
    466   }
    467 
    468   bool hasHalfRate64Ops() const {
    469     return HalfRate64Ops;
    470   }
    471 
    472   bool hasAddr64() const {
    473     return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
    474   }
    475 
    476   bool hasBFE() const {
    477     return true;
    478   }
    479 
    480   bool hasBFI() const {
    481     return true;
    482   }
    483 
    484   bool hasBFM() const {
    485     return hasBFE();
    486   }
    487 
    488   bool hasBCNT(unsigned Size) const {
    489     return true;
    490   }
    491 
    492   bool hasFFBL() const {
    493     return true;
    494   }
    495 
    496   bool hasFFBH() const {
    497     return true;
    498   }
    499 
    500   bool hasMed3_16() const {
    501     return getGeneration() >= AMDGPUSubtarget::GFX9;
    502   }
    503 
    504   bool hasMin3Max3_16() const {
    505     return getGeneration() >= AMDGPUSubtarget::GFX9;
    506   }
    507 
    508   bool hasFmaMixInsts() const {
    509     return HasFmaMixInsts;
    510   }
    511 
    512   bool hasCARRY() const {
    513     return true;
    514   }
    515 
    516   bool hasFMA() const {
    517     return FMA;
    518   }
    519 
    520   TrapHandlerAbi getTrapHandlerAbi() const {
    521     return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
    522   }
    523 
    524   bool enableHugePrivateBuffer() const {
    525     return EnableHugePrivateBuffer;
    526   }
    527 
    528   bool unsafeDSOffsetFoldingEnabled() const {
    529     return EnableUnsafeDSOffsetFolding;
    530   }
    531 
    532   bool dumpCode() const {
    533     return DumpCode;
    534   }
    535 
    536   /// Return the amount of LDS that can be used that will not restrict the
    537   /// occupancy lower than WaveCount.
    538   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
    539                                            const Function &) const;
    540 
    541   bool hasFP16Denormals() const {
    542     return FP64FP16Denormals;
    543   }
    544 
    545   bool hasFP64Denormals() const {
    546     return FP64FP16Denormals;
    547   }
    548 
    549   bool supportsMinMaxDenormModes() const {
    550     return getGeneration() >= AMDGPUSubtarget::GFX9;
    551   }
    552 
    553   bool enableDX10Clamp() const {
    554     return DX10Clamp;
    555   }
    556 
    557   bool enableIEEEBit(const MachineFunction &MF) const {
    558     return AMDGPU::isCompute(MF.getFunction().getCallingConv());
    559   }
    560 
    561   bool useFlatForGlobal() const {
    562     return FlatForGlobal;
    563   }
    564 
    565   /// \returns If target supports ds_read/write_b128 and user enables generation
    566   /// of ds_read/write_b128.
    567   bool useDS128() const {
    568     return CIInsts && EnableDS128;
    569   }
    570 
    571   /// \returns If MUBUF instructions always perform range checking, even for
    572   /// buffer resources used for private memory access.
    573   bool privateMemoryResourceIsRangeChecked() const {
    574     return getGeneration() < AMDGPUSubtarget::GFX9;
    575   }
    576 
    577   bool hasAutoWaitcntBeforeBarrier() const {
    578     return AutoWaitcntBeforeBarrier;
    579   }
    580 
    581   bool hasCodeObjectV3() const {
    582     return CodeObjectV3;
    583   }
    584 
    585   bool hasUnalignedBufferAccess() const {
    586     return UnalignedBufferAccess;
    587   }
    588 
    589   bool hasUnalignedScratchAccess() const {
    590     return UnalignedScratchAccess;
    591   }
    592 
    593   bool hasApertureRegs() const {
    594     return HasApertureRegs;
    595   }
    596 
    597   bool isTrapHandlerEnabled() const {
    598     return TrapHandler;
    599   }
    600 
    601   bool isXNACKEnabled() const {
    602     return EnableXNACK;
    603   }
    604 
    605   bool hasFlatAddressSpace() const {
    606     return FlatAddressSpace;
    607   }
    608 
    609   bool hasFlatInstOffsets() const {
    610     return FlatInstOffsets;
    611   }
    612 
    613   bool hasFlatGlobalInsts() const {
    614     return FlatGlobalInsts;
    615   }
    616 
    617   bool hasFlatScratchInsts() const {
    618     return FlatScratchInsts;
    619   }
    620 
    621   bool hasFlatLgkmVMemCountInOrder() const {
    622     return getGeneration() > GFX9;
    623   }
    624 
    625   bool hasD16LoadStore() const {
    626     return getGeneration() >= GFX9;
    627   }
    628 
    629   /// Return if most LDS instructions have an m0 use that require m0 to be
    630   /// iniitalized.
    631   bool ldsRequiresM0Init() const {
    632     return getGeneration() < GFX9;
    633   }
    634 
    635   bool hasAddNoCarry() const {
    636     return AddNoCarryInsts;
    637   }
    638 
    639   bool hasUnpackedD16VMem() const {
    640     return HasUnpackedD16VMem;
    641   }
    642 
    643   // Covers VS/PS/CS graphics shaders
    644   bool isMesaGfxShader(const Function &F) const {
    645     return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
    646   }
    647 
    648   bool hasMad64_32() const {
    649     return getGeneration() >= SEA_ISLANDS;
    650   }
    651 
    652   bool hasSDWAOmod() const {
    653     return HasSDWAOmod;
    654   }
    655 
    656   bool hasSDWAScalar() const {
    657     return HasSDWAScalar;
    658   }
    659 
    660   bool hasSDWASdst() const {
    661     return HasSDWASdst;
    662   }
    663 
    664   bool hasSDWAMac() const {
    665     return HasSDWAMac;
    666   }
    667 
    668   bool hasSDWAOutModsVOPC() const {
    669     return HasSDWAOutModsVOPC;
    670   }
    671 
    672   bool vmemWriteNeedsExpWaitcnt() const {
    673     return getGeneration() < SEA_ISLANDS;
    674   }
    675 
    676   bool hasDLInsts() const {
    677     return HasDLInsts;
    678   }
    679 
    680   bool d16PreservesUnusedBits() const {
    681     return D16PreservesUnusedBits;
    682   }
    683 
    684   // Scratch is allocated in 256 dword per wave blocks for the entire
    685   // wavefront. When viewed from the perspecive of an arbitrary workitem, this
    686   // is 4-byte aligned.
    687   //
    688   // Only 4-byte alignment is really needed to access anything. Transformations
    689   // on the pointer value itself may rely on the alignment / known low bits of
    690   // the pointer. Set this to something above the minimum to avoid needing
    691   // dynamic realignment in common cases.
    692   unsigned getStackAlignment() const {
    693     return 16;
    694   }
    695 
    696   bool enableMachineScheduler() const override {
    697     return true;
    698   }
    699 
    700   bool enableSubRegLiveness() const override {
    701     return true;
    702   }
    703 
    704   void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
    705   bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; }
    706 
    707   /// \returns Number of execution units per compute unit supported by the
    708   /// subtarget.
    709   unsigned getEUsPerCU() const {
    710     return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits());
    711   }
    712 
    713   /// \returns Maximum number of waves per compute unit supported by the
    714   /// subtarget without any kind of limitation.
    715   unsigned getMaxWavesPerCU() const {
    716     return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits());
    717   }
    718 
    719   /// \returns Maximum number of waves per compute unit supported by the
    720   /// subtarget and limited by given \p FlatWorkGroupSize.
    721   unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
    722     return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(),
    723                                              FlatWorkGroupSize);
    724   }
    725 
    726   /// \returns Maximum number of waves per execution unit supported by the
    727   /// subtarget without any kind of limitation.
    728   unsigned getMaxWavesPerEU() const {
    729     return AMDGPU::IsaInfo::getMaxWavesPerEU();
    730   }
    731 
    732   /// \returns Number of waves per work group supported by the subtarget and
    733   /// limited by given \p FlatWorkGroupSize.
    734   unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
    735     return AMDGPU::IsaInfo::getWavesPerWorkGroup(
    736         MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize);
    737   }
    738 
    739   // static wrappers
    740   static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
    741 
    742   // XXX - Why is this here if it isn't in the default pass set?
    743   bool enableEarlyIfConversion() const override {
    744     return true;
    745   }
    746 
    747   void overrideSchedPolicy(MachineSchedPolicy &Policy,
    748                            unsigned NumRegionInstrs) const override;
    749 
    750   bool isVGPRSpillingEnabled(const Function &F) const;
    751 
    752   unsigned getMaxNumUserSGPRs() const {
    753     return 16;
    754   }
    755 
    756   bool hasSMemRealTime() const {
    757     return HasSMemRealTime;
    758   }
    759 
    760   bool hasMovrel() const {
    761     return HasMovrel;
    762   }
    763 
    764   bool hasVGPRIndexMode() const {
    765     return HasVGPRIndexMode;
    766   }
    767 
    768   bool useVGPRIndexMode(bool UserEnable) const {
    769     return !hasMovrel() || (UserEnable && hasVGPRIndexMode());
    770   }
    771 
    772   bool hasScalarCompareEq64() const {
    773     return getGeneration() >= VOLCANIC_ISLANDS;
    774   }
    775 
    776   bool hasScalarStores() const {
    777     return HasScalarStores;
    778   }
    779 
    780   bool hasScalarAtomics() const {
    781     return HasScalarAtomics;
    782   }
    783 
    784   bool hasInv2PiInlineImm() const {
    785     return HasInv2PiInlineImm;
    786   }
    787 
    788   bool hasDPP() const {
    789     return HasDPP;
    790   }
    791 
    792   bool enableSIScheduler() const {
    793     return EnableSIScheduler;
    794   }
    795 
    796   bool debuggerSupported() const {
    797     return debuggerInsertNops() && debuggerEmitPrologue();
    798   }
    799 
    800   bool debuggerInsertNops() const {
    801     return DebuggerInsertNops;
    802   }
    803 
    804   bool debuggerEmitPrologue() const {
    805     return DebuggerEmitPrologue;
    806   }
    807 
    808   bool loadStoreOptEnabled() const {
    809     return EnableLoadStoreOpt;
    810   }
    811 
    812   bool hasSGPRInitBug() const {
    813     return SGPRInitBug;
    814   }
    815 
    816   bool has12DWordStoreHazard() const {
    817     return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
    818   }
    819 
    820   bool hasSMovFedHazard() const {
    821     return getGeneration() >= AMDGPUSubtarget::GFX9;
    822   }
    823 
    824   bool hasReadM0MovRelInterpHazard() const {
    825     return getGeneration() >= AMDGPUSubtarget::GFX9;
    826   }
    827 
    828   bool hasReadM0SendMsgHazard() const {
    829     return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
    830   }
    831 
    832   /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
    833   /// SGPRs
    834   unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
    835 
    836   /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
    837   /// VGPRs
    838   unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
    839 
    840   /// \returns true if the flat_scratch register should be initialized with the
    841   /// pointer to the wave's scratch memory rather than a size and offset.
    842   bool flatScratchIsPointer() const {
    843     return getGeneration() >= AMDGPUSubtarget::GFX9;
    844   }
    845 
    846   /// \returns true if the machine has merged shaders in which s0-s7 are
    847   /// reserved by the hardware and user SGPRs start at s8
    848   bool hasMergedShaders() const {
    849     return getGeneration() >= GFX9;
    850   }
    851 
    852   /// \returns SGPR allocation granularity supported by the subtarget.
    853   unsigned getSGPRAllocGranule() const {
    854     return AMDGPU::IsaInfo::getSGPRAllocGranule(
    855         MCSubtargetInfo::getFeatureBits());
    856   }
    857 
    858   /// \returns SGPR encoding granularity supported by the subtarget.
    859   unsigned getSGPREncodingGranule() const {
    860     return AMDGPU::IsaInfo::getSGPREncodingGranule(
    861         MCSubtargetInfo::getFeatureBits());
    862   }
    863 
    864   /// \returns Total number of SGPRs supported by the subtarget.
    865   unsigned getTotalNumSGPRs() const {
    866     return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits());
    867   }
    868 
    869   /// \returns Addressable number of SGPRs supported by the subtarget.
    870   unsigned getAddressableNumSGPRs() const {
    871     return AMDGPU::IsaInfo::getAddressableNumSGPRs(
    872         MCSubtargetInfo::getFeatureBits());
    873   }
    874 
    875   /// \returns Minimum number of SGPRs that meets the given number of waves per
    876   /// execution unit requirement supported by the subtarget.
    877   unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
    878     return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(),
    879                                            WavesPerEU);
    880   }
    881 
    882   /// \returns Maximum number of SGPRs that meets the given number of waves per
    883   /// execution unit requirement supported by the subtarget.
    884   unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
    885     return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(),
    886                                            WavesPerEU, Addressable);
    887   }
    888 
    889   /// \returns Reserved number of SGPRs for given function \p MF.
    890   unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
    891 
    892   /// \returns Maximum number of SGPRs that meets number of waves per execution
    893   /// unit requirement for function \p MF, or number of SGPRs explicitly
    894   /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
    895   ///
    896   /// \returns Value that meets number of waves per execution unit requirement
    897   /// if explicitly requested value cannot be converted to integer, violates
    898   /// subtarget's specifications, or does not meet number of waves per execution
    899   /// unit requirement.
    900   unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
    901 
    902   /// \returns VGPR allocation granularity supported by the subtarget.
    903   unsigned getVGPRAllocGranule() const {
    904     return AMDGPU::IsaInfo::getVGPRAllocGranule(
    905         MCSubtargetInfo::getFeatureBits());
    906   }
    907 
    908   /// \returns VGPR encoding granularity supported by the subtarget.
    909   unsigned getVGPREncodingGranule() const {
    910     return AMDGPU::IsaInfo::getVGPREncodingGranule(
    911         MCSubtargetInfo::getFeatureBits());
    912   }
    913 
    914   /// \returns Total number of VGPRs supported by the subtarget.
    915   unsigned getTotalNumVGPRs() const {
    916     return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits());
    917   }
    918 
    919   /// \returns The addressable VGPR count IsaInfo reports for this subtarget.
    920   unsigned getAddressableNumVGPRs() const {
    921     const auto &Features = MCSubtargetInfo::getFeatureBits();
    922     return AMDGPU::IsaInfo::getAddressableNumVGPRs(Features);
    923   }
    924 
    925   /// \returns The smallest VGPR count that meets the \p WavesPerEU waves per
    926   /// execution unit requirement, as computed by IsaInfo.
    927   unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
    928     const auto &Features = MCSubtargetInfo::getFeatureBits();
    929     return AMDGPU::IsaInfo::getMinNumVGPRs(Features, WavesPerEU);
    930   }
    931 
    932   /// \returns The largest VGPR count that meets the \p WavesPerEU waves per
    933   /// execution unit requirement, as computed by IsaInfo.
    934   unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
    935     const auto &Features = MCSubtargetInfo::getFeatureBits();
    936     return AMDGPU::IsaInfo::getMaxNumVGPRs(Features, WavesPerEU);
    937   }
    938 
    939   /// \returns Maximum number of VGPRs that meets the waves-per-execution-unit
    940   /// requirement for function \p MF, or the number of VGPRs explicitly
    941   /// requested using the "amdgpu-num-vgpr" attribute attached to \p MF.
    942   ///
    943   /// The value meeting that requirement is returned instead when the
    944   /// explicitly requested value cannot be converted to an integer, violates
    945   /// the subtarget's specifications, or does not meet the number of waves per
    946   /// execution unit requirement.
    947   unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
    948 
    949   void getPostRAMutations(
    950       std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
    951       const override; // Out-of-line override; presumably appends to Mutations.
    952 };
    953 
    954 class R600Subtarget final : public R600GenSubtargetInfo,
    955                             public AMDGPUSubtarget {
    956 private:
    957   R600InstrInfo InstrInfo; // Also the source of the register info.
    958   R600FrameLowering FrameLowering;
    959   bool FMA;
    960   bool CaymanISA;
    961   bool CFALUBug;
    962   bool DX10Clamp; // No accessor in this class.
    963   bool HasVertexCache;
    964   bool R600ALUInst; // No accessor in this class.
    965   bool FP64; // No accessor in this class.
    966   short TexVTXClauseSize;
    967   Generation Gen;
    968   R600TargetLowering TLInfo;
    969   InstrItineraryData InstrItins;
    970   SelectionDAGTargetInfo TSInfo; // Plain base-class object.
    971   AMDGPUAS AS;
    972 
    973 public:
    974   R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
    975                 const TargetMachine &TM);
    976 
    977   const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; }
    978 
    979   const R600FrameLowering *getFrameLowering() const override {
    980     return &FrameLowering;
    981   }
    982 
    983   const R600TargetLowering *getTargetLowering() const override {
    984     return &TLInfo;
    985   }
    986 
    987   const R600RegisterInfo *getRegisterInfo() const override {
    988     return &getInstrInfo()->getRegisterInfo();
    989   }
    990 
    991   const InstrItineraryData *getInstrItineraryData() const override {
    992     return &InstrItins;
    993   }
    994 
    995   // No R600-specific hooks; the plain base object just avoids crashes on use.
    996   const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
    997     return &TSInfo;
    998   }
    999 
   1000   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
   1001 
   1002   /// \returns The hardware generation held in \c Gen; the feature queries
   1003   /// below compare it against EVERGREEN.
   1004   Generation getGeneration() const { return Gen; }
   1005 
   1006   /// \returns The stack alignment for R600, a fixed value of 4 (the unit is
   1007   /// not stated here; presumably bytes).
   1008   unsigned getStackAlignment() const { return 4; }
   1009 
   1010   R600Subtarget &initializeSubtargetDependencies(const Triple &TT,
   1011                                                  StringRef GPU, StringRef FS);
   1012 
   1013   /// \returns True from the EVERGREEN generation onwards. BFE presumably
   1014   /// stands for bit-field extract; confirm against the ISel lowering.
   1015   bool hasBFE() const { return getGeneration() >= EVERGREEN; }
   1016 
   1017   /// \returns True from the EVERGREEN generation onwards. BFI presumably
   1018   /// stands for bit-field insert; confirm against the ISel lowering.
   1019   bool hasBFI() const { return getGeneration() >= EVERGREEN; }
   1020 
   1021   /// \returns True only for \p Size == 32 on EVERGREEN or later; every other
   1022   /// size yields false.
   1023   bool hasBCNT(unsigned Size) const {
   1024     const bool Is32Bit = Size == 32;
   1025     return Is32Bit && getGeneration() >= EVERGREEN;
   1026   }
   1027 
   1028   /// \returns True from the EVERGREEN generation onwards (borrow support,
   1029   /// going by the name).
   1030   bool hasBORROW() const { return getGeneration() >= EVERGREEN; }
   1031 
   1032   /// \returns True from the EVERGREEN generation onwards (carry support,
   1033   /// going by the name).
   1034   bool hasCARRY() const { return getGeneration() >= EVERGREEN; }
   1035 
   1036   /// \returns The \c CaymanISA flag; where it gets set is not visible in
   1037   /// this header.
   1038   bool hasCaymanISA() const { return CaymanISA; }
   1039 
   1040   /// \returns True from the EVERGREEN generation onwards. FFBL is presumably
   1041   /// a find-first-bit (low) operation; confirm against the ISel lowering.
   1042   bool hasFFBL() const { return getGeneration() >= EVERGREEN; }
   1043 
   1044   /// \returns True from the EVERGREEN generation onwards. FFBH is presumably
   1045   /// a find-first-bit (high) operation; confirm against the ISel lowering.
   1046   bool hasFFBH() const { return getGeneration() >= EVERGREEN; }
   1047 
   1048   bool hasFMA() const { return FMA; }
   1049 
   1050   bool hasCFAluBug() const { return CFALUBug; }
   1051 
   1052   bool hasVertexCache() const { return HasVertexCache; }
   1053 
   1054   short getTexVTXClauseSize() const { return TexVTXClauseSize; }
   1055 
   1056   AMDGPUAS getAMDGPUAS() const { return AS; }
   1057 
   1058   /// Unconditionally answers true for this subtarget; how the answer is
   1059   /// consumed is decided by the caller of this override, not shown here.
   1060   bool enableMachineScheduler() const override { return true; }
   1061 
   1062   /// Unconditionally answers true for this subtarget; how the answer is
   1063   /// consumed is decided by the caller of this override, not shown here.
   1064   bool enableSubRegLiveness() const override { return true; }
   1065 };
   1066 
   1067 } // end namespace llvm
   1068 
   1069 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
   1070