Home | History | Annotate | Download | only in AMDGPU
      1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //==-----------------------------------------------------------------------===//
      9 //
     10 /// \file
     11 /// \brief AMDGPU specific subclass of TargetSubtarget.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
     16 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
     17 
     18 #include "AMDGPU.h"
     19 #include "R600InstrInfo.h"
     20 #include "R600ISelLowering.h"
     21 #include "R600FrameLowering.h"
     22 #include "SIInstrInfo.h"
     23 #include "SIISelLowering.h"
     24 #include "SIFrameLowering.h"
     25 #include "Utils/AMDGPUBaseInfo.h"
     26 #include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
     27 #include "llvm/Target/TargetSubtargetInfo.h"
     28 
     29 #define GET_SUBTARGETINFO_HEADER
     30 #include "AMDGPUGenSubtargetInfo.inc"
     31 
     32 namespace llvm {
     33 
     34 class SIMachineFunctionInfo;
     35 class StringRef;
     36 
     37 class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
     38 public:
     39   enum Generation {
     40     R600 = 0,
     41     R700,
     42     EVERGREEN,
     43     NORTHERN_ISLANDS,
     44     SOUTHERN_ISLANDS,
     45     SEA_ISLANDS,
     46     VOLCANIC_ISLANDS,
     47   };
     48 
     49   enum {
     50     ISAVersion0_0_0,
     51     ISAVersion7_0_0,
     52     ISAVersion7_0_1,
     53     ISAVersion8_0_0,
     54     ISAVersion8_0_1,
     55     ISAVersion8_0_3
     56   };
     57 
     58 protected:
     59   // Basic subtarget description.
     60   Triple TargetTriple;
     61   Generation Gen;
     62   unsigned IsaVersion;
     63   unsigned WavefrontSize;
     64   int LocalMemorySize;
     65   int LDSBankCount;
     66   unsigned MaxPrivateElementSize;
     67 
     68   // Possibly statically set by tablegen, but may want to be overridden.
     69   bool FastFMAF32;
     70   bool HalfRate64Ops;
     71 
     72   // Dynamially set bits that enable features.
     73   bool FP32Denormals;
     74   bool FP64Denormals;
     75   bool FPExceptions;
     76   bool FlatForGlobal;
     77   bool UnalignedBufferAccess;
     78   bool EnableXNACK;
     79   bool DebuggerInsertNops;
     80   bool DebuggerReserveRegs;
     81   bool DebuggerEmitPrologue;
     82 
     83   // Used as options.
     84   bool EnableVGPRSpilling;
     85   bool EnablePromoteAlloca;
     86   bool EnableLoadStoreOpt;
     87   bool EnableUnsafeDSOffsetFolding;
     88   bool EnableSIScheduler;
     89   bool DumpCode;
     90 
     91   // Subtarget statically properties set by tablegen
     92   bool FP64;
     93   bool IsGCN;
     94   bool GCN1Encoding;
     95   bool GCN3Encoding;
     96   bool CIInsts;
     97   bool SGPRInitBug;
     98   bool HasSMemRealTime;
     99   bool Has16BitInsts;
    100   bool FlatAddressSpace;
    101   bool R600ALUInst;
    102   bool CaymanISA;
    103   bool CFALUBug;
    104   bool HasVertexCache;
    105   short TexVTXClauseSize;
    106 
    107   // Dummy feature to use for assembler in tablegen.
    108   bool FeatureDisable;
    109 
    110   InstrItineraryData InstrItins;
    111 
    112 public:
    113   AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
    114                   const TargetMachine &TM);
    115   virtual ~AMDGPUSubtarget();
    116   AMDGPUSubtarget &initializeSubtargetDependencies(const Triple &TT,
    117                                                    StringRef GPU, StringRef FS);
    118 
    119   const AMDGPUInstrInfo *getInstrInfo() const override;
    120   const AMDGPUFrameLowering *getFrameLowering() const override;
    121   const AMDGPUTargetLowering *getTargetLowering() const override;
    122   const AMDGPURegisterInfo *getRegisterInfo() const override;
    123 
    124   const InstrItineraryData *getInstrItineraryData() const override {
    125     return &InstrItins;
    126   }
    127 
    128   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
    129 
    130   bool isAmdHsaOS() const {
    131     return TargetTriple.getOS() == Triple::AMDHSA;
    132   }
    133 
    134   Generation getGeneration() const {
    135     return Gen;
    136   }
    137 
    138   unsigned getWavefrontSize() const {
    139     return WavefrontSize;
    140   }
    141 
    142   int getLocalMemorySize() const {
    143     return LocalMemorySize;
    144   }
    145 
    146   int getLDSBankCount() const {
    147     return LDSBankCount;
    148   }
    149 
    150   unsigned getMaxPrivateElementSize() const {
    151     return MaxPrivateElementSize;
    152   }
    153 
    154   bool hasHWFP64() const {
    155     return FP64;
    156   }
    157 
    158   bool hasFastFMAF32() const {
    159     return FastFMAF32;
    160   }
    161 
    162   bool hasHalfRate64Ops() const {
    163     return HalfRate64Ops;
    164   }
    165 
    166   bool hasAddr64() const {
    167     return (getGeneration() < VOLCANIC_ISLANDS);
    168   }
    169 
    170   bool hasBFE() const {
    171     return (getGeneration() >= EVERGREEN);
    172   }
    173 
    174   bool hasBFI() const {
    175     return (getGeneration() >= EVERGREEN);
    176   }
    177 
    178   bool hasBFM() const {
    179     return hasBFE();
    180   }
    181 
    182   bool hasBCNT(unsigned Size) const {
    183     if (Size == 32)
    184       return (getGeneration() >= EVERGREEN);
    185 
    186     if (Size == 64)
    187       return (getGeneration() >= SOUTHERN_ISLANDS);
    188 
    189     return false;
    190   }
    191 
    192   bool hasMulU24() const {
    193     return (getGeneration() >= EVERGREEN);
    194   }
    195 
    196   bool hasMulI24() const {
    197     return (getGeneration() >= SOUTHERN_ISLANDS ||
    198             hasCaymanISA());
    199   }
    200 
    201   bool hasFFBL() const {
    202     return (getGeneration() >= EVERGREEN);
    203   }
    204 
    205   bool hasFFBH() const {
    206     return (getGeneration() >= EVERGREEN);
    207   }
    208 
    209   bool hasCARRY() const {
    210     return (getGeneration() >= EVERGREEN);
    211   }
    212 
    213   bool hasBORROW() const {
    214     return (getGeneration() >= EVERGREEN);
    215   }
    216 
    217   bool hasCaymanISA() const {
    218     return CaymanISA;
    219   }
    220 
    221   bool isPromoteAllocaEnabled() const {
    222     return EnablePromoteAlloca;
    223   }
    224 
    225   bool unsafeDSOffsetFoldingEnabled() const {
    226     return EnableUnsafeDSOffsetFolding;
    227   }
    228 
    229   bool dumpCode() const {
    230     return DumpCode;
    231   }
    232 
    233   /// Return the amount of LDS that can be used that will not restrict the
    234   /// occupancy lower than WaveCount.
    235   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount) const;
    236 
    237   /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if
    238   /// the given LDS memory size is the only constraint.
    239   unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const;
    240 
    241 
    242   bool hasFP32Denormals() const {
    243     return FP32Denormals;
    244   }
    245 
    246   bool hasFP64Denormals() const {
    247     return FP64Denormals;
    248   }
    249 
    250   bool hasFPExceptions() const {
    251     return FPExceptions;
    252   }
    253 
    254   bool useFlatForGlobal() const {
    255     return FlatForGlobal;
    256   }
    257 
    258   bool hasUnalignedBufferAccess() const {
    259     return UnalignedBufferAccess;
    260   }
    261 
    262   bool isXNACKEnabled() const {
    263     return EnableXNACK;
    264   }
    265 
    266   unsigned getMaxWavesPerCU() const {
    267     if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
    268       return 10;
    269 
    270     // FIXME: Not sure what this is for other subtagets.
    271     return 8;
    272   }
    273 
    274   /// \brief Returns the offset in bytes from the start of the input buffer
    275   ///        of the first explicit kernel argument.
    276   unsigned getExplicitKernelArgOffset() const {
    277     return isAmdHsaOS() ? 0 : 36;
    278   }
    279 
    280   unsigned getStackAlignment() const {
    281     // Scratch is allocated in 256 dword per wave blocks.
    282     return 4 * 256 / getWavefrontSize();
    283   }
    284 
    285   bool enableMachineScheduler() const override {
    286     return true;
    287   }
    288 
    289   bool enableSubRegLiveness() const override {
    290     return true;
    291   }
    292 };
    293 
    294 class R600Subtarget final : public AMDGPUSubtarget {
    295 private:
    296   R600InstrInfo InstrInfo;
    297   R600FrameLowering FrameLowering;
    298   R600TargetLowering TLInfo;
    299 
    300 public:
    301   R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
    302                 const TargetMachine &TM);
    303 
    304   const R600InstrInfo *getInstrInfo() const override {
    305     return &InstrInfo;
    306   }
    307 
    308   const R600FrameLowering *getFrameLowering() const override {
    309     return &FrameLowering;
    310   }
    311 
    312   const R600TargetLowering *getTargetLowering() const override {
    313     return &TLInfo;
    314   }
    315 
    316   const R600RegisterInfo *getRegisterInfo() const override {
    317     return &InstrInfo.getRegisterInfo();
    318   }
    319 
    320   bool hasCFAluBug() const {
    321     return CFALUBug;
    322   }
    323 
    324   bool hasVertexCache() const {
    325     return HasVertexCache;
    326   }
    327 
    328   short getTexVTXClauseSize() const {
    329     return TexVTXClauseSize;
    330   }
    331 
    332   unsigned getStackEntrySize() const;
    333 };
    334 
    335 class SISubtarget final : public AMDGPUSubtarget {
    336 public:
    337   enum {
    338     FIXED_SGPR_COUNT_FOR_INIT_BUG = 80
    339   };
    340 
    341 private:
    342   SIInstrInfo InstrInfo;
    343   SIFrameLowering FrameLowering;
    344   SITargetLowering TLInfo;
    345   std::unique_ptr<GISelAccessor> GISel;
    346 
    347 public:
    348   SISubtarget(const Triple &TT, StringRef CPU, StringRef FS,
    349               const TargetMachine &TM);
    350 
    351   const SIInstrInfo *getInstrInfo() const override {
    352     return &InstrInfo;
    353   }
    354 
    355   const SIFrameLowering *getFrameLowering() const override {
    356     return &FrameLowering;
    357   }
    358 
    359   const SITargetLowering *getTargetLowering() const override {
    360     return &TLInfo;
    361   }
    362 
    363   const CallLowering *getCallLowering() const override {
    364     assert(GISel && "Access to GlobalISel APIs not set");
    365     return GISel->getCallLowering();
    366   }
    367 
    368   const SIRegisterInfo *getRegisterInfo() const override {
    369     return &InstrInfo.getRegisterInfo();
    370   }
    371 
    372   void setGISelAccessor(GISelAccessor &GISel) {
    373     this->GISel.reset(&GISel);
    374   }
    375 
    376   void overrideSchedPolicy(MachineSchedPolicy &Policy,
    377                            unsigned NumRegionInstrs) const override;
    378 
    379   bool isVGPRSpillingEnabled(const Function& F) const;
    380 
    381   unsigned getAmdKernelCodeChipID() const;
    382 
    383   AMDGPU::IsaVersion getIsaVersion() const;
    384 
    385   unsigned getMaxNumUserSGPRs() const {
    386     return 16;
    387   }
    388 
    389   bool hasFlatAddressSpace() const {
    390     return FlatAddressSpace;
    391   }
    392 
    393   bool hasSMemRealTime() const {
    394     return HasSMemRealTime;
    395   }
    396 
    397   bool has16BitInsts() const {
    398     return Has16BitInsts;
    399   }
    400 
    401   bool enableSIScheduler() const {
    402     return EnableSIScheduler;
    403   }
    404 
    405   bool debuggerSupported() const {
    406     return debuggerInsertNops() && debuggerReserveRegs() &&
    407       debuggerEmitPrologue();
    408   }
    409 
    410   bool debuggerInsertNops() const {
    411     return DebuggerInsertNops;
    412   }
    413 
    414   bool debuggerReserveRegs() const {
    415     return DebuggerReserveRegs;
    416   }
    417 
    418   bool debuggerEmitPrologue() const {
    419     return DebuggerEmitPrologue;
    420   }
    421 
    422   bool loadStoreOptEnabled() const {
    423     return EnableLoadStoreOpt;
    424   }
    425 
    426   bool hasSGPRInitBug() const {
    427     return SGPRInitBug;
    428   }
    429 };
    430 
    431 
    432 inline const AMDGPUInstrInfo *AMDGPUSubtarget::getInstrInfo() const {
    433   if (getGeneration() >= SOUTHERN_ISLANDS)
    434     return static_cast<const SISubtarget *>(this)->getInstrInfo();
    435 
    436   return static_cast<const R600Subtarget *>(this)->getInstrInfo();
    437 }
    438 
    439 inline const AMDGPUFrameLowering *AMDGPUSubtarget::getFrameLowering() const  {
    440   if (getGeneration() >= SOUTHERN_ISLANDS)
    441     return static_cast<const SISubtarget *>(this)->getFrameLowering();
    442 
    443   return static_cast<const R600Subtarget *>(this)->getFrameLowering();
    444 }
    445 
    446 inline const AMDGPUTargetLowering *AMDGPUSubtarget::getTargetLowering() const  {
    447   if (getGeneration() >= SOUTHERN_ISLANDS)
    448     return static_cast<const SISubtarget *>(this)->getTargetLowering();
    449 
    450   return static_cast<const R600Subtarget *>(this)->getTargetLowering();
    451 }
    452 
    453 inline const AMDGPURegisterInfo *AMDGPUSubtarget::getRegisterInfo() const  {
    454   if (getGeneration() >= SOUTHERN_ISLANDS)
    455     return static_cast<const SISubtarget *>(this)->getRegisterInfo();
    456 
    457   return static_cast<const R600Subtarget *>(this)->getRegisterInfo();
    458 }
    459 
    460 } // End namespace llvm
    461 
    462 #endif
    463