Home | History | Annotate | Download | only in AMDGPU
      1 //===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 /// \file
     11 /// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "AMDGPUSubtarget.h"
     16 #include "R600ISelLowering.h"
     17 #include "R600InstrInfo.h"
     18 #include "SIFrameLowering.h"
     19 #include "SIISelLowering.h"
     20 #include "SIInstrInfo.h"
     21 #include "SIMachineFunctionInfo.h"
     22 #include "llvm/ADT/SmallString.h"
     23 #include "llvm/CodeGen/MachineScheduler.h"
     24 
     25 using namespace llvm;
     26 
     27 #define DEBUG_TYPE "amdgpu-subtarget"
     28 
     29 #define GET_SUBTARGETINFO_ENUM
     30 #define GET_SUBTARGETINFO_TARGET_DESC
     31 #define GET_SUBTARGETINFO_CTOR
     32 #include "AMDGPUGenSubtargetInfo.inc"
     33 
     34 AMDGPUSubtarget::~AMDGPUSubtarget() {}
     35 
     36 AMDGPUSubtarget &
     37 AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
     38                                                  StringRef GPU, StringRef FS) {
     39   // Determine default and user-specified characteristics
     40   // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
     41   // enabled, but some instructions do not respect them and they run at the
     42   // double precision rate, so don't enable by default.
     43   //
     44   // We want to be able to turn these off, but making this a subtarget feature
     45   // for SI has the unhelpful behavior that it unsets everything else if you
     46   // disable it.
     47 
     48   SmallString<256> FullFS("+promote-alloca,+fp64-denormals,+load-store-opt,");
     49   if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
     50     FullFS += "+flat-for-global,+unaligned-buffer-access,";
     51   FullFS += FS;
     52 
     53   ParseSubtargetFeatures(GPU, FullFS);
     54 
     55   // FIXME: I don't think think Evergreen has any useful support for
     56   // denormals, but should be checked. Should we issue a warning somewhere
     57   // if someone tries to enable these?
     58   if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
     59     FP32Denormals = false;
     60     FP64Denormals = false;
     61   }
     62 
     63   // Set defaults if needed.
     64   if (MaxPrivateElementSize == 0)
     65     MaxPrivateElementSize = 4;
     66 
     67   return *this;
     68 }
     69 
     70 AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
     71                                  const TargetMachine &TM)
     72   : AMDGPUGenSubtargetInfo(TT, GPU, FS),
     73     TargetTriple(TT),
     74     Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
     75     IsaVersion(ISAVersion0_0_0),
     76     WavefrontSize(64),
     77     LocalMemorySize(0),
     78     LDSBankCount(0),
     79     MaxPrivateElementSize(0),
     80 
     81     FastFMAF32(false),
     82     HalfRate64Ops(false),
     83 
     84     FP32Denormals(false),
     85     FP64Denormals(false),
     86     FPExceptions(false),
     87     FlatForGlobal(false),
     88     UnalignedBufferAccess(false),
     89 
     90     EnableXNACK(false),
     91     DebuggerInsertNops(false),
     92     DebuggerReserveRegs(false),
     93     DebuggerEmitPrologue(false),
     94 
     95     EnableVGPRSpilling(false),
     96     EnablePromoteAlloca(false),
     97     EnableLoadStoreOpt(false),
     98     EnableUnsafeDSOffsetFolding(false),
     99     EnableSIScheduler(false),
    100     DumpCode(false),
    101 
    102     FP64(false),
    103     IsGCN(false),
    104     GCN1Encoding(false),
    105     GCN3Encoding(false),
    106     CIInsts(false),
    107     SGPRInitBug(false),
    108     HasSMemRealTime(false),
    109     Has16BitInsts(false),
    110     FlatAddressSpace(false),
    111 
    112     R600ALUInst(false),
    113     CaymanISA(false),
    114     CFALUBug(false),
    115     HasVertexCache(false),
    116     TexVTXClauseSize(0),
    117 
    118     FeatureDisable(false),
    119     InstrItins(getInstrItineraryForCPU(GPU)) {
    120   initializeSubtargetDependencies(TT, GPU, FS);
    121 }
    122 
    123 // FIXME: These limits are for SI. Did they change with the larger maximum LDS
    124 // size?
    125 unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves) const {
    126   switch (NWaves) {
    127   case 10:
    128     return 1638;
    129   case 9:
    130     return 1820;
    131   case 8:
    132     return 2048;
    133   case 7:
    134     return 2340;
    135   case 6:
    136     return 2730;
    137   case 5:
    138     return 3276;
    139   case 4:
    140     return 4096;
    141   case 3:
    142     return 5461;
    143   case 2:
    144     return 8192;
    145   default:
    146     return getLocalMemorySize();
    147   }
    148 }
    149 
    150 unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes) const {
    151   if (Bytes <= 1638)
    152     return 10;
    153 
    154   if (Bytes <= 1820)
    155     return 9;
    156 
    157   if (Bytes <= 2048)
    158     return 8;
    159 
    160   if (Bytes <= 2340)
    161     return 7;
    162 
    163   if (Bytes <= 2730)
    164     return 6;
    165 
    166   if (Bytes <= 3276)
    167     return 5;
    168 
    169   if (Bytes <= 4096)
    170     return 4;
    171 
    172   if (Bytes <= 5461)
    173     return 3;
    174 
    175   if (Bytes <= 8192)
    176     return 2;
    177 
    178   return 1;
    179 }
    180 
    181 R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
    182                              const TargetMachine &TM) :
    183   AMDGPUSubtarget(TT, GPU, FS, TM),
    184   InstrInfo(*this),
    185   FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
    186   TLInfo(TM, *this) {}
    187 
    188 SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
    189                          const TargetMachine &TM) :
    190   AMDGPUSubtarget(TT, GPU, FS, TM),
    191   InstrInfo(*this),
    192   FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
    193   TLInfo(TM, *this),
    194   GISel() {}
    195 
    196 unsigned R600Subtarget::getStackEntrySize() const {
    197   switch (getWavefrontSize()) {
    198   case 16:
    199     return 8;
    200   case 32:
    201     return hasCaymanISA() ? 4 : 8;
    202   case 64:
    203     return 4;
    204   default:
    205     llvm_unreachable("Illegal wavefront size.");
    206   }
    207 }
    208 
    209 void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
    210                                       unsigned NumRegionInstrs) const {
    211   // Track register pressure so the scheduler can try to decrease
    212   // pressure once register usage is above the threshold defined by
    213   // SIRegisterInfo::getRegPressureSetLimit()
    214   Policy.ShouldTrackPressure = true;
    215 
    216   // Enabling both top down and bottom up scheduling seems to give us less
    217   // register spills than just using one of these approaches on its own.
    218   Policy.OnlyTopDown = false;
    219   Policy.OnlyBottomUp = false;
    220 
    221   // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
    222   if (!enableSIScheduler())
    223     Policy.ShouldTrackLaneMasks = true;
    224 }
    225 
    226 bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
    227   return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
    228 }
    229 
    230 unsigned SISubtarget::getAmdKernelCodeChipID() const {
    231   switch (getGeneration()) {
    232   case SEA_ISLANDS:
    233     return 12;
    234   default:
    235     llvm_unreachable("ChipID unknown");
    236   }
    237 }
    238 
    239 AMDGPU::IsaVersion SISubtarget::getIsaVersion() const {
    240   return AMDGPU::getIsaVersion(getFeatureBits());
    241 }
    242