1 //===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief Implements the AMDGPU specific subclass of TargetSubtarget. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPUSubtarget.h" 16 #include "R600ISelLowering.h" 17 #include "R600InstrInfo.h" 18 #include "SIFrameLowering.h" 19 #include "SIISelLowering.h" 20 #include "SIInstrInfo.h" 21 #include "SIMachineFunctionInfo.h" 22 #include "llvm/ADT/SmallString.h" 23 #include "llvm/CodeGen/MachineScheduler.h" 24 25 using namespace llvm; 26 27 #define DEBUG_TYPE "amdgpu-subtarget" 28 29 #define GET_SUBTARGETINFO_ENUM 30 #define GET_SUBTARGETINFO_TARGET_DESC 31 #define GET_SUBTARGETINFO_CTOR 32 #include "AMDGPUGenSubtargetInfo.inc" 33 34 AMDGPUSubtarget::~AMDGPUSubtarget() {} 35 36 AMDGPUSubtarget & 37 AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, 38 StringRef GPU, StringRef FS) { 39 // Determine default and user-specified characteristics 40 // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be 41 // enabled, but some instructions do not respect them and they run at the 42 // double precision rate, so don't enable by default. 43 // 44 // We want to be able to turn these off, but making this a subtarget feature 45 // for SI has the unhelpful behavior that it unsets everything else if you 46 // disable it. 47 48 SmallString<256> FullFS("+promote-alloca,+fp64-denormals,+load-store-opt,"); 49 if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA. 50 FullFS += "+flat-for-global,+unaligned-buffer-access,"; 51 FullFS += FS; 52 53 ParseSubtargetFeatures(GPU, FullFS); 54 55 // FIXME: I don't think think Evergreen has any useful support for 56 // denormals, but should be checked. Should we issue a warning somewhere 57 // if someone tries to enable these? 58 if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { 59 FP32Denormals = false; 60 FP64Denormals = false; 61 } 62 63 // Set defaults if needed. 64 if (MaxPrivateElementSize == 0) 65 MaxPrivateElementSize = 4; 66 67 return *this; 68 } 69 70 AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, 71 const TargetMachine &TM) 72 : AMDGPUGenSubtargetInfo(TT, GPU, FS), 73 TargetTriple(TT), 74 Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600), 75 IsaVersion(ISAVersion0_0_0), 76 WavefrontSize(64), 77 LocalMemorySize(0), 78 LDSBankCount(0), 79 MaxPrivateElementSize(0), 80 81 FastFMAF32(false), 82 HalfRate64Ops(false), 83 84 FP32Denormals(false), 85 FP64Denormals(false), 86 FPExceptions(false), 87 FlatForGlobal(false), 88 UnalignedBufferAccess(false), 89 90 EnableXNACK(false), 91 DebuggerInsertNops(false), 92 DebuggerReserveRegs(false), 93 DebuggerEmitPrologue(false), 94 95 EnableVGPRSpilling(false), 96 EnablePromoteAlloca(false), 97 EnableLoadStoreOpt(false), 98 EnableUnsafeDSOffsetFolding(false), 99 EnableSIScheduler(false), 100 DumpCode(false), 101 102 FP64(false), 103 IsGCN(false), 104 GCN1Encoding(false), 105 GCN3Encoding(false), 106 CIInsts(false), 107 SGPRInitBug(false), 108 HasSMemRealTime(false), 109 Has16BitInsts(false), 110 FlatAddressSpace(false), 111 112 R600ALUInst(false), 113 CaymanISA(false), 114 CFALUBug(false), 115 HasVertexCache(false), 116 TexVTXClauseSize(0), 117 118 FeatureDisable(false), 119 InstrItins(getInstrItineraryForCPU(GPU)) { 120 initializeSubtargetDependencies(TT, GPU, FS); 121 } 122 123 // FIXME: These limits are for SI. Did they change with the larger maximum LDS 124 // size? 125 unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves) const { 126 switch (NWaves) { 127 case 10: 128 return 1638; 129 case 9: 130 return 1820; 131 case 8: 132 return 2048; 133 case 7: 134 return 2340; 135 case 6: 136 return 2730; 137 case 5: 138 return 3276; 139 case 4: 140 return 4096; 141 case 3: 142 return 5461; 143 case 2: 144 return 8192; 145 default: 146 return getLocalMemorySize(); 147 } 148 } 149 150 unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes) const { 151 if (Bytes <= 1638) 152 return 10; 153 154 if (Bytes <= 1820) 155 return 9; 156 157 if (Bytes <= 2048) 158 return 8; 159 160 if (Bytes <= 2340) 161 return 7; 162 163 if (Bytes <= 2730) 164 return 6; 165 166 if (Bytes <= 3276) 167 return 5; 168 169 if (Bytes <= 4096) 170 return 4; 171 172 if (Bytes <= 5461) 173 return 3; 174 175 if (Bytes <= 8192) 176 return 2; 177 178 return 1; 179 } 180 181 R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS, 182 const TargetMachine &TM) : 183 AMDGPUSubtarget(TT, GPU, FS, TM), 184 InstrInfo(*this), 185 FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), 186 TLInfo(TM, *this) {} 187 188 SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS, 189 const TargetMachine &TM) : 190 AMDGPUSubtarget(TT, GPU, FS, TM), 191 InstrInfo(*this), 192 FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), 193 TLInfo(TM, *this), 194 GISel() {} 195 196 unsigned R600Subtarget::getStackEntrySize() const { 197 switch (getWavefrontSize()) { 198 case 16: 199 return 8; 200 case 32: 201 return hasCaymanISA() ? 4 : 8; 202 case 64: 203 return 4; 204 default: 205 llvm_unreachable("Illegal wavefront size."); 206 } 207 } 208 209 void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, 210 unsigned NumRegionInstrs) const { 211 // Track register pressure so the scheduler can try to decrease 212 // pressure once register usage is above the threshold defined by 213 // SIRegisterInfo::getRegPressureSetLimit() 214 Policy.ShouldTrackPressure = true; 215 216 // Enabling both top down and bottom up scheduling seems to give us less 217 // register spills than just using one of these approaches on its own. 218 Policy.OnlyTopDown = false; 219 Policy.OnlyBottomUp = false; 220 221 // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler. 222 if (!enableSIScheduler()) 223 Policy.ShouldTrackLaneMasks = true; 224 } 225 226 bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const { 227 return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv()); 228 } 229 230 unsigned SISubtarget::getAmdKernelCodeChipID() const { 231 switch (getGeneration()) { 232 case SEA_ISLANDS: 233 return 12; 234 default: 235 llvm_unreachable("ChipID unknown"); 236 } 237 } 238 239 AMDGPU::IsaVersion SISubtarget::getIsaVersion() const { 240 return AMDGPU::getIsaVersion(getFeatureBits()); 241 } 242