1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //==-----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief AMDGPU specific subclass of TargetSubtarget. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H 16 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H 17 18 #include "AMDGPU.h" 19 #include "R600InstrInfo.h" 20 #include "R600ISelLowering.h" 21 #include "R600FrameLowering.h" 22 #include "SIInstrInfo.h" 23 #include "SIISelLowering.h" 24 #include "SIFrameLowering.h" 25 #include "Utils/AMDGPUBaseInfo.h" 26 #include "llvm/CodeGen/GlobalISel/GISelAccessor.h" 27 #include "llvm/Target/TargetSubtargetInfo.h" 28 29 #define GET_SUBTARGETINFO_HEADER 30 #include "AMDGPUGenSubtargetInfo.inc" 31 32 namespace llvm { 33 34 class SIMachineFunctionInfo; 35 class StringRef; 36 37 class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { 38 public: 39 enum Generation { 40 R600 = 0, 41 R700, 42 EVERGREEN, 43 NORTHERN_ISLANDS, 44 SOUTHERN_ISLANDS, 45 SEA_ISLANDS, 46 VOLCANIC_ISLANDS, 47 }; 48 49 enum { 50 ISAVersion0_0_0, 51 ISAVersion7_0_0, 52 ISAVersion7_0_1, 53 ISAVersion8_0_0, 54 ISAVersion8_0_1, 55 ISAVersion8_0_3 56 }; 57 58 protected: 59 // Basic subtarget description. 60 Triple TargetTriple; 61 Generation Gen; 62 unsigned IsaVersion; 63 unsigned WavefrontSize; 64 int LocalMemorySize; 65 int LDSBankCount; 66 unsigned MaxPrivateElementSize; 67 68 // Possibly statically set by tablegen, but may want to be overridden. 69 bool FastFMAF32; 70 bool HalfRate64Ops; 71 72 // Dynamially set bits that enable features. 73 bool FP32Denormals; 74 bool FP64Denormals; 75 bool FPExceptions; 76 bool FlatForGlobal; 77 bool UnalignedBufferAccess; 78 bool EnableXNACK; 79 bool DebuggerInsertNops; 80 bool DebuggerReserveRegs; 81 bool DebuggerEmitPrologue; 82 83 // Used as options. 84 bool EnableVGPRSpilling; 85 bool EnablePromoteAlloca; 86 bool EnableLoadStoreOpt; 87 bool EnableUnsafeDSOffsetFolding; 88 bool EnableSIScheduler; 89 bool DumpCode; 90 91 // Subtarget statically properties set by tablegen 92 bool FP64; 93 bool IsGCN; 94 bool GCN1Encoding; 95 bool GCN3Encoding; 96 bool CIInsts; 97 bool SGPRInitBug; 98 bool HasSMemRealTime; 99 bool Has16BitInsts; 100 bool FlatAddressSpace; 101 bool R600ALUInst; 102 bool CaymanISA; 103 bool CFALUBug; 104 bool HasVertexCache; 105 short TexVTXClauseSize; 106 107 // Dummy feature to use for assembler in tablegen. 108 bool FeatureDisable; 109 110 InstrItineraryData InstrItins; 111 112 public: 113 AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, 114 const TargetMachine &TM); 115 virtual ~AMDGPUSubtarget(); 116 AMDGPUSubtarget &initializeSubtargetDependencies(const Triple &TT, 117 StringRef GPU, StringRef FS); 118 119 const AMDGPUInstrInfo *getInstrInfo() const override; 120 const AMDGPUFrameLowering *getFrameLowering() const override; 121 const AMDGPUTargetLowering *getTargetLowering() const override; 122 const AMDGPURegisterInfo *getRegisterInfo() const override; 123 124 const InstrItineraryData *getInstrItineraryData() const override { 125 return &InstrItins; 126 } 127 128 void ParseSubtargetFeatures(StringRef CPU, StringRef FS); 129 130 bool isAmdHsaOS() const { 131 return TargetTriple.getOS() == Triple::AMDHSA; 132 } 133 134 Generation getGeneration() const { 135 return Gen; 136 } 137 138 unsigned getWavefrontSize() const { 139 return WavefrontSize; 140 } 141 142 int getLocalMemorySize() const { 143 return LocalMemorySize; 144 } 145 146 int getLDSBankCount() const { 147 return LDSBankCount; 148 } 149 150 unsigned getMaxPrivateElementSize() const { 151 return MaxPrivateElementSize; 152 } 153 154 bool hasHWFP64() const { 155 return FP64; 156 } 157 158 bool hasFastFMAF32() const { 159 return FastFMAF32; 160 } 161 162 bool hasHalfRate64Ops() const { 163 return HalfRate64Ops; 164 } 165 166 bool hasAddr64() const { 167 return (getGeneration() < VOLCANIC_ISLANDS); 168 } 169 170 bool hasBFE() const { 171 return (getGeneration() >= EVERGREEN); 172 } 173 174 bool hasBFI() const { 175 return (getGeneration() >= EVERGREEN); 176 } 177 178 bool hasBFM() const { 179 return hasBFE(); 180 } 181 182 bool hasBCNT(unsigned Size) const { 183 if (Size == 32) 184 return (getGeneration() >= EVERGREEN); 185 186 if (Size == 64) 187 return (getGeneration() >= SOUTHERN_ISLANDS); 188 189 return false; 190 } 191 192 bool hasMulU24() const { 193 return (getGeneration() >= EVERGREEN); 194 } 195 196 bool hasMulI24() const { 197 return (getGeneration() >= SOUTHERN_ISLANDS || 198 hasCaymanISA()); 199 } 200 201 bool hasFFBL() const { 202 return (getGeneration() >= EVERGREEN); 203 } 204 205 bool hasFFBH() const { 206 return (getGeneration() >= EVERGREEN); 207 } 208 209 bool hasCARRY() const { 210 return (getGeneration() >= EVERGREEN); 211 } 212 213 bool hasBORROW() const { 214 return (getGeneration() >= EVERGREEN); 215 } 216 217 bool hasCaymanISA() const { 218 return CaymanISA; 219 } 220 221 bool isPromoteAllocaEnabled() const { 222 return EnablePromoteAlloca; 223 } 224 225 bool unsafeDSOffsetFoldingEnabled() const { 226 return EnableUnsafeDSOffsetFolding; 227 } 228 229 bool dumpCode() const { 230 return DumpCode; 231 } 232 233 /// Return the amount of LDS that can be used that will not restrict the 234 /// occupancy lower than WaveCount. 235 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount) const; 236 237 /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if 238 /// the given LDS memory size is the only constraint. 239 unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const; 240 241 242 bool hasFP32Denormals() const { 243 return FP32Denormals; 244 } 245 246 bool hasFP64Denormals() const { 247 return FP64Denormals; 248 } 249 250 bool hasFPExceptions() const { 251 return FPExceptions; 252 } 253 254 bool useFlatForGlobal() const { 255 return FlatForGlobal; 256 } 257 258 bool hasUnalignedBufferAccess() const { 259 return UnalignedBufferAccess; 260 } 261 262 bool isXNACKEnabled() const { 263 return EnableXNACK; 264 } 265 266 unsigned getMaxWavesPerCU() const { 267 if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) 268 return 10; 269 270 // FIXME: Not sure what this is for other subtagets. 271 return 8; 272 } 273 274 /// \brief Returns the offset in bytes from the start of the input buffer 275 /// of the first explicit kernel argument. 276 unsigned getExplicitKernelArgOffset() const { 277 return isAmdHsaOS() ? 0 : 36; 278 } 279 280 unsigned getStackAlignment() const { 281 // Scratch is allocated in 256 dword per wave blocks. 282 return 4 * 256 / getWavefrontSize(); 283 } 284 285 bool enableMachineScheduler() const override { 286 return true; 287 } 288 289 bool enableSubRegLiveness() const override { 290 return true; 291 } 292 }; 293 294 class R600Subtarget final : public AMDGPUSubtarget { 295 private: 296 R600InstrInfo InstrInfo; 297 R600FrameLowering FrameLowering; 298 R600TargetLowering TLInfo; 299 300 public: 301 R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS, 302 const TargetMachine &TM); 303 304 const R600InstrInfo *getInstrInfo() const override { 305 return &InstrInfo; 306 } 307 308 const R600FrameLowering *getFrameLowering() const override { 309 return &FrameLowering; 310 } 311 312 const R600TargetLowering *getTargetLowering() const override { 313 return &TLInfo; 314 } 315 316 const R600RegisterInfo *getRegisterInfo() const override { 317 return &InstrInfo.getRegisterInfo(); 318 } 319 320 bool hasCFAluBug() const { 321 return CFALUBug; 322 } 323 324 bool hasVertexCache() const { 325 return HasVertexCache; 326 } 327 328 short getTexVTXClauseSize() const { 329 return TexVTXClauseSize; 330 } 331 332 unsigned getStackEntrySize() const; 333 }; 334 335 class SISubtarget final : public AMDGPUSubtarget { 336 public: 337 enum { 338 FIXED_SGPR_COUNT_FOR_INIT_BUG = 80 339 }; 340 341 private: 342 SIInstrInfo InstrInfo; 343 SIFrameLowering FrameLowering; 344 SITargetLowering TLInfo; 345 std::unique_ptr<GISelAccessor> GISel; 346 347 public: 348 SISubtarget(const Triple &TT, StringRef CPU, StringRef FS, 349 const TargetMachine &TM); 350 351 const SIInstrInfo *getInstrInfo() const override { 352 return &InstrInfo; 353 } 354 355 const SIFrameLowering *getFrameLowering() const override { 356 return &FrameLowering; 357 } 358 359 const SITargetLowering *getTargetLowering() const override { 360 return &TLInfo; 361 } 362 363 const CallLowering *getCallLowering() const override { 364 assert(GISel && "Access to GlobalISel APIs not set"); 365 return GISel->getCallLowering(); 366 } 367 368 const SIRegisterInfo *getRegisterInfo() const override { 369 return &InstrInfo.getRegisterInfo(); 370 } 371 372 void setGISelAccessor(GISelAccessor &GISel) { 373 this->GISel.reset(&GISel); 374 } 375 376 void overrideSchedPolicy(MachineSchedPolicy &Policy, 377 unsigned NumRegionInstrs) const override; 378 379 bool isVGPRSpillingEnabled(const Function& F) const; 380 381 unsigned getAmdKernelCodeChipID() const; 382 383 AMDGPU::IsaVersion getIsaVersion() const; 384 385 unsigned getMaxNumUserSGPRs() const { 386 return 16; 387 } 388 389 bool hasFlatAddressSpace() const { 390 return FlatAddressSpace; 391 } 392 393 bool hasSMemRealTime() const { 394 return HasSMemRealTime; 395 } 396 397 bool has16BitInsts() const { 398 return Has16BitInsts; 399 } 400 401 bool enableSIScheduler() const { 402 return EnableSIScheduler; 403 } 404 405 bool debuggerSupported() const { 406 return debuggerInsertNops() && debuggerReserveRegs() && 407 debuggerEmitPrologue(); 408 } 409 410 bool debuggerInsertNops() const { 411 return DebuggerInsertNops; 412 } 413 414 bool debuggerReserveRegs() const { 415 return DebuggerReserveRegs; 416 } 417 418 bool debuggerEmitPrologue() const { 419 return DebuggerEmitPrologue; 420 } 421 422 bool loadStoreOptEnabled() const { 423 return EnableLoadStoreOpt; 424 } 425 426 bool hasSGPRInitBug() const { 427 return SGPRInitBug; 428 } 429 }; 430 431 432 inline const AMDGPUInstrInfo *AMDGPUSubtarget::getInstrInfo() const { 433 if (getGeneration() >= SOUTHERN_ISLANDS) 434 return static_cast<const SISubtarget *>(this)->getInstrInfo(); 435 436 return static_cast<const R600Subtarget *>(this)->getInstrInfo(); 437 } 438 439 inline const AMDGPUFrameLowering *AMDGPUSubtarget::getFrameLowering() const { 440 if (getGeneration() >= SOUTHERN_ISLANDS) 441 return static_cast<const SISubtarget *>(this)->getFrameLowering(); 442 443 return static_cast<const R600Subtarget *>(this)->getFrameLowering(); 444 } 445 446 inline const AMDGPUTargetLowering *AMDGPUSubtarget::getTargetLowering() const { 447 if (getGeneration() >= SOUTHERN_ISLANDS) 448 return static_cast<const SISubtarget *>(this)->getTargetLowering(); 449 450 return static_cast<const R600Subtarget *>(this)->getTargetLowering(); 451 } 452 453 inline const AMDGPURegisterInfo *AMDGPUSubtarget::getRegisterInfo() const { 454 if (getGeneration() >= SOUTHERN_ISLANDS) 455 return static_cast<const SISubtarget *>(this)->getRegisterInfo(); 456 457 return static_cast<const R600Subtarget *>(this)->getRegisterInfo(); 458 } 459 460 } // End namespace llvm 461 462 #endif 463