1 //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H 11 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H 12 13 #include "AMDGPU.h" 14 #include "AMDKernelCodeT.h" 15 #include "SIDefines.h" 16 #include "llvm/ADT/StringRef.h" 17 #include "llvm/IR/CallingConv.h" 18 #include "llvm/MC/MCInstrDesc.h" 19 #include "llvm/Support/AMDHSAKernelDescriptor.h" 20 #include "llvm/Support/Compiler.h" 21 #include "llvm/Support/ErrorHandling.h" 22 #include <cstdint> 23 #include <string> 24 #include <utility> 25 26 namespace llvm { 27 28 class Argument; 29 class FeatureBitset; 30 class Function; 31 class GlobalValue; 32 class MCContext; 33 class MCRegisterClass; 34 class MCRegisterInfo; 35 class MCSection; 36 class MCSubtargetInfo; 37 class MachineMemOperand; 38 class Triple; 39 40 namespace AMDGPU { 41 42 #define GET_MIMGBaseOpcode_DECL 43 #define GET_MIMGDim_DECL 44 #define GET_MIMGEncoding_DECL 45 #define GET_MIMGLZMapping_DECL 46 #include "AMDGPUGenSearchableTables.inc" 47 48 namespace IsaInfo { 49 50 enum { 51 // The closed Vulkan driver sets 96, which limits the wave count to 8 but 52 // doesn't spill SGPRs as much as when 80 is set. 53 FIXED_NUM_SGPRS_FOR_INIT_BUG = 96, 54 TRAP_NUM_SGPRS = 16 55 }; 56 57 /// Instruction set architecture version. 58 struct IsaVersion { 59 unsigned Major; 60 unsigned Minor; 61 unsigned Stepping; 62 }; 63 64 /// \returns Isa version for given subtarget \p Features. 65 IsaVersion getIsaVersion(const FeatureBitset &Features); 66 67 /// Streams isa version string for given subtarget \p STI into \p Stream. 
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);

/// \returns True if given subtarget \p STI supports code object version 3,
/// false otherwise.
bool hasCodeObjectV3(const MCSubtargetInfo *STI);

/// \returns Wavefront size for given subtarget \p Features.
unsigned getWavefrontSize(const FeatureBitset &Features);

/// \returns Local memory size in bytes for given subtarget \p Features.
unsigned getLocalMemorySize(const FeatureBitset &Features);

/// \returns Number of execution units per compute unit for given subtarget \p
/// Features.
unsigned getEUsPerCU(const FeatureBitset &Features);

/// \returns Maximum number of work groups per compute unit for given subtarget
/// \p Features and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
                               unsigned FlatWorkGroupSize);

/// \returns Maximum number of waves per compute unit for given subtarget \p
/// Features without any kind of limitation.
unsigned getMaxWavesPerCU(const FeatureBitset &Features);

/// \returns Maximum number of waves per compute unit for given subtarget \p
/// Features and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerCU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize);

/// \returns Minimum number of waves per execution unit for given subtarget \p
/// Features.
unsigned getMinWavesPerEU(const FeatureBitset &Features);

/// \returns Maximum number of waves per execution unit, without any kind of
/// limitation. This overload is subtarget independent and therefore takes no
/// feature set.
unsigned getMaxWavesPerEU();

/// \returns Maximum number of waves per execution unit for given subtarget \p
/// Features and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerEU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize);

/// \returns Minimum flat work group size for given subtarget \p Features.
unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features);

/// \returns Maximum flat work group size for given subtarget \p Features.
unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features);

/// \returns Number of waves per work group for given subtarget \p Features and
/// limited by given \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
                              unsigned FlatWorkGroupSize);

/// \returns SGPR allocation granularity for given subtarget \p Features.
unsigned getSGPRAllocGranule(const FeatureBitset &Features);

/// \returns SGPR encoding granularity for given subtarget \p Features.
unsigned getSGPREncodingGranule(const FeatureBitset &Features);

/// \returns Total number of SGPRs for given subtarget \p Features.
unsigned getTotalNumSGPRs(const FeatureBitset &Features);

/// \returns Addressable number of SGPRs for given subtarget \p Features.
unsigned getAddressableNumSGPRs(const FeatureBitset &Features);

/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p Features.
unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU);

/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p Features.
/// NOTE(review): \p Addressable presumably caps the result to the addressable
/// SGPR range — confirm against the definition in AMDGPUBaseInfo.cpp.
unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
                        bool Addressable);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// Features when the given special registers are used.
unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// Features when the given special registers are used. XNACK is inferred from
/// \p Features.
unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed);

/// \returns Number of SGPR blocks needed for given subtarget \p Features when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);

/// \returns VGPR allocation granularity for given subtarget \p Features.
unsigned getVGPRAllocGranule(const FeatureBitset &Features);

/// \returns VGPR encoding granularity for given subtarget \p Features.
unsigned getVGPREncodingGranule(const FeatureBitset &Features);

/// \returns Total number of VGPRs for given subtarget \p Features.
unsigned getTotalNumVGPRs(const FeatureBitset &Features);

/// \returns Addressable number of VGPRs for given subtarget \p Features.
unsigned getAddressableNumVGPRs(const FeatureBitset &Features);

/// \returns Minimum number of VGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p Features.
unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);

/// \returns Maximum number of VGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p Features.
unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);

/// \returns Number of VGPR blocks needed for given subtarget \p Features when
/// \p NumVGPRs are used.
181 unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs); 182 183 } // end namespace IsaInfo 184 185 LLVM_READONLY 186 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx); 187 188 struct MIMGBaseOpcodeInfo { 189 MIMGBaseOpcode BaseOpcode; 190 bool Store; 191 bool Atomic; 192 bool AtomicX2; 193 bool Sampler; 194 195 uint8_t NumExtraArgs; 196 bool Gradients; 197 bool Coordinates; 198 bool LodOrClampOrMip; 199 bool HasD16; 200 }; 201 202 LLVM_READONLY 203 const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode); 204 205 struct MIMGDimInfo { 206 MIMGDim Dim; 207 uint8_t NumCoords; 208 uint8_t NumGradients; 209 bool DA; 210 }; 211 212 LLVM_READONLY 213 const MIMGDimInfo *getMIMGDimInfo(unsigned Dim); 214 215 struct MIMGLZMappingInfo { 216 MIMGBaseOpcode L; 217 MIMGBaseOpcode LZ; 218 }; 219 220 LLVM_READONLY 221 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L); 222 223 LLVM_READONLY 224 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, 225 unsigned VDataDwords, unsigned VAddrDwords); 226 227 LLVM_READONLY 228 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels); 229 230 LLVM_READONLY 231 int getMCOpcode(uint16_t Opcode, unsigned Gen); 232 233 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, 234 const FeatureBitset &Features); 235 236 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(); 237 238 bool isGroupSegment(const GlobalValue *GV); 239 bool isGlobalSegment(const GlobalValue *GV); 240 bool isReadOnlySegment(const GlobalValue *GV); 241 242 /// \returns True if constants should be emitted to .text section for given 243 /// target triple \p TT, false otherwise. 244 bool shouldEmitConstantsToTextSection(const Triple &TT); 245 246 /// \returns Integer value requested using \p F's \p Name attribute. 247 /// 248 /// \returns \p Default if attribute is not present. 249 /// 250 /// \returns \p Default and emits error if requested value cannot be converted 251 /// to integer. 
int getIntegerAttribute(const Function &F, StringRef Name, int Default);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional when \p OnlyFirstRequired
/// is true, and required otherwise).
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if one of the requested values cannot be
/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
/// not present.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired = false);

/// \returns Vmcnt bit mask for given isa \p Version.
unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version);

/// \returns Expcnt bit mask for given isa \p Version.
unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version);

/// \returns Lgkmcnt bit mask for given isa \p Version.
unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version);

/// \returns Waitcnt bit mask for given isa \p Version.
unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version);

/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);

/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
///     \p Vmcnt   = \p Waitcnt[3:0]                      (pre-gfx9 only)
///     \p Vmcnt   = \p Waitcnt[3:0] | \p Waitcnt[15:14]  (gfx9+ only)
///     \p Expcnt  = \p Waitcnt[6:4]
///     \p Lgkmcnt = \p Waitcnt[11:8]
void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);

/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt);

/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt);

/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt);

/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
///     Waitcnt[6:4]   = \p Expcnt
///     Waitcnt[11:8]  = \p Lgkmcnt
///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
325 unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version, 326 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt); 327 328 unsigned getInitialPSInputAddr(const Function &F); 329 330 LLVM_READNONE 331 bool isShader(CallingConv::ID CC); 332 333 LLVM_READNONE 334 bool isCompute(CallingConv::ID CC); 335 336 LLVM_READNONE 337 bool isEntryFunctionCC(CallingConv::ID CC); 338 339 // FIXME: Remove this when calling conventions cleaned up 340 LLVM_READNONE 341 inline bool isKernel(CallingConv::ID CC) { 342 switch (CC) { 343 case CallingConv::AMDGPU_KERNEL: 344 case CallingConv::SPIR_KERNEL: 345 return true; 346 default: 347 return false; 348 } 349 } 350 351 bool hasXNACK(const MCSubtargetInfo &STI); 352 bool hasMIMG_R128(const MCSubtargetInfo &STI); 353 bool hasPackedD16(const MCSubtargetInfo &STI); 354 355 bool isSI(const MCSubtargetInfo &STI); 356 bool isCI(const MCSubtargetInfo &STI); 357 bool isVI(const MCSubtargetInfo &STI); 358 bool isGFX9(const MCSubtargetInfo &STI); 359 360 /// Is Reg - scalar register 361 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI); 362 363 /// Is there any intersection between registers 364 bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI); 365 366 /// If \p Reg is a pseudo reg, return the correct hardware register given 367 /// \p STI otherwise return \p Reg. 368 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI); 369 370 /// Convert hardware register \p Reg to a pseudo register 371 LLVM_READNONE 372 unsigned mc2PseudoReg(unsigned Reg); 373 374 /// Can this operand also contain immediate values? 375 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo); 376 377 /// Is this floating-point operand? 378 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo); 379 380 /// Does this opearnd support only inlinable literals? 381 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo); 382 383 /// Get the size in bits of a register from the register class \p RC. 
384 unsigned getRegBitWidth(unsigned RCID); 385 386 /// Get the size in bits of a register from the register class \p RC. 387 unsigned getRegBitWidth(const MCRegisterClass &RC); 388 389 /// Get size of register operand 390 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, 391 unsigned OpNo); 392 393 LLVM_READNONE 394 inline unsigned getOperandSize(const MCOperandInfo &OpInfo) { 395 switch (OpInfo.OperandType) { 396 case AMDGPU::OPERAND_REG_IMM_INT32: 397 case AMDGPU::OPERAND_REG_IMM_FP32: 398 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 399 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 400 return 4; 401 402 case AMDGPU::OPERAND_REG_IMM_INT64: 403 case AMDGPU::OPERAND_REG_IMM_FP64: 404 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 405 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 406 return 8; 407 408 case AMDGPU::OPERAND_REG_IMM_INT16: 409 case AMDGPU::OPERAND_REG_IMM_FP16: 410 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 411 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 412 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 413 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 414 return 2; 415 416 default: 417 llvm_unreachable("unhandled operand type"); 418 } 419 } 420 421 LLVM_READNONE 422 inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) { 423 return getOperandSize(Desc.OpInfo[OpNo]); 424 } 425 426 /// Is this literal inlinable 427 LLVM_READNONE 428 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi); 429 430 LLVM_READNONE 431 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi); 432 433 LLVM_READNONE 434 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi); 435 436 LLVM_READNONE 437 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi); 438 439 bool isArgPassedInSGPR(const Argument *Arg); 440 441 /// \returns The encoding that will be used for \p ByteOffset in the SMRD 442 /// offset field. 
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

/// \returns true if this offset is small enough to fit in the SMRD
/// offset field. \p ByteOffset should be the offset in bytes and
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

/// \returns true if the intrinsic \p IntrID is a source of divergence.
bool isIntrinsicSourceOfDivergence(unsigned IntrID);

} // end namespace AMDGPU
} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H