Home | History | Annotate | Download | only in Utils
      1 //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
     11 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
     12 
     13 #include "AMDGPU.h"
     14 #include "AMDKernelCodeT.h"
     15 #include "SIDefines.h"
     16 #include "llvm/ADT/StringRef.h"
     17 #include "llvm/IR/CallingConv.h"
     18 #include "llvm/MC/MCInstrDesc.h"
     19 #include "llvm/Support/AMDHSAKernelDescriptor.h"
     20 #include "llvm/Support/Compiler.h"
     21 #include "llvm/Support/ErrorHandling.h"
     22 #include <cstdint>
     23 #include <string>
     24 #include <utility>
     25 
     26 namespace llvm {
     27 
     28 class Argument;
     29 class FeatureBitset;
     30 class Function;
     31 class GlobalValue;
     32 class MCContext;
     33 class MCRegisterClass;
     34 class MCRegisterInfo;
     35 class MCSection;
     36 class MCSubtargetInfo;
     37 class MachineMemOperand;
     38 class Triple;
     39 
     40 namespace AMDGPU {
     41 
     42 #define GET_MIMGBaseOpcode_DECL
     43 #define GET_MIMGDim_DECL
     44 #define GET_MIMGEncoding_DECL
     45 #define GET_MIMGLZMapping_DECL
     46 #include "AMDGPUGenSearchableTables.inc"
     47 
     48 namespace IsaInfo {
     49 
     50 enum {
     51   // The closed Vulkan driver sets 96, which limits the wave count to 8 but
     52   // doesn't spill SGPRs as much as when 80 is set.
     53   FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
     54   TRAP_NUM_SGPRS = 16
     55 };
     56 
     57 /// Instruction set architecture version.
     58 struct IsaVersion {
     59   unsigned Major;
     60   unsigned Minor;
     61   unsigned Stepping;
     62 };
     63 
     64 /// \returns Isa version for given subtarget \p Features.
     65 IsaVersion getIsaVersion(const FeatureBitset &Features);
     66 
     67 /// Streams isa version string for given subtarget \p STI into \p Stream.
     68 void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
     69 
     70 /// \returns True if given subtarget \p STI supports code object version 3,
     71 /// false otherwise.
     72 bool hasCodeObjectV3(const MCSubtargetInfo *STI);
     73 
     74 /// \returns Wavefront size for given subtarget \p Features.
     75 unsigned getWavefrontSize(const FeatureBitset &Features);
     76 
     77 /// \returns Local memory size in bytes for given subtarget \p Features.
     78 unsigned getLocalMemorySize(const FeatureBitset &Features);
     79 
     80 /// \returns Number of execution units per compute unit for given subtarget \p
     81 /// Features.
     82 unsigned getEUsPerCU(const FeatureBitset &Features);
     83 
     84 /// \returns Maximum number of work groups per compute unit for given subtarget
     85 /// \p Features and limited by given \p FlatWorkGroupSize.
     86 unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
     87                                unsigned FlatWorkGroupSize);
     88 
     89 /// \returns Maximum number of waves per compute unit for given subtarget \p
     90 /// Features without any kind of limitation.
     91 unsigned getMaxWavesPerCU(const FeatureBitset &Features);
     92 
     93 /// \returns Maximum number of waves per compute unit for given subtarget \p
     94 /// Features and limited by given \p FlatWorkGroupSize.
     95 unsigned getMaxWavesPerCU(const FeatureBitset &Features,
     96                           unsigned FlatWorkGroupSize);
     97 
     98 /// \returns Minimum number of waves per execution unit for given subtarget \p
     99 /// Features.
    100 unsigned getMinWavesPerEU(const FeatureBitset &Features);
    101 
/// \returns Maximum number of waves per execution unit without any kind of
/// limitation (this overload takes no subtarget features).
    104 unsigned getMaxWavesPerEU();
    105 
    106 /// \returns Maximum number of waves per execution unit for given subtarget \p
    107 /// Features and limited by given \p FlatWorkGroupSize.
    108 unsigned getMaxWavesPerEU(const FeatureBitset &Features,
    109                           unsigned FlatWorkGroupSize);
    110 
    111 /// \returns Minimum flat work group size for given subtarget \p Features.
    112 unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features);
    113 
    114 /// \returns Maximum flat work group size for given subtarget \p Features.
    115 unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features);
    116 
    117 /// \returns Number of waves per work group for given subtarget \p Features and
    118 /// limited by given \p FlatWorkGroupSize.
    119 unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
    120                               unsigned FlatWorkGroupSize);
    121 
    122 /// \returns SGPR allocation granularity for given subtarget \p Features.
    123 unsigned getSGPRAllocGranule(const FeatureBitset &Features);
    124 
    125 /// \returns SGPR encoding granularity for given subtarget \p Features.
    126 unsigned getSGPREncodingGranule(const FeatureBitset &Features);
    127 
    128 /// \returns Total number of SGPRs for given subtarget \p Features.
    129 unsigned getTotalNumSGPRs(const FeatureBitset &Features);
    130 
    131 /// \returns Addressable number of SGPRs for given subtarget \p Features.
    132 unsigned getAddressableNumSGPRs(const FeatureBitset &Features);
    133 
    134 /// \returns Minimum number of SGPRs that meets the given number of waves per
    135 /// execution unit requirement for given subtarget \p Features.
    136 unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
    137 
    138 /// \returns Maximum number of SGPRs that meets the given number of waves per
    139 /// execution unit requirement for given subtarget \p Features.
    140 unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
    141                         bool Addressable);
    142 
    143 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
    144 /// Features when the given special registers are used.
    145 unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
    146                           bool FlatScrUsed, bool XNACKUsed);
    147 
    148 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
    149 /// Features when the given special registers are used. XNACK is inferred from
    150 /// \p Features.
    151 unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
    152                           bool FlatScrUsed);
    153 
    154 /// \returns Number of SGPR blocks needed for given subtarget \p Features when
    155 /// \p NumSGPRs are used. \p NumSGPRs should already include any special
    156 /// register counts.
    157 unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);
    158 
    159 /// \returns VGPR allocation granularity for given subtarget \p Features.
    160 unsigned getVGPRAllocGranule(const FeatureBitset &Features);
    161 
    162 /// \returns VGPR encoding granularity for given subtarget \p Features.
    163 unsigned getVGPREncodingGranule(const FeatureBitset &Features);
    164 
    165 /// \returns Total number of VGPRs for given subtarget \p Features.
    166 unsigned getTotalNumVGPRs(const FeatureBitset &Features);
    167 
    168 /// \returns Addressable number of VGPRs for given subtarget \p Features.
    169 unsigned getAddressableNumVGPRs(const FeatureBitset &Features);
    170 
    171 /// \returns Minimum number of VGPRs that meets given number of waves per
    172 /// execution unit requirement for given subtarget \p Features.
    173 unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
    174 
    175 /// \returns Maximum number of VGPRs that meets given number of waves per
    176 /// execution unit requirement for given subtarget \p Features.
    177 unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
    178 
    179 /// \returns Number of VGPR blocks needed for given subtarget \p Features when
    180 /// \p NumVGPRs are used.
    181 unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);
    182 
    183 } // end namespace IsaInfo
    184 
    185 LLVM_READONLY
    186 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
    187 
    188 struct MIMGBaseOpcodeInfo {
    189   MIMGBaseOpcode BaseOpcode;
    190   bool Store;
    191   bool Atomic;
    192   bool AtomicX2;
    193   bool Sampler;
    194 
    195   uint8_t NumExtraArgs;
    196   bool Gradients;
    197   bool Coordinates;
    198   bool LodOrClampOrMip;
    199   bool HasD16;
    200 };
    201 
    202 LLVM_READONLY
    203 const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
    204 
    205 struct MIMGDimInfo {
    206   MIMGDim Dim;
    207   uint8_t NumCoords;
    208   uint8_t NumGradients;
    209   bool DA;
    210 };
    211 
    212 LLVM_READONLY
    213 const MIMGDimInfo *getMIMGDimInfo(unsigned Dim);
    214 
    215 struct MIMGLZMappingInfo {
    216   MIMGBaseOpcode L;
    217   MIMGBaseOpcode LZ;
    218 };
    219 
    220 LLVM_READONLY
    221 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
    222 
    223 LLVM_READONLY
    224 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
    225                   unsigned VDataDwords, unsigned VAddrDwords);
    226 
    227 LLVM_READONLY
    228 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
    229 
    230 LLVM_READONLY
    231 int getMCOpcode(uint16_t Opcode, unsigned Gen);
    232 
    233 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
    234                                const FeatureBitset &Features);
    235 
    236 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor();
    237 
    238 bool isGroupSegment(const GlobalValue *GV);
    239 bool isGlobalSegment(const GlobalValue *GV);
    240 bool isReadOnlySegment(const GlobalValue *GV);
    241 
    242 /// \returns True if constants should be emitted to .text section for given
    243 /// target triple \p TT, false otherwise.
    244 bool shouldEmitConstantsToTextSection(const Triple &TT);
    245 
    246 /// \returns Integer value requested using \p F's \p Name attribute.
    247 ///
    248 /// \returns \p Default if attribute is not present.
    249 ///
    250 /// \returns \p Default and emits error if requested value cannot be converted
    251 /// to integer.
    252 int getIntegerAttribute(const Function &F, StringRef Name, int Default);
    253 
    254 /// \returns A pair of integer values requested using \p F's \p Name attribute
    255 /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
    256 /// is false).
    257 ///
    258 /// \returns \p Default if attribute is not present.
    259 ///
    260 /// \returns \p Default and emits error if one of the requested values cannot be
    261 /// converted to integer, or \p OnlyFirstRequired is false and "second" value is
    262 /// not present.
    263 std::pair<int, int> getIntegerPairAttribute(const Function &F,
    264                                             StringRef Name,
    265                                             std::pair<int, int> Default,
    266                                             bool OnlyFirstRequired = false);
    267 
    268 /// \returns Vmcnt bit mask for given isa \p Version.
    269 unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version);
    270 
    271 /// \returns Expcnt bit mask for given isa \p Version.
    272 unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version);
    273 
    274 /// \returns Lgkmcnt bit mask for given isa \p Version.
    275 unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version);
    276 
    277 /// \returns Waitcnt bit mask for given isa \p Version.
    278 unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version);
    279 
    280 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
    281 unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
    282 
    283 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
    284 unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
    285 
    286 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
    287 unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
    288 
    289 /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
    290 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
    291 /// \p Lgkmcnt respectively.
    292 ///
    293 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
    294 ///     \p Vmcnt = \p Waitcnt[3:0]                      (pre-gfx9 only)
    295 ///     \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14]  (gfx9+ only)
    296 ///     \p Expcnt = \p Waitcnt[6:4]
    297 ///     \p Lgkmcnt = \p Waitcnt[11:8]
    298 void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
    299                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
    300 
    301 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
    302 unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
    303                      unsigned Vmcnt);
    304 
    305 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
    306 unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
    307                       unsigned Expcnt);
    308 
    309 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
    310 unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
    311                        unsigned Lgkmcnt);
    312 
    313 /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
    314 /// \p Version.
    315 ///
    316 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
    317 ///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
    318 ///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
    319 ///     Waitcnt[6:4]   = \p Expcnt
    320 ///     Waitcnt[11:8]  = \p Lgkmcnt
    321 ///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
    322 ///
    323 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
    324 /// isa \p Version.
    325 unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
    326                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
    327 
    328 unsigned getInitialPSInputAddr(const Function &F);
    329 
    330 LLVM_READNONE
    331 bool isShader(CallingConv::ID CC);
    332 
    333 LLVM_READNONE
    334 bool isCompute(CallingConv::ID CC);
    335 
    336 LLVM_READNONE
    337 bool isEntryFunctionCC(CallingConv::ID CC);
    338 
    339 // FIXME: Remove this when calling conventions cleaned up
    340 LLVM_READNONE
    341 inline bool isKernel(CallingConv::ID CC) {
    342   switch (CC) {
    343   case CallingConv::AMDGPU_KERNEL:
    344   case CallingConv::SPIR_KERNEL:
    345     return true;
    346   default:
    347     return false;
    348   }
    349 }
    350 
    351 bool hasXNACK(const MCSubtargetInfo &STI);
    352 bool hasMIMG_R128(const MCSubtargetInfo &STI);
    353 bool hasPackedD16(const MCSubtargetInfo &STI);
    354 
    355 bool isSI(const MCSubtargetInfo &STI);
    356 bool isCI(const MCSubtargetInfo &STI);
    357 bool isVI(const MCSubtargetInfo &STI);
    358 bool isGFX9(const MCSubtargetInfo &STI);
    359 
/// Is \p Reg a scalar register (SGPR)?
    361 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
    362 
    363 /// Is there any intersection between registers
    364 bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);
    365 
    366 /// If \p Reg is a pseudo reg, return the correct hardware register given
    367 /// \p STI otherwise return \p Reg.
    368 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
    369 
    370 /// Convert hardware register \p Reg to a pseudo register
    371 LLVM_READNONE
    372 unsigned mc2PseudoReg(unsigned Reg);
    373 
    374 /// Can this operand also contain immediate values?
    375 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
    376 
    377 /// Is this floating-point operand?
    378 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
    379 
/// Does this operand support only inlinable literals?
    381 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
    382 
/// Get the size in bits of a register from the register class with ID \p RCID.
    384 unsigned getRegBitWidth(unsigned RCID);
    385 
    386 /// Get the size in bits of a register from the register class \p RC.
    387 unsigned getRegBitWidth(const MCRegisterClass &RC);
    388 
    389 /// Get size of register operand
    390 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
    391                            unsigned OpNo);
    392 
    393 LLVM_READNONE
    394 inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
    395   switch (OpInfo.OperandType) {
    396   case AMDGPU::OPERAND_REG_IMM_INT32:
    397   case AMDGPU::OPERAND_REG_IMM_FP32:
    398   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    399   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    400     return 4;
    401 
    402   case AMDGPU::OPERAND_REG_IMM_INT64:
    403   case AMDGPU::OPERAND_REG_IMM_FP64:
    404   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    405   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    406     return 8;
    407 
    408   case AMDGPU::OPERAND_REG_IMM_INT16:
    409   case AMDGPU::OPERAND_REG_IMM_FP16:
    410   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    411   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    412   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    413   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    414     return 2;
    415 
    416   default:
    417     llvm_unreachable("unhandled operand type");
    418   }
    419 }
    420 
    421 LLVM_READNONE
    422 inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
    423   return getOperandSize(Desc.OpInfo[OpNo]);
    424 }
    425 
    426 /// Is this literal inlinable
    427 LLVM_READNONE
    428 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
    429 
    430 LLVM_READNONE
    431 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
    432 
    433 LLVM_READNONE
    434 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
    435 
    436 LLVM_READNONE
    437 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
    438 
    439 bool isArgPassedInSGPR(const Argument *Arg);
    440 
    441 /// \returns The encoding that will be used for \p ByteOffset in the SMRD
    442 /// offset field.
    443 int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
    444 
    445 /// \returns true if this offset is small enough to fit in the SMRD
    446 /// offset field.  \p ByteOffset should be the offset in bytes and
    447 /// not the encoded offset.
    448 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
    449 
    450 /// \returns true if the intrinsic is divergent
    451 bool isIntrinsicSourceOfDivergence(unsigned IntrID);
    452 
    453 } // end namespace AMDGPU
    454 } // end namespace llvm
    455 
    456 #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
    457