//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRMAP_INFO
#undef GET_INSTRINFO_NAMED_OPS

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
  return Dst;
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}
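// Worked example (values chosen for illustration only): with Shift = 4 and
// Width = 3, getBitMask(4, 3) == 0x70, so packBits(5, 0, 4, 3) == 0x50 and
// unpackBits(0x50, 4, 3) == 5.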

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo() { return 0; }

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi() { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi() { return 2; }
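
// Taken together, the fields above describe the layout of the s_waitcnt
// simm16 operand: vmcnt in bits [3:0] (plus [15:14] on gfx9 and later),
// expcnt in bits [6:4], and lgkmcnt in bits [11:8].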

} // end anonymous namespace

namespace llvm {

namespace AMDGPU {

struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
};

#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords) {
  const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
                                             VDataDwords, VAddrDwords);
  return Info ? Info->Opcode : -1;
}

int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
  const MIMGInfo *NewInfo =
      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
                          NewChannels, OrigInfo->VAddrDwords);
  return NewInfo ? NewInfo->Opcode : -1;
}

// Wrapper for Tablegen'd function.  enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

namespace IsaInfo {

IsaVersion getIsaVersion(const FeatureBitset &Features) {
  // GCN GFX6 (Southern Islands (SI)).
  if (Features.test(FeatureISAVersion6_0_0))
    return {6, 0, 0};
  if (Features.test(FeatureISAVersion6_0_1))
    return {6, 0, 1};

  // GCN GFX7 (Sea Islands (CI)).
  if (Features.test(FeatureISAVersion7_0_0))
    return {7, 0, 0};
  if (Features.test(FeatureISAVersion7_0_1))
    return {7, 0, 1};
  if (Features.test(FeatureISAVersion7_0_2))
    return {7, 0, 2};
  if (Features.test(FeatureISAVersion7_0_3))
    return {7, 0, 3};
  if (Features.test(FeatureISAVersion7_0_4))
    return {7, 0, 4};
  if (Features.test(FeatureSeaIslands))
    return {7, 0, 0};

  // GCN GFX8 (Volcanic Islands (VI)).
  if (Features.test(FeatureISAVersion8_0_1))
    return {8, 0, 1};
  if (Features.test(FeatureISAVersion8_0_2))
    return {8, 0, 2};
  if (Features.test(FeatureISAVersion8_0_3))
    return {8, 0, 3};
  if (Features.test(FeatureISAVersion8_1_0))
    return {8, 1, 0};
  if (Features.test(FeatureVolcanicIslands))
    return {8, 0, 0};

  // GCN GFX9.
  if (Features.test(FeatureISAVersion9_0_0))
    return {9, 0, 0};
  if (Features.test(FeatureISAVersion9_0_2))
    return {9, 0, 2};
  if (Features.test(FeatureISAVersion9_0_4))
    return {9, 0, 4};
  if (Features.test(FeatureISAVersion9_0_6))
    return {9, 0, 6};
  if (Features.test(FeatureGFX9))
    return {9, 0, 0};

  if (Features.test(FeatureSouthernIslands))
    return {0, 0, 0};
  return {7, 0, 0};
}

void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
  auto TargetTriple = STI->getTargetTriple();
  auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits());

  Stream << TargetTriple.getArchName() << '-'
         << TargetTriple.getVendorName() << '-'
         << TargetTriple.getOSName() << '-'
         << TargetTriple.getEnvironmentName() << '-'
         << "gfx"
         << ISAVersion.Major
         << ISAVersion.Minor
         << ISAVersion.Stepping;

  if (hasXNACK(*STI))
    Stream << "+xnack";

  Stream.flush();
}

bool hasCodeObjectV3(const MCSubtargetInfo *STI) {
  return STI->getFeatureBits().test(FeatureCodeObjectV3);
}

unsigned getWavefrontSize(const FeatureBitset &Features) {
  if (Features.test(FeatureWavefrontSize16))
    return 16;
  if (Features.test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const FeatureBitset &Features) {
  if (Features.test(FeatureLocalMemorySize32768))
    return 32768;
  if (Features.test(FeatureLocalMemorySize65536))
    return 65536;

  return 0;
}

unsigned getEUsPerCU(const FeatureBitset &Features) {
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
                               unsigned FlatWorkGroupSize) {
  if (!Features.test(FeatureGCN))
    return 8;
  unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
  if (N == 1)
    return 40;
  N = 40 / N;
  return std::min(N, 16u);
}
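// For illustration: with a 64-wide wavefront and FlatWorkGroupSize = 256,
// getWavesPerWorkGroup returns 4, so getMaxWorkGroupsPerCU allows at most
// 40 / 4 = 10 work-groups of that size on a GCN compute unit.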

unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
  return getMaxWavesPerEU() * getEUsPerCU(Features);
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
}

unsigned getMinWavesPerEU(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxWavesPerEU() {
  // FIXME: Need to take scratch memory into account.
  return 10;
}

unsigned getMaxWavesPerEU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
                 getEUsPerCU(Features)) / getEUsPerCU(Features);
}

unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
  return 2048;
}

unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
                              unsigned FlatWorkGroupSize) {
  return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
                 getWavefrontSize(Features);
}

unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
  return 8;
}

unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
  if (Features.test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU())
    return 0;

  unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1);
  if (Features.test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(Features)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
}

unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(Features);
  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(Features) / WavesPerEU;
  if (Features.test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(Features));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}
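// Worked example (gfx8, no trap handler, for illustration): at 10 waves per
// EU, getMaxNumSGPRs is alignDown(800 / 10, 16) = 80, clamped to the 102
// addressable SGPRs, i.e. 80.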

unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  if (VCCUsed)
    ExtraSGPRs = 2;

  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major < 8) {
    if (FlatScrUsed)
      ExtraSGPRs = 4;
  } else {
    if (XNACKUsed)
      ExtraSGPRs = 4;

    if (FlatScrUsed)
      ExtraSGPRs = 6;
  }

  return ExtraSGPRs;
}

unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed) {
  return getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed,
                          Features[AMDGPU::FeatureXNACK]);
}

unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs) {
  NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(Features));
  // SGPRBlocks is actual number of SGPR blocks minus 1.
  return NumSGPRs / getSGPREncodingGranule(Features) - 1;
}
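// For example, 80 SGPRs encode as alignTo(80, 8) / 8 - 1 = 9 SGPR blocks.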

unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
  return 4;
}

unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
  return getVGPRAllocGranule(Features);
}

unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
  return 256;
}

unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
  return getTotalNumVGPRs(Features);
}

unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU())
    return 0;
  unsigned MinNumVGPRs =
      alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
                getVGPRAllocGranule(Features)) + 1;
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
}

unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
                                   getVGPRAllocGranule(Features));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}
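// For illustration: at 10 waves per EU, getMaxNumVGPRs is
// alignDown(256 / 10, 4) = 24 VGPRs.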

unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumVGPRs) {
  NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(Features));
  // VGPRBlocks is actual number of VGPR blocks minus 1.
  return NumVGPRs / getVGPREncodingGranule(Features) - 1;
}

} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const FeatureBitset &Features) {
  IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 2;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = ISA.Major;
  Header.amd_machine_version_minor = ISA.Minor;
  Header.amd_machine_version_stepping = ISA.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // wavefront_size holds the log2 of the wavefront size: 2^6 = 64 threads.
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value must
  // be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n.  The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}

amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor() {
  amdhsa::kernel_descriptor_t KD;
  memset(&KD, 0, sizeof(KD));
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
                  amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
                  amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
  return KD;
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
         GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getOS() != Triple::AMDHSA;
}

int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);
    }
  }

  return Result;
}

std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}
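// Usage sketch (attribute name chosen for illustration): a string function
// attribute such as "amdgpu-flat-work-group-size"="128,256" parses to
// {128, 256}; with OnlyFirstRequired set, a bare "2" parses to
// {2, Default.second} without reporting an error.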

unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getExpcntBitWidth()) - 1;
}

unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth()) - 1;
}

unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
  if (Version.Major < 9)
    return Waitcnt;

  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
  return Waitcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo =
      unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi =
      unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
  VmcntHi <<= getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt =
      packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return Waitcnt;

  Vmcnt >>= getVmcntBitWidthLo();
  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}

unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}
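// Worked example (pre-gfx9 layout, for illustration): encodeWaitcnt with
// Vmcnt = 1, Expcnt = 2, Lgkmcnt = 3 produces 0x321, and
// decodeWaitcnt(Version, 0x321, ...) recovers the same three counts.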

unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}

bool isShader(CallingConv::ID cc) {
  switch(cc) {
    case CallingConv::AMDGPU_VS:
    case CallingConv::AMDGPU_LS:
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_ES:
    case CallingConv::AMDGPU_GS:
    case CallingConv::AMDGPU_PS:
    case CallingConv::AMDGPU_CS:
      return true;
    default:
      return false;
  }
}

bool isCompute(CallingConv::ID cc) {
  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}

bool isEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_LS:
    return true;
  default:
    return false;
  }
}

bool hasXNACK(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}

bool hasMIMG_R128(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
}

bool hasPackedD16(const MCSubtargetInfo &STI) {
  return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem];
}

bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

bool isGCN3Encoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}

bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
    Reg == AMDGPU::SCC;
}

bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
  for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
    if (*R == Reg1) return true;
  }
  return false;
}

#define MAP_REG2REG \
  using namespace AMDGPU; \
  switch(Reg) { \
  default: return Reg; \
  CASE_CI_VI(FLAT_SCR) \
  CASE_CI_VI(FLAT_SCR_LO) \
  CASE_CI_VI(FLAT_SCR_HI) \
  CASE_VI_GFX9(TTMP0) \
  CASE_VI_GFX9(TTMP1) \
  CASE_VI_GFX9(TTMP2) \
  CASE_VI_GFX9(TTMP3) \
  CASE_VI_GFX9(TTMP4) \
  CASE_VI_GFX9(TTMP5) \
  CASE_VI_GFX9(TTMP6) \
  CASE_VI_GFX9(TTMP7) \
  CASE_VI_GFX9(TTMP8) \
  CASE_VI_GFX9(TTMP9) \
  CASE_VI_GFX9(TTMP10) \
  CASE_VI_GFX9(TTMP11) \
  CASE_VI_GFX9(TTMP12) \
  CASE_VI_GFX9(TTMP13) \
  CASE_VI_GFX9(TTMP14) \
  CASE_VI_GFX9(TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1) \
  CASE_VI_GFX9(TTMP2_TTMP3) \
  CASE_VI_GFX9(TTMP4_TTMP5) \
  CASE_VI_GFX9(TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP8_TTMP9) \
  CASE_VI_GFX9(TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP12_TTMP13) \
  CASE_VI_GFX9(TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3) \
  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  }

#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: return isCI(STI) ? node##_ci : node##_vi;

#define CASE_VI_GFX9(node) \
  case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;

unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  if (STI.getTargetTriple().getArch() == Triple::r600)
    return Reg;
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9

#define CASE_CI_VI(node)   case node##_ci: case node##_vi:   return node;
#define CASE_VI_GFX9(node) case node##_vi: case node##_gfx9: return node;

unsigned mc2PseudoReg(unsigned Reg) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9
#undef MAP_REG2REG
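// For illustration: on a CI subtarget, getMCReg(FLAT_SCR, STI) yields
// FLAT_SCR_ci, mc2PseudoReg(FLAT_SCR_ci) maps it back to FLAT_SCR, and
// registers outside the table are returned unchanged.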

bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return true;
  default:
    return false;
  }
}

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
    return 64;
  case AMDGPU::VReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
    return 128;
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
    return 512;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}
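// For example, an operand constrained to VReg_64 reports 64 / 8 = 8 bytes.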

bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
}

bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values.  For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
         (Val == FloatToBits(1.0f)) ||
         (Val == FloatToBits(-1.0f)) ||
         (Val == FloatToBits(0.5f)) ||
         (Val == FloatToBits(-0.5f)) ||
         (Val == FloatToBits(2.0f)) ||
         (Val == FloatToBits(-2.0f)) ||
         (Val == FloatToBits(4.0f)) ||
         (Val == FloatToBits(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi);
}

bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (Literal >= -16 && Literal <= 64)
    return true;

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}

bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}
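// For illustration: the packed v2f16 literal 0x40004000 (2.0 in both halves)
// is inlinable, while 0x40003C00 (2.0, 1.0) is not because the two halves
// differ.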

bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
    // Everything else is in VGPRs.
    return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
           F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
  default:
    // TODO: Should calls support inreg for SGPR inputs?
    return false;
  }
}

int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  if (isGCN3Encoding(ST))
    return ByteOffset;
  return ByteOffset >> 2;
}

bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
  return isGCN3Encoding(ST) ?
    isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
}
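// For illustration: on pre-GCN3 targets (SI/CI) the SMRD immediate is in
// dwords, so a byte offset of 1020 encodes as 255 and fits the 8-bit field;
// with the GCN3 encoding the offset stays in bytes and must fit in 20 bits.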

} // end namespace AMDGPU

} // end namespace llvm

namespace llvm {
namespace AMDGPU {

AMDGPUAS getAMDGPUAS(Triple T) {
  AMDGPUAS AS;
  AS.FLAT_ADDRESS = 0;
  AS.PRIVATE_ADDRESS = 5;
  AS.REGION_ADDRESS = 2;
  return AS;
}

AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
  return getAMDGPUAS(M.getTargetTriple());
}

AMDGPUAS getAMDGPUAS(const Module &M) {
  return getAMDGPUAS(Triple(M.getTargetTriple()));
}

namespace {

struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

#define GET_SourcesOfDivergence_IMPL
#include "AMDGPUGenSearchableTables.inc"

} // end anonymous namespace

bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
  return lookupSourceOfDivergence(IntrID);
}
} // namespace AMDGPU
} // namespace llvm