Home | History | Annotate | Download | only in AMDGPU
      1 //==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
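/// \file
/// Declares SIMachineFunctionInfo, the per-function state kept for SI code
/// generation, together with the pseudo source values used for buffer and
/// image resources.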
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
     15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
     16 
     17 #include "AMDGPUArgumentUsageInfo.h"
     18 #include "AMDGPUMachineFunction.h"
     19 #include "SIInstrInfo.h"
     20 #include "SIRegisterInfo.h"
     21 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
     22 #include "llvm/ADT/ArrayRef.h"
     23 #include "llvm/ADT/DenseMap.h"
     24 #include "llvm/ADT/Optional.h"
     25 #include "llvm/ADT/SmallVector.h"
     26 #include "llvm/CodeGen/PseudoSourceValue.h"
     27 #include "llvm/CodeGen/TargetInstrInfo.h"
     28 #include "llvm/MC/MCRegisterInfo.h"
     29 #include "llvm/Support/ErrorHandling.h"
     30 #include <array>
     31 #include <cassert>
     32 #include <utility>
     33 #include <vector>
     34 
     35 namespace llvm {
     36 
     37 class MachineFrameInfo;
     38 class MachineFunction;
     39 class TargetRegisterClass;
     40 
     41 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
     42 public:
     43   // TODO: Is the img rsrc useful?
     44   explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) :
     45     PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}
     46 
     47   bool isConstant(const MachineFrameInfo *) const override {
     48     // This should probably be true for most images, but we will start by being
     49     // conservative.
     50     return false;
     51   }
     52 
     53   bool isAliased(const MachineFrameInfo *) const override {
     54     return true;
     55   }
     56 
     57   bool mayAlias(const MachineFrameInfo *) const override {
     58     return true;
     59   }
     60 };
     61 
     62 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
     63 public:
     64   explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) :
     65     PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }
     66 
     67   bool isConstant(const MachineFrameInfo *) const override {
     68     // This should probably be true for most images, but we will start by being
     69     // conservative.
     70     return false;
     71   }
     72 
     73   bool isAliased(const MachineFrameInfo *) const override {
     74     return true;
     75   }
     76 
     77   bool mayAlias(const MachineFrameInfo *) const override {
     78     return true;
     79   }
     80 };
     81 
     82 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
     83 /// tells the hardware which interpolation parameters to load.
     84 class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
     85   unsigned TIDReg = AMDGPU::NoRegister;
     86 
     87   // Registers that may be reserved for spilling purposes. These may be the same
     88   // as the input registers.
     89   unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
     90   unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;
     91 
     92   // This is the current function's incremented size from the kernel's scratch
     93   // wave offset register. For an entry function, this is exactly the same as
     94   // the ScratchWaveOffsetReg.
     95   unsigned FrameOffsetReg = AMDGPU::FP_REG;
     96 
     97   // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
     98   unsigned StackPtrOffsetReg = AMDGPU::SP_REG;
     99 
    100   AMDGPUFunctionArgInfo ArgInfo;
    101 
    102   // Graphics info.
    103   unsigned PSInputAddr = 0;
    104   unsigned PSInputEnable = 0;
    105 
    106   /// Number of bytes of arguments this function has on the stack. If the callee
    107   /// is expected to restore the argument stack this should be a multiple of 16,
    108   /// all usable during a tail call.
    109   ///
    110   /// The alternative would forbid tail call optimisation in some cases: if we
    111   /// want to transfer control from a function with 8-bytes of stack-argument
    112   /// space to a function with 16-bytes then misalignment of this value would
    113   /// make a stack adjustment necessary, which could not be undone by the
    114   /// callee.
    115   unsigned BytesInStackArgArea = 0;
    116 
    117   bool ReturnsVoid = true;
    118 
    119   // A pair of default/requested minimum/maximum flat work group sizes.
    120   // Minimum - first, maximum - second.
    121   std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};
    122 
    123   // A pair of default/requested minimum/maximum number of waves per execution
    124   // unit. Minimum - first, maximum - second.
    125   std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
    126 
    127   // Stack object indices for work group IDs.
    128   std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}};
    129 
    130   // Stack object indices for work item IDs.
    131   std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};
    132 
    133   DenseMap<const Value *,
    134            std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
    135   DenseMap<const Value *,
    136            std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
    137 
    138 private:
    139   unsigned LDSWaveSpillSize = 0;
    140   unsigned NumUserSGPRs = 0;
    141   unsigned NumSystemSGPRs = 0;
    142 
    143   bool HasSpilledSGPRs = false;
    144   bool HasSpilledVGPRs = false;
    145   bool HasNonSpillStackObjects = false;
    146   bool IsStackRealigned = false;
    147 
    148   unsigned NumSpilledSGPRs = 0;
    149   unsigned NumSpilledVGPRs = 0;
    150 
    151   // Feature bits required for inputs passed in user SGPRs.
    152   bool PrivateSegmentBuffer : 1;
    153   bool DispatchPtr : 1;
    154   bool QueuePtr : 1;
    155   bool KernargSegmentPtr : 1;
    156   bool DispatchID : 1;
    157   bool FlatScratchInit : 1;
    158 
    159   // Feature bits required for inputs passed in system SGPRs.
    160   bool WorkGroupIDX : 1; // Always initialized.
    161   bool WorkGroupIDY : 1;
    162   bool WorkGroupIDZ : 1;
    163   bool WorkGroupInfo : 1;
    164   bool PrivateSegmentWaveByteOffset : 1;
    165 
    166   bool WorkItemIDX : 1; // Always initialized.
    167   bool WorkItemIDY : 1;
    168   bool WorkItemIDZ : 1;
    169 
    170   // Private memory buffer
    171   // Compute directly in sgpr[0:1]
    172   // Other shaders indirect 64-bits at sgpr[0:1]
    173   bool ImplicitBufferPtr : 1;
    174 
    175   // Pointer to where the ABI inserts special kernel arguments separate from the
    176   // user arguments. This is an offset from the KernargSegmentPtr.
    177   bool ImplicitArgPtr : 1;
    178 
    179   // The hard-wired high half of the address of the global information table
    180   // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
    181   // current hardware only allows a 16 bit value.
    182   unsigned GITPtrHigh;
    183 
    184   unsigned HighBitsOf32BitAddress;
    185 
    186   // Current recorded maximum possible occupancy.
    187   unsigned Occupancy;
    188 
    189   MCPhysReg getNextUserSGPR() const;
    190 
    191   MCPhysReg getNextSystemSGPR() const;
    192 
    193 public:
    194   struct SpilledReg {
    195     unsigned VGPR = 0;
    196     int Lane = -1;
    197 
    198     SpilledReg() = default;
    199     SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {}
    200 
    201     bool hasLane() { return Lane != -1;}
    202     bool hasReg() { return VGPR != 0;}
    203   };
    204 
    205   struct SGPRSpillVGPRCSR {
    206     // VGPR used for SGPR spills
    207     unsigned VGPR;
    208 
    209     // If the VGPR is a CSR, the stack slot used to save/restore it in the
    210     // prolog/epilog.
    211     Optional<int> FI;
    212 
    213     SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
    214   };
    215 
    216 private:
    217   // SGPR->VGPR spilling support.
    218   using SpillRegMask = std::pair<unsigned, unsigned>;
    219 
    220   // Track VGPR + wave index for each subregister of the SGPR spilled to
    221   // frameindex key.
    222   DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
    223   unsigned NumVGPRSpillLanes = 0;
    224   SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;
    225 
    226 public:
    227   SIMachineFunctionInfo(const MachineFunction &MF);
    228 
    229   ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
    230     auto I = SGPRToVGPRSpills.find(FrameIndex);
    231     return (I == SGPRToVGPRSpills.end()) ?
    232       ArrayRef<SpilledReg>() : makeArrayRef(I->second);
    233   }
    234 
    235   ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
    236     return SpillVGPRs;
    237   }
    238 
    239   bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
    240   void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
    241 
    242   bool hasCalculatedTID() const { return TIDReg != 0; };
    243   unsigned getTIDReg() const { return TIDReg; };
    244   void setTIDReg(unsigned Reg) { TIDReg = Reg; }
    245 
    246   unsigned getBytesInStackArgArea() const {
    247     return BytesInStackArgArea;
    248   }
    249 
    250   void setBytesInStackArgArea(unsigned Bytes) {
    251     BytesInStackArgArea = Bytes;
    252   }
    253 
    254   // Add user SGPRs.
    255   unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
    256   unsigned addDispatchPtr(const SIRegisterInfo &TRI);
    257   unsigned addQueuePtr(const SIRegisterInfo &TRI);
    258   unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
    259   unsigned addDispatchID(const SIRegisterInfo &TRI);
    260   unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
    261   unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);
    262 
    263   // Add system SGPRs.
    264   unsigned addWorkGroupIDX() {
    265     ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
    266     NumSystemSGPRs += 1;
    267     return ArgInfo.WorkGroupIDX.getRegister();
    268   }
    269 
    270   unsigned addWorkGroupIDY() {
    271     ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
    272     NumSystemSGPRs += 1;
    273     return ArgInfo.WorkGroupIDY.getRegister();
    274   }
    275 
    276   unsigned addWorkGroupIDZ() {
    277     ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
    278     NumSystemSGPRs += 1;
    279     return ArgInfo.WorkGroupIDZ.getRegister();
    280   }
    281 
    282   unsigned addWorkGroupInfo() {
    283     ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
    284     NumSystemSGPRs += 1;
    285     return ArgInfo.WorkGroupInfo.getRegister();
    286   }
    287 
    288   // Add special VGPR inputs
    289   void setWorkItemIDX(ArgDescriptor Arg) {
    290     ArgInfo.WorkItemIDX = Arg;
    291   }
    292 
    293   void setWorkItemIDY(ArgDescriptor Arg) {
    294     ArgInfo.WorkItemIDY = Arg;
    295   }
    296 
    297   void setWorkItemIDZ(ArgDescriptor Arg) {
    298     ArgInfo.WorkItemIDZ = Arg;
    299   }
    300 
    301   unsigned addPrivateSegmentWaveByteOffset() {
    302     ArgInfo.PrivateSegmentWaveByteOffset
    303       = ArgDescriptor::createRegister(getNextSystemSGPR());
    304     NumSystemSGPRs += 1;
    305     return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
    306   }
    307 
    308   void setPrivateSegmentWaveByteOffset(unsigned Reg) {
    309     ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
    310   }
    311 
    312   bool hasPrivateSegmentBuffer() const {
    313     return PrivateSegmentBuffer;
    314   }
    315 
    316   bool hasDispatchPtr() const {
    317     return DispatchPtr;
    318   }
    319 
    320   bool hasQueuePtr() const {
    321     return QueuePtr;
    322   }
    323 
    324   bool hasKernargSegmentPtr() const {
    325     return KernargSegmentPtr;
    326   }
    327 
    328   bool hasDispatchID() const {
    329     return DispatchID;
    330   }
    331 
    332   bool hasFlatScratchInit() const {
    333     return FlatScratchInit;
    334   }
    335 
    336   bool hasWorkGroupIDX() const {
    337     return WorkGroupIDX;
    338   }
    339 
    340   bool hasWorkGroupIDY() const {
    341     return WorkGroupIDY;
    342   }
    343 
    344   bool hasWorkGroupIDZ() const {
    345     return WorkGroupIDZ;
    346   }
    347 
    348   bool hasWorkGroupInfo() const {
    349     return WorkGroupInfo;
    350   }
    351 
    352   bool hasPrivateSegmentWaveByteOffset() const {
    353     return PrivateSegmentWaveByteOffset;
    354   }
    355 
    356   bool hasWorkItemIDX() const {
    357     return WorkItemIDX;
    358   }
    359 
    360   bool hasWorkItemIDY() const {
    361     return WorkItemIDY;
    362   }
    363 
    364   bool hasWorkItemIDZ() const {
    365     return WorkItemIDZ;
    366   }
    367 
    368   bool hasImplicitArgPtr() const {
    369     return ImplicitArgPtr;
    370   }
    371 
    372   bool hasImplicitBufferPtr() const {
    373     return ImplicitBufferPtr;
    374   }
    375 
    376   AMDGPUFunctionArgInfo &getArgInfo() {
    377     return ArgInfo;
    378   }
    379 
    380   const AMDGPUFunctionArgInfo &getArgInfo() const {
    381     return ArgInfo;
    382   }
    383 
    384   std::pair<const ArgDescriptor *, const TargetRegisterClass *>
    385   getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    386     return ArgInfo.getPreloadedValue(Value);
    387   }
    388 
    389   unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    390     return ArgInfo.getPreloadedValue(Value).first->getRegister();
    391   }
    392 
    393   unsigned getGITPtrHigh() const {
    394     return GITPtrHigh;
    395   }
    396 
    397   unsigned get32BitAddressHighBits() const {
    398     return HighBitsOf32BitAddress;
    399   }
    400 
    401   unsigned getNumUserSGPRs() const {
    402     return NumUserSGPRs;
    403   }
    404 
    405   unsigned getNumPreloadedSGPRs() const {
    406     return NumUserSGPRs + NumSystemSGPRs;
    407   }
    408 
    409   unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
    410     return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
    411   }
    412 
    413   /// Returns the physical register reserved for use as the resource
    414   /// descriptor for scratch accesses.
    415   unsigned getScratchRSrcReg() const {
    416     return ScratchRSrcReg;
    417   }
    418 
    419   void setScratchRSrcReg(unsigned Reg) {
    420     assert(Reg != 0 && "Should never be unset");
    421     ScratchRSrcReg = Reg;
    422   }
    423 
    424   unsigned getScratchWaveOffsetReg() const {
    425     return ScratchWaveOffsetReg;
    426   }
    427 
    428   unsigned getFrameOffsetReg() const {
    429     return FrameOffsetReg;
    430   }
    431 
    432   void setStackPtrOffsetReg(unsigned Reg) {
    433     assert(Reg != 0 && "Should never be unset");
    434     StackPtrOffsetReg = Reg;
    435   }
    436 
    437   // Note the unset value for this is AMDGPU::SP_REG rather than
    438   // NoRegister. This is mostly a workaround for MIR tests where state that
    439   // can't be directly computed from the function is not preserved in serialized
    440   // MIR.
    441   unsigned getStackPtrOffsetReg() const {
    442     return StackPtrOffsetReg;
    443   }
    444 
    445   void setScratchWaveOffsetReg(unsigned Reg) {
    446     assert(Reg != 0 && "Should never be unset");
    447     ScratchWaveOffsetReg = Reg;
    448     if (isEntryFunction())
    449       FrameOffsetReg = ScratchWaveOffsetReg;
    450   }
    451 
    452   unsigned getQueuePtrUserSGPR() const {
    453     return ArgInfo.QueuePtr.getRegister();
    454   }
    455 
    456   unsigned getImplicitBufferPtrUserSGPR() const {
    457     return ArgInfo.ImplicitBufferPtr.getRegister();
    458   }
    459 
    460   bool hasSpilledSGPRs() const {
    461     return HasSpilledSGPRs;
    462   }
    463 
    464   void setHasSpilledSGPRs(bool Spill = true) {
    465     HasSpilledSGPRs = Spill;
    466   }
    467 
    468   bool hasSpilledVGPRs() const {
    469     return HasSpilledVGPRs;
    470   }
    471 
    472   void setHasSpilledVGPRs(bool Spill = true) {
    473     HasSpilledVGPRs = Spill;
    474   }
    475 
    476   bool hasNonSpillStackObjects() const {
    477     return HasNonSpillStackObjects;
    478   }
    479 
    480   void setHasNonSpillStackObjects(bool StackObject = true) {
    481     HasNonSpillStackObjects = StackObject;
    482   }
    483 
    484   bool isStackRealigned() const {
    485     return IsStackRealigned;
    486   }
    487 
    488   void setIsStackRealigned(bool Realigned = true) {
    489     IsStackRealigned = Realigned;
    490   }
    491 
    492   unsigned getNumSpilledSGPRs() const {
    493     return NumSpilledSGPRs;
    494   }
    495 
    496   unsigned getNumSpilledVGPRs() const {
    497     return NumSpilledVGPRs;
    498   }
    499 
    500   void addToSpilledSGPRs(unsigned num) {
    501     NumSpilledSGPRs += num;
    502   }
    503 
    504   void addToSpilledVGPRs(unsigned num) {
    505     NumSpilledVGPRs += num;
    506   }
    507 
    508   unsigned getPSInputAddr() const {
    509     return PSInputAddr;
    510   }
    511 
    512   unsigned getPSInputEnable() const {
    513     return PSInputEnable;
    514   }
    515 
    516   bool isPSInputAllocated(unsigned Index) const {
    517     return PSInputAddr & (1 << Index);
    518   }
    519 
    520   void markPSInputAllocated(unsigned Index) {
    521     PSInputAddr |= 1 << Index;
    522   }
    523 
    524   void markPSInputEnabled(unsigned Index) {
    525     PSInputEnable |= 1 << Index;
    526   }
    527 
    528   bool returnsVoid() const {
    529     return ReturnsVoid;
    530   }
    531 
    532   void setIfReturnsVoid(bool Value) {
    533     ReturnsVoid = Value;
    534   }
    535 
    536   /// \returns A pair of default/requested minimum/maximum flat work group sizes
    537   /// for this function.
    538   std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
    539     return FlatWorkGroupSizes;
    540   }
    541 
    542   /// \returns Default/requested minimum flat work group size for this function.
    543   unsigned getMinFlatWorkGroupSize() const {
    544     return FlatWorkGroupSizes.first;
    545   }
    546 
    547   /// \returns Default/requested maximum flat work group size for this function.
    548   unsigned getMaxFlatWorkGroupSize() const {
    549     return FlatWorkGroupSizes.second;
    550   }
    551 
    552   /// \returns A pair of default/requested minimum/maximum number of waves per
    553   /// execution unit.
    554   std::pair<unsigned, unsigned> getWavesPerEU() const {
    555     return WavesPerEU;
    556   }
    557 
    558   /// \returns Default/requested minimum number of waves per execution unit.
    559   unsigned getMinWavesPerEU() const {
    560     return WavesPerEU.first;
    561   }
    562 
    563   /// \returns Default/requested maximum number of waves per execution unit.
    564   unsigned getMaxWavesPerEU() const {
    565     return WavesPerEU.second;
    566   }
    567 
    568   /// \returns Stack object index for \p Dim's work group ID.
    569   int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
    570     assert(Dim < 3);
    571     return DebuggerWorkGroupIDStackObjectIndices[Dim];
    572   }
    573 
    574   /// Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
    575   void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
    576     assert(Dim < 3);
    577     DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
    578   }
    579 
    580   /// \returns Stack object index for \p Dim's work item ID.
    581   int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
    582     assert(Dim < 3);
    583     return DebuggerWorkItemIDStackObjectIndices[Dim];
    584   }
    585 
    586   /// Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
    587   void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
    588     assert(Dim < 3);
    589     DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
    590   }
    591 
    592   /// \returns SGPR used for \p Dim's work group ID.
    593   unsigned getWorkGroupIDSGPR(unsigned Dim) const {
    594     switch (Dim) {
    595     case 0:
    596       assert(hasWorkGroupIDX());
    597       return ArgInfo.WorkGroupIDX.getRegister();
    598     case 1:
    599       assert(hasWorkGroupIDY());
    600       return ArgInfo.WorkGroupIDY.getRegister();
    601     case 2:
    602       assert(hasWorkGroupIDZ());
    603       return ArgInfo.WorkGroupIDZ.getRegister();
    604     }
    605     llvm_unreachable("unexpected dimension");
    606   }
    607 
    608   /// \returns VGPR used for \p Dim' work item ID.
    609   unsigned getWorkItemIDVGPR(unsigned Dim) const;
    610 
    611   unsigned getLDSWaveSpillSize() const {
    612     return LDSWaveSpillSize;
    613   }
    614 
    615   const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
    616                                                     const Value *BufferRsrc) {
    617     assert(BufferRsrc);
    618     auto PSV = BufferPSVs.try_emplace(
    619       BufferRsrc,
    620       llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
    621     return PSV.first->second.get();
    622   }
    623 
    624   const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
    625                                                   const Value *ImgRsrc) {
    626     assert(ImgRsrc);
    627     auto PSV = ImagePSVs.try_emplace(
    628       ImgRsrc,
    629       llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
    630     return PSV.first->second.get();
    631   }
    632 
    633   unsigned getOccupancy() const {
    634     return Occupancy;
    635   }
    636 
    637   unsigned getMinAllowedOccupancy() const {
    638     if (!isMemoryBound() && !needsWaveLimiter())
    639       return Occupancy;
    640     return (Occupancy < 4) ? Occupancy : 4;
    641   }
    642 
    643   void limitOccupancy(const MachineFunction &MF);
    644 
    645   void limitOccupancy(unsigned Limit) {
    646     if (Occupancy > Limit)
    647       Occupancy = Limit;
    648   }
    649 
    650   void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
    651     if (Occupancy < Limit)
    652       Occupancy = Limit;
    653     limitOccupancy(MF);
    654   }
    655 };
    656 
    657 } // end namespace llvm
    658 
    659 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
    660