//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <vector>

#define MAX_LANES 64

using namespace llvm;

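// Derive which implicit inputs (dispatch/queue/kernarg pointers, workgroup
// and workitem IDs, scratch setup) this function needs from its calling
// convention, its attributes, and the subtarget.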
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = getMaxWavesPerEU();
  limitOccupancy(MF);
  CallingConv::ID CC = F.getCallingConv();

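  // Kernel entry points always receive the X workgroup and workitem IDs; the
  // kernarg segment pointer is only needed if the kernel has arguments.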
  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    if (!F.arg_empty())
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  if (!isEntryFunction()) {
    // Non-entry functions have no special inputs for now, other than the
    // registers required for scratch access.
    ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
    ScratchWaveOffsetReg = AMDGPU::SGPR4;
    FrameOffsetReg = AMDGPU::SGPR5;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    ArgInfo.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(ScratchRSrcReg);
    ArgInfo.PrivateSegmentWaveByteOffset =
      ArgDescriptor::createRegister(ScratchWaveOffsetReg);

    if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
      KernargSegmentPtr = true;
      MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                                 MaxKernArgAlign);
    }
  }

  if (ST.debuggerEmitPrologue()) {
    // Enable everything.
    WorkGroupIDX = true;
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDX = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
  } else {
    if (F.hasFnAttribute("amdgpu-work-group-id-x"))
      WorkGroupIDX = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-x"))
      WorkItemIDX = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  }

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  bool MaySpill = ST.isVGPRSpillingEnabled(F);
  bool HasStackObjects = FrameInfo.hasStackObjects();

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (HasStackObjects || MaySpill) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

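  // Under the AMD code object v2 ABI these inputs can be preloaded into user
  // SGPRs, so they are enabled on demand via the corresponding attributes.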
  bool IsCOV2 = ST.isAmdCodeObjectV2(F);
  if (IsCOV2) {
    if (HasStackObjects || MaySpill)
      PrivateSegmentBuffer = true;

    if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F.hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F.hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (ST.isMesaGfxShader(F)) {
    if (HasStackObjects || MaySpill)
      ImplicitBufferPtr = true;
  }

  if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;

  if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
    // TODO: This could be refined a lot. The attribute is a poor way of
    // detecting calls that may require it before argument lowering.
    if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
      FlatScratchInit = true;
  }

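  // The "amdgpu-git-ptr-high" attribute, when present, supplies the known
  // high 32 bits of the GIT (presumably the global information table)
  // pointer.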
  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);
}

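// Clamp the tracked occupancy to both the subtarget's maximum waves per EU
// and the limit imposed by this function's LDS usage.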
void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                 MF.getFunction()));
}

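// Each of the following add* methods reserves the next free user SGPRs for
// one preloaded kernel input, records the assignment in ArgInfo, and returns
// the allocated register.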
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

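// Return true if \p Reg appears in the null-terminated callee-saved register
// list \p CSRegs.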
static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

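  // Each 32-bit SGPR of the spill occupies one 4-byte lane of a VGPR.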
  int NumLanes = Size / 4;

  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

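    // A lane index of 0 means the current spill VGPR is full (or none has
    // been allocated yet), so find an unused VGPR to hold the next lanes.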
    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      Optional<int> CSRSpillFI;
      if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
          isCalleeSavedReg(CSRegs, LaneVGPR)) {
        CSRSpillFI = FrameInfo.CreateSpillStackObject(4, 4);
      }

      SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));

      // Add this register as live-in to all blocks to avoid the machine
      // verifier complaining about the use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

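// Once all SGPR spills have been lowered to VGPR lanes, the stack objects
// that originally backed them are no longer needed and can be removed from
// the frame.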
void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
  for (auto &R : SGPRToVGPRSpills)
    MFI.RemoveStackObject(R.first);
}

/// \returns VGPR used for \p Dim's work item ID.
unsigned SIMachineFunctionInfo::getWorkItemIDVGPR(unsigned Dim) const {
  switch (Dim) {
  case 0:
    assert(hasWorkItemIDX());
    return AMDGPU::VGPR0;
  case 1:
    assert(hasWorkItemIDY());
    return AMDGPU::VGPR1;
  case 2:
    assert(hasWorkItemIDZ());
    return AMDGPU::VGPR2;
  }
  llvm_unreachable("unexpected dimension");
}

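// User SGPRs are allocated upward from SGPR0, and system SGPRs are appended
// immediately after them, which is why all user SGPRs must be added first.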
MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}