Home | History | Annotate | Download | only in AMDGPU
      1 //===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 /// \file
      9 //===----------------------------------------------------------------------===//
     10 
     11 
     12 #include "SIMachineFunctionInfo.h"
     13 #include "AMDGPUSubtarget.h"
     14 #include "SIInstrInfo.h"
     15 #include "llvm/CodeGen/MachineInstrBuilder.h"
     16 #include "llvm/CodeGen/MachineFrameInfo.h"
     17 #include "llvm/CodeGen/MachineRegisterInfo.h"
     18 #include "llvm/IR/Function.h"
     19 #include "llvm/IR/LLVMContext.h"
     20 
     21 #define MAX_LANES 64
     22 
     23 using namespace llvm;
     24 
     25 
// Pin the vtable to this file.
// The body is intentionally empty: the method exists only so that it has an
// out-of-line definition in this translation unit.
void SIMachineFunctionInfo::anchor() {}
     28 
     29 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
     30   : AMDGPUMachineFunction(MF),
     31     TIDReg(AMDGPU::NoRegister),
     32     ScratchRSrcReg(AMDGPU::NoRegister),
     33     ScratchWaveOffsetReg(AMDGPU::NoRegister),
     34     PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
     35     DispatchPtrUserSGPR(AMDGPU::NoRegister),
     36     QueuePtrUserSGPR(AMDGPU::NoRegister),
     37     KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
     38     DispatchIDUserSGPR(AMDGPU::NoRegister),
     39     FlatScratchInitUserSGPR(AMDGPU::NoRegister),
     40     PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
     41     GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
     42     GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
     43     GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
     44     WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
     45     WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
     46     WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
     47     WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
     48     PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
     49     LDSWaveSpillSize(0),
     50     PSInputAddr(0),
     51     NumUserSGPRs(0),
     52     NumSystemSGPRs(0),
     53     HasSpilledSGPRs(false),
     54     HasSpilledVGPRs(false),
     55     PrivateSegmentBuffer(false),
     56     DispatchPtr(false),
     57     QueuePtr(false),
     58     DispatchID(false),
     59     KernargSegmentPtr(false),
     60     FlatScratchInit(false),
     61     GridWorkgroupCountX(false),
     62     GridWorkgroupCountY(false),
     63     GridWorkgroupCountZ(false),
     64     WorkGroupIDX(true),
     65     WorkGroupIDY(false),
     66     WorkGroupIDZ(false),
     67     WorkGroupInfo(false),
     68     PrivateSegmentWaveByteOffset(false),
     69     WorkItemIDX(true),
     70     WorkItemIDY(false),
     71     WorkItemIDZ(false) {
     72   const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
     73   const Function *F = MF.getFunction();
     74 
     75   const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
     76 
     77   if (getShaderType() == ShaderType::COMPUTE)
     78     KernargSegmentPtr = true;
     79 
     80   if (F->hasFnAttribute("amdgpu-work-group-id-y"))
     81     WorkGroupIDY = true;
     82 
     83   if (F->hasFnAttribute("amdgpu-work-group-id-z"))
     84     WorkGroupIDZ = true;
     85 
     86   if (F->hasFnAttribute("amdgpu-work-item-id-y"))
     87     WorkItemIDY = true;
     88 
     89   if (F->hasFnAttribute("amdgpu-work-item-id-z"))
     90     WorkItemIDZ = true;
     91 
     92   bool MaySpill = ST.isVGPRSpillingEnabled(this);
     93   bool HasStackObjects = FrameInfo->hasStackObjects();
     94 
     95   if (HasStackObjects || MaySpill)
     96     PrivateSegmentWaveByteOffset = true;
     97 
     98   if (ST.isAmdHsaOS()) {
     99     if (HasStackObjects || MaySpill)
    100       PrivateSegmentBuffer = true;
    101 
    102     if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
    103       DispatchPtr = true;
    104   }
    105 
    106   // X, XY, and XYZ are the only supported combinations, so make sure Y is
    107   // enabled if Z is.
    108   if (WorkItemIDZ)
    109     WorkItemIDY = true;
    110 }
    111 
    112 unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
    113   const SIRegisterInfo &TRI) {
    114   PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
    115     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
    116   NumUserSGPRs += 4;
    117   return PrivateSegmentBufferUserSGPR;
    118 }
    119 
    120 unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
    121   DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
    122     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
    123   NumUserSGPRs += 2;
    124   return DispatchPtrUserSGPR;
    125 }
    126 
    127 unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
    128   QueuePtrUserSGPR = TRI.getMatchingSuperReg(
    129     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
    130   NumUserSGPRs += 2;
    131   return QueuePtrUserSGPR;
    132 }
    133 
    134 unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
    135   KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
    136     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
    137   NumUserSGPRs += 2;
    138   return KernargSegmentPtrUserSGPR;
    139 }
    140 
    141 SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
    142                                                        MachineFunction *MF,
    143                                                        unsigned FrameIndex,
    144                                                        unsigned SubIdx) {
    145   const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
    146   const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
    147       MF->getSubtarget<AMDGPUSubtarget>().getRegisterInfo());
    148   MachineRegisterInfo &MRI = MF->getRegInfo();
    149   int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
    150   Offset += SubIdx * 4;
    151 
    152   unsigned LaneVGPRIdx = Offset / (64 * 4);
    153   unsigned Lane = (Offset / 4) % 64;
    154 
    155   struct SpilledReg Spill;
    156 
    157   if (!LaneVGPRs.count(LaneVGPRIdx)) {
    158     unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
    159     LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
    160 
    161     // Add this register as live-in to all blocks to avoid machine verifer
    162     // complaining about use of an undefined physical register.
    163     for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
    164          BI != BE; ++BI) {
    165       BI->addLiveIn(LaneVGPR);
    166     }
    167   }
    168 
    169   Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
    170   Spill.Lane = Lane;
    171   return Spill;
    172 }
    173 
    174 unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize(
    175                                               const MachineFunction &MF) const {
    176   const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
    177   // FIXME: We should get this information from kernel attributes if it
    178   // is available.
    179   return getShaderType() == ShaderType::COMPUTE ? 256 : ST.getWavefrontSize();
    180 }
    181