1 //===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 /// \file 9 //===----------------------------------------------------------------------===// 10 11 12 #include "SIMachineFunctionInfo.h" 13 #include "AMDGPUSubtarget.h" 14 #include "SIInstrInfo.h" 15 #include "llvm/CodeGen/MachineInstrBuilder.h" 16 #include "llvm/CodeGen/MachineFrameInfo.h" 17 #include "llvm/CodeGen/MachineRegisterInfo.h" 18 #include "llvm/IR/Function.h" 19 #include "llvm/IR/LLVMContext.h" 20 21 #define MAX_LANES 64 22 23 using namespace llvm; 24 25 26 // Pin the vtable to this file. 27 void SIMachineFunctionInfo::anchor() {} 28 29 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) 30 : AMDGPUMachineFunction(MF), 31 TIDReg(AMDGPU::NoRegister), 32 ScratchRSrcReg(AMDGPU::NoRegister), 33 ScratchWaveOffsetReg(AMDGPU::NoRegister), 34 PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister), 35 DispatchPtrUserSGPR(AMDGPU::NoRegister), 36 QueuePtrUserSGPR(AMDGPU::NoRegister), 37 KernargSegmentPtrUserSGPR(AMDGPU::NoRegister), 38 DispatchIDUserSGPR(AMDGPU::NoRegister), 39 FlatScratchInitUserSGPR(AMDGPU::NoRegister), 40 PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister), 41 GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister), 42 GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister), 43 GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister), 44 WorkGroupIDXSystemSGPR(AMDGPU::NoRegister), 45 WorkGroupIDYSystemSGPR(AMDGPU::NoRegister), 46 WorkGroupIDZSystemSGPR(AMDGPU::NoRegister), 47 WorkGroupInfoSystemSGPR(AMDGPU::NoRegister), 48 PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister), 49 LDSWaveSpillSize(0), 50 PSInputAddr(0), 51 NumUserSGPRs(0), 52 NumSystemSGPRs(0), 53 HasSpilledSGPRs(false), 54 HasSpilledVGPRs(false), 55 PrivateSegmentBuffer(false), 56 DispatchPtr(false), 57 QueuePtr(false), 58 DispatchID(false), 59 KernargSegmentPtr(false), 60 FlatScratchInit(false), 61 GridWorkgroupCountX(false), 62 GridWorkgroupCountY(false), 63 GridWorkgroupCountZ(false), 64 WorkGroupIDX(true), 65 WorkGroupIDY(false), 66 WorkGroupIDZ(false), 67 WorkGroupInfo(false), 68 PrivateSegmentWaveByteOffset(false), 69 WorkItemIDX(true), 70 WorkItemIDY(false), 71 WorkItemIDZ(false) { 72 const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); 73 const Function *F = MF.getFunction(); 74 75 const MachineFrameInfo *FrameInfo = MF.getFrameInfo(); 76 77 if (getShaderType() == ShaderType::COMPUTE) 78 KernargSegmentPtr = true; 79 80 if (F->hasFnAttribute("amdgpu-work-group-id-y")) 81 WorkGroupIDY = true; 82 83 if (F->hasFnAttribute("amdgpu-work-group-id-z")) 84 WorkGroupIDZ = true; 85 86 if (F->hasFnAttribute("amdgpu-work-item-id-y")) 87 WorkItemIDY = true; 88 89 if (F->hasFnAttribute("amdgpu-work-item-id-z")) 90 WorkItemIDZ = true; 91 92 bool MaySpill = ST.isVGPRSpillingEnabled(this); 93 bool HasStackObjects = FrameInfo->hasStackObjects(); 94 95 if (HasStackObjects || MaySpill) 96 PrivateSegmentWaveByteOffset = true; 97 98 if (ST.isAmdHsaOS()) { 99 if (HasStackObjects || MaySpill) 100 PrivateSegmentBuffer = true; 101 102 if (F->hasFnAttribute("amdgpu-dispatch-ptr")) 103 DispatchPtr = true; 104 } 105 106 // X, XY, and XYZ are the only supported combinations, so make sure Y is 107 // enabled if Z is. 108 if (WorkItemIDZ) 109 WorkItemIDY = true; 110 } 111 112 unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer( 113 const SIRegisterInfo &TRI) { 114 PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg( 115 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass); 116 NumUserSGPRs += 4; 117 return PrivateSegmentBufferUserSGPR; 118 } 119 120 unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) { 121 DispatchPtrUserSGPR = TRI.getMatchingSuperReg( 122 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); 123 NumUserSGPRs += 2; 124 return DispatchPtrUserSGPR; 125 } 126 127 unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) { 128 QueuePtrUserSGPR = TRI.getMatchingSuperReg( 129 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); 130 NumUserSGPRs += 2; 131 return QueuePtrUserSGPR; 132 } 133 134 unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) { 135 KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg( 136 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); 137 NumUserSGPRs += 2; 138 return KernargSegmentPtrUserSGPR; 139 } 140 141 SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg( 142 MachineFunction *MF, 143 unsigned FrameIndex, 144 unsigned SubIdx) { 145 const MachineFrameInfo *FrameInfo = MF->getFrameInfo(); 146 const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>( 147 MF->getSubtarget<AMDGPUSubtarget>().getRegisterInfo()); 148 MachineRegisterInfo &MRI = MF->getRegInfo(); 149 int64_t Offset = FrameInfo->getObjectOffset(FrameIndex); 150 Offset += SubIdx * 4; 151 152 unsigned LaneVGPRIdx = Offset / (64 * 4); 153 unsigned Lane = (Offset / 4) % 64; 154 155 struct SpilledReg Spill; 156 157 if (!LaneVGPRs.count(LaneVGPRIdx)) { 158 unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass); 159 LaneVGPRs[LaneVGPRIdx] = LaneVGPR; 160 161 // Add this register as live-in to all blocks to avoid machine verifer 162 // complaining about use of an undefined physical register. 163 for (MachineFunction::iterator BI = MF->begin(), BE = MF->end(); 164 BI != BE; ++BI) { 165 BI->addLiveIn(LaneVGPR); 166 } 167 } 168 169 Spill.VGPR = LaneVGPRs[LaneVGPRIdx]; 170 Spill.Lane = Lane; 171 return Spill; 172 } 173 174 unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize( 175 const MachineFunction &MF) const { 176 const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); 177 // FIXME: We should get this information from kernel attributes if it 178 // is available. 179 return getShaderType() == ShaderType::COMPUTE ? 256 : ST.getWavefrontSize(); 180 } 181