//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// SI implementation of the TargetRegisterInfo class.
//
//===----------------------------------------------------------------------===//

#include "SIRegisterInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

static bool hasPressureSet(const int *PSets, unsigned PSetID) {
  for (unsigned i = 0; PSets[i] != -1; ++i) {
    if (PSets[i] == (int)PSetID)
      return true;
  }
  return false;
}

void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
                                         BitVector &PressureSets) const {
  for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
    const int *PSets = getRegUnitPressureSets(*U);
    if (hasPressureSet(PSets, PSetID)) {
      PressureSets.set(PSetID);
      break;
    }
  }
}

static cl::opt<bool> EnableSpillSGPRToSMEM(
  "amdgpu-spill-sgpr-to-smem",
  cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
  cl::init(false));

static cl::opt<bool> EnableSpillSGPRToVGPR(
  "amdgpu-spill-sgpr-to-vgpr",
  cl::desc("Enable spilling SGPRs to VGPRs"),
  cl::ReallyHidden,
  cl::init(true));

SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
  AMDGPURegisterInfo(),
  SGPRPressureSets(getNumRegPressureSets()),
  VGPRPressureSets(getNumRegPressureSets()),
  SpillSGPRToVGPR(false),
  SpillSGPRToSMEM(false) {
  if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
    SpillSGPRToSMEM = true;
  else if (EnableSpillSGPRToVGPR)
    SpillSGPRToVGPR = true;

  unsigned NumRegPressureSets = getNumRegPressureSets();

  SGPRSetID = NumRegPressureSets;
  VGPRSetID = NumRegPressureSets;

  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
    classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
    classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
  }

  // Determine the number of reg units for each pressure set.
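  // The SGPR and VGPR pressure set IDs are then chosen as the sets covering
  // the most register units (see the max scan below).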
  std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
  for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
    const int *PSets = getRegUnitPressureSets(i);
    for (unsigned j = 0; PSets[j] != -1; ++j) {
      ++PressureSetRegUnits[PSets[j]];
    }
  }

  unsigned VGPRMax = 0, SGPRMax = 0;
  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
    if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
      VGPRSetID = i;
      VGPRMax = PressureSetRegUnits[i];
      continue;
    }
    if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
      SGPRSetID = i;
      SGPRMax = PressureSetRegUnits[i];
    }
  }

  assert(SGPRSetID < NumRegPressureSets &&
         VGPRSetID < NumRegPressureSets);
}

unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
  const MachineFunction &MF) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
  unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
}

static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
  unsigned Reg;

  // Try to place it in a hole after PrivateSegmentBufferReg.
  if (RegCount & 3) {
    // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
    // alignment constraints, so we have a hole where we can put the wave
    // offset.
    Reg = RegCount - 1;
  } else {
    // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
    // wave offset before it.
    Reg = RegCount - 5;
  }

  return Reg;
}

unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
  const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
  return AMDGPU::SGPR_32RegClass.getRegister(Reg);
}

unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
  const MachineFunction &MF) const {
  return AMDGPU::SGPR32;
}

BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());

  // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
  // this seems likely to result in bugs, so I'm marking them as reserved.
  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);

  // M0 has to be reserved so that llvm accepts it as a live-in into a block.
  reserveRegisterTuples(Reserved, AMDGPU::M0);

  // Reserve the memory aperture registers.
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);

  // Reserve xnack_mask registers - support is not implemented in Codegen.
  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);

  // Reserve Trap Handler registers - support is not implemented in Codegen.
  reserveRegisterTuples(Reserved, AMDGPU::TBA);
  reserveRegisterTuples(Reserved, AMDGPU::TMA);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
  for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
    unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
    reserveRegisterTuples(Reserved, Reg);
  }

  unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
    unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
    reserveRegisterTuples(Reserved, Reg);
  }

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
  if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
    // Reserve 1 SGPR for scratch wave offset in case we need to spill.
    reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
  }

  unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
  if (ScratchRSrcReg != AMDGPU::NoRegister) {
    // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we
    // need to spill.
    // TODO: May need to reserve a VGPR if doing LDS spilling.
    reserveRegisterTuples(Reserved, ScratchRSrcReg);
    assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
  }

  // We have to assume the SP is needed in case there are calls in the function,
  // which is detected after the function is lowered. If we aren't really going
  // to need SP, don't bother reserving it.
  unsigned StackPtrReg = MFI->getStackPtrOffsetReg();

  if (StackPtrReg != AMDGPU::NoRegister) {
    reserveRegisterTuples(Reserved, StackPtrReg);
    assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
  }

  unsigned FrameReg = MFI->getFrameOffsetReg();
  if (FrameReg != AMDGPU::NoRegister) {
    reserveRegisterTuples(Reserved, FrameReg);
    assert(!isSubRegister(ScratchRSrcReg, FrameReg));
  }

  return Reserved;
}

bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
  const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
  if (Info->isEntryFunction()) {
    const MachineFrameInfo &MFI = Fn.getFrameInfo();
    return MFI.hasStackObjects() || MFI.hasCalls();
  }

  // May need scavenger for dealing with callee saved registers.
  return true;
}

bool SIRegisterInfo::requiresFrameIndexScavenging(
  const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  if (MFI.hasStackObjects())
    return true;

  // May need to deal with callee saved registers.
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  return !Info->isEntryFunction();
}

bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
  const MachineFunction &MF) const {
  // m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
  // create a virtual register for it during frame index elimination, so the
  // scavenger is directly needed.
  return MF.getFrameInfo().hasStackObjects() &&
         MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
         MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
}

bool SIRegisterInfo::requiresVirtualBaseRegisters(
  const MachineFunction &) const {
  // There are no special dedicated stack or frame pointers.
  return true;
}

bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
  // This helps catch bugs as verifier errors.
  return true;
}

int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
  assert(SIInstrInfo::isMUBUF(*MI));

  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                          AMDGPU::OpName::offset);
  return MI->getOperand(OffIdx).getImm();
}

int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
                                                 int Idx) const {
  if (!SIInstrInfo::isMUBUF(*MI))
    return 0;

  assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::vaddr) &&
         "Should never see frame index on non-address operand");

  return getMUBUFInstrOffset(MI);
}

bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
  if (!MI->mayLoadOrStore())
    return false;

  int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);

  return !isUInt<12>(FullOffset);
}

void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
                                                  unsigned BaseReg,
                                                  int FrameIdx,
                                                  int64_t Offset) const {
  MachineBasicBlock::iterator Ins = MBB->begin();
  DebugLoc DL; // Defaults to "unknown"

  if (Ins != MBB->end())
    DL = Ins->getDebugLoc();

  MachineFunction *MF = MBB->getParent();
  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = Subtarget.getInstrInfo();

  if (Offset == 0) {
    BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
      .addFrameIndex(FrameIdx);
    return;
  }

  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

  unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
    .addImm(Offset);
  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
    .addFrameIndex(FrameIdx);

  TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
    .addReg(OffsetReg, RegState::Kill)
    .addReg(FIReg);
}

void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
                                       int64_t Offset) const {

  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();
  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = Subtarget.getInstrInfo();

#ifndef NDEBUG
  // FIXME: Is it possible to be storing a frame index to itself?
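  // Debug-only check: the instruction should reference at most one frame
  // index operand.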
  bool SeenFI = false;
  for (const MachineOperand &MO : MI.operands()) {
    if (MO.isFI()) {
      if (SeenFI)
        llvm_unreachable("should not see multiple frame indices");

      SeenFI = true;
    }
  }
#endif

  MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
  assert(TII->isMUBUF(MI));
  assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
         MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
         "should only be seeing frame offset relative FrameIndex");

  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
  int64_t NewOffset = OffsetOp->getImm() + Offset;
  assert(isUInt<12>(NewOffset) && "offset should be legal");

  FIOp->ChangeToRegister(BaseReg, false);
  OffsetOp->setImm(NewOffset);
}

bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
                                        unsigned BaseReg,
                                        int64_t Offset) const {
  if (!SIInstrInfo::isMUBUF(*MI))
    return false;

  int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);

  return isUInt<12>(NewOffset);
}

const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
  const MachineFunction &MF, unsigned Kind) const {
  // This is inaccurate. It depends on the instruction and address space. The
  // only place where we should hit this is for dealing with frame indexes /
  // private accesses, so this is correct in that case.
  return &AMDGPU::VGPR_32RegClass;
}

static unsigned getNumSubRegsForSpillOp(unsigned Op) {

  switch (Op) {
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V512_RESTORE:
    return 16;
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
    return 8;
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
    return 4;
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
    return 3;
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
    return 2;
  case AMDGPU::SI_SPILL_S32_SAVE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
  case AMDGPU::SI_SPILL_V32_SAVE:
  case AMDGPU::SI_SPILL_V32_RESTORE:
    return 1;
  default: llvm_unreachable("Invalid spill opcode");
  }
}

static int getOffsetMUBUFStore(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
    return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
    return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
    return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
  default:
    return -1;
  }
}

static int getOffsetMUBUFLoad(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
    return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
    return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
  default:
    return -1;
  }
}

// This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
// need to handle the case where an SGPR may need to be spilled while spilling.
static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
                                      MachineFrameInfo &MFI,
                                      MachineBasicBlock::iterator MI,
                                      int Index,
                                      int64_t Offset) {
  MachineBasicBlock *MBB = MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();
  bool IsStore = MI->mayStore();

  unsigned Opc = MI->getOpcode();
  int LoadStoreOp = IsStore ?
    getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
  if (LoadStoreOp == -1)
    return false;

  const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
  MachineInstrBuilder NewMI = BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
    .add(*Reg)
    .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
    .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
    .addImm(Offset)
    .addImm(0) // glc
    .addImm(0) // slc
    .addImm(0) // tfe
    .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
                                                       AMDGPU::OpName::vdata_in);
  if (VDataIn)
    NewMI.add(*VDataIn);
  return true;
}

void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
                                         unsigned LoadStoreOp,
                                         int Index,
                                         unsigned ValueReg,
                                         bool IsKill,
                                         unsigned ScratchRsrcReg,
                                         unsigned ScratchOffsetReg,
                                         int64_t InstOffset,
                                         MachineMemOperand *MMO,
                                         RegScavenger *RS) const {
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MI->getParent()->getParent();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const MachineFrameInfo &MFI = MF->getFrameInfo();

  const MCInstrDesc &Desc = TII->get(LoadStoreOp);
  const DebugLoc &DL = MI->getDebugLoc();
  bool IsStore = Desc.mayStore();

  bool Scavenged = false;
  unsigned SOffset = ScratchOffsetReg;

  const unsigned EltSize = 4;
  const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
  unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / (EltSize * CHAR_BIT);
  unsigned Size = NumSubRegs * EltSize;
  int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
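  // ScratchOffsetRegDelta records any amount temporarily added to the scratch
  // offset register below so that it can be subtracted back after the spill.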
  int64_t ScratchOffsetRegDelta = 0;

  unsigned Align = MFI.getObjectAlignment(Index);
  const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();

  assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");

  if (!isUInt<12>(Offset + Size - EltSize)) {
    SOffset = AMDGPU::NoRegister;

    // We currently only support spilling VGPRs to EltSize boundaries, meaning
    // we can simplify the adjustment of Offset here to just scale with
    // WavefrontSize.
    Offset *= ST.getWavefrontSize();

    // We don't have access to the register scavenger if this function is
    // called during PEI::scavengeFrameVirtualRegs().
    if (RS)
      SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);

    if (SOffset == AMDGPU::NoRegister) {
      // There are no free SGPRs, and we are in the process of spilling VGPRs
      // too. Since we need a VGPR in order to spill SGPRs (this is true on
      // SI/CI, and on VI it is true until we implement spilling using scalar
      // stores), we have no way to free up an SGPR. Our solution here is to
      // add the offset directly to the ScratchOffset register, and then
      // subtract the offset after the spill to return ScratchOffset to its
      // original value.
      SOffset = ScratchOffsetReg;
      ScratchOffsetRegDelta = Offset;
    } else {
      Scavenged = true;
    }

    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
      .addReg(ScratchOffsetReg)
      .addImm(Offset);

    Offset = 0;
  }

  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
    unsigned SubReg = NumSubRegs == 1 ?
      ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));

    unsigned SOffsetRegState = 0;
    unsigned SrcDstRegState = getDefRegState(!IsStore);
    if (i + 1 == e) {
      SOffsetRegState |= getKillRegState(Scavenged);
      // The last implicit use carries the "Kill" flag.
      SrcDstRegState |= getKillRegState(IsKill);
    }

    MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
    MachineMemOperand *NewMMO
      = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
                                 EltSize, MinAlign(Align, EltSize * i));

    auto MIB = BuildMI(*MBB, MI, DL, Desc)
      .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
      .addReg(ScratchRsrcReg)
      .addReg(SOffset, SOffsetRegState)
      .addImm(Offset)
      .addImm(0) // glc
      .addImm(0) // slc
      .addImm(0) // tfe
      .addMemOperand(NewMMO);

    if (NumSubRegs > 1)
      MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
  }

  if (ScratchOffsetRegDelta != 0) {
    // Subtract the offset we added to the ScratchOffset register.
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
      .addReg(ScratchOffsetReg)
      .addImm(ScratchOffsetRegDelta);
  }
}

static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
                                                     bool Store) {
  if (SuperRegSize % 16 == 0) {
    return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
                         AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
  }

  if (SuperRegSize % 8 == 0) {
    return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
                        AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
  }

  return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
                      AMDGPU::S_BUFFER_LOAD_DWORD_SGPR };
}

bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
                               int Index,
                               RegScavenger *RS,
                               bool OnlyToVGPR) const {
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MBB->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  DenseSet<unsigned> SGPRSpillVGPRDefinedSet;

  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
    = MFI->getSGPRToVGPRSpills(Index);
  bool SpillToVGPR = !VGPRSpills.empty();
  if (OnlyToVGPR && !SpillToVGPR)
    return false;

  MachineRegisterInfo &MRI = MF->getRegInfo();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();

  unsigned SuperReg = MI->getOperand(0).getReg();
  bool IsKill = MI->getOperand(0).isKill();
  const DebugLoc &DL = MI->getDebugLoc();

  MachineFrameInfo &FrameInfo = MF->getFrameInfo();

  bool SpillToSMEM = spillSGPRToSMEM();
  if (SpillToSMEM && OnlyToVGPR)
    return false;

  assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
                         SuperReg != MFI->getFrameOffsetReg() &&
                         SuperReg != MFI->getScratchWaveOffsetReg()));

  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

  unsigned OffsetReg = AMDGPU::M0;
  unsigned M0CopyReg = AMDGPU::NoRegister;

  if (SpillToSMEM) {
    if (RS->isRegUsed(AMDGPU::M0)) {
      M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
        .addReg(AMDGPU::M0);
    }
  }

  unsigned ScalarStoreOp;
  unsigned EltSize = 4;
  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
  if (SpillToSMEM && isSGPRClass(RC)) {
    // XXX - if private_element_size is larger than 4 it might be useful to be
    // able to spill wider vmem spills.
    std::tie(EltSize, ScalarStoreOp) =
      getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
  }

  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

  // SubReg carries the "Kill" flag when SubReg == SuperReg.
  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, SplitParts[i]);

    if (SpillToSMEM) {
      int64_t FrOffset = FrameInfo.getObjectOffset(Index);

      // The allocated memory size is really the wavefront size * the frame
      // index size. The widest register class is 64 bytes, so a 4-byte scratch
      // allocation is enough to spill this in a single stack object.
      //
      // FIXME: Frame size/offsets are computed earlier than this, so the extra
      // space is still unnecessarily allocated.

      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   EltSize, MinAlign(Align, EltSize * i));

      // SMEM instructions only support a single offset, so increment the wave
      // offset.
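      // The computed scratch byte offset is the frame offset scaled by the
      // wavefront size plus this element's offset within the spilled register.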
      int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
      if (Offset != 0) {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
          .addReg(MFI->getFrameOffsetReg())
          .addImm(Offset);
      } else {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
          .addReg(MFI->getFrameOffsetReg());
      }

      BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
        .addReg(SubReg, getKillRegState(IsKill)) // sdata
        .addReg(MFI->getScratchRSrcReg())        // sbase
        .addReg(OffsetReg, RegState::Kill)       // soff
        .addImm(0)                               // glc
        .addMemOperand(MMO);

      continue;
    }

    if (SpillToVGPR) {
      SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];

      // During SGPR spilling to VGPR, determine if the VGPR is defined. The
      // only circumstance in which we say it is undefined is when it is the
      // first spill to this VGPR in the first basic block.
      bool VGPRDefined = true;
      if (MBB == &MF->front())
        VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;

      // Mark the "old value of vgpr" input undef only if this is the first
      // sgpr spill to this specific vgpr in the first basic block.
      BuildMI(*MBB, MI, DL,
              TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
              Spill.VGPR)
        .addReg(SubReg, getKillRegState(IsKill))
        .addImm(Spill.Lane)
        .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);

      // FIXME: Since this spills to another register instead of an actual
      // frame index, we should delete the frame index when all references to
      // it are fixed.
    } else {
      // XXX - Can the spill to VGPR fail for some subregisters but not others?
      if (OnlyToVGPR)
        return false;

      // Spill SGPR to a frame index.
      // TODO: Should VI try to spill to VGPR and then spill to SMEM?
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

      MachineInstrBuilder Mov
        = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
          .addReg(SubReg, SubKillState);

      // There could be undef components of a spilled super register.
      // TODO: Can we detect this and skip the spill?
      if (NumSubRegs > 1) {
        // The last implicit use of the SuperReg carries the "Kill" flag.
        unsigned SuperKillState = 0;
        if (i + 1 == e)
          SuperKillState |= getKillRegState(IsKill);
        Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
      }

      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   EltSize, MinAlign(Align, EltSize * i));
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
        .addReg(TmpReg, RegState::Kill)    // src
        .addFrameIndex(Index)              // vaddr
        .addReg(MFI->getScratchRSrcReg())  // srsrc
        .addReg(MFI->getFrameOffsetReg())  // soffset
        .addImm(i * 4)                     // offset
        .addMemOperand(MMO);
    }
  }

  if (M0CopyReg != AMDGPU::NoRegister) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(M0CopyReg, RegState::Kill);
  }

  MI->eraseFromParent();
  MFI->addToSpilledSGPRs(NumSubRegs);
  return true;
}

bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
                                 int Index,
                                 RegScavenger *RS,
                                 bool OnlyToVGPR) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();

  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
    = MFI->getSGPRToVGPRSpills(Index);
  bool SpillToVGPR = !VGPRSpills.empty();
  if (OnlyToVGPR && !SpillToVGPR)
    return false;

  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = MI->getDebugLoc();

  unsigned SuperReg = MI->getOperand(0).getReg();
  bool SpillToSMEM = spillSGPRToSMEM();
  if (SpillToSMEM && OnlyToVGPR)
    return false;

  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

  unsigned OffsetReg = AMDGPU::M0;
  unsigned M0CopyReg = AMDGPU::NoRegister;

  if (SpillToSMEM) {
    if (RS->isRegUsed(AMDGPU::M0)) {
      M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
        .addReg(AMDGPU::M0);
    }
  }

  unsigned EltSize = 4;
  unsigned ScalarLoadOp;

  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
  if (SpillToSMEM && isSGPRClass(RC)) {
    // XXX - if private_element_size is larger than 4 it might be useful to be
    // able to spill wider vmem spills.
    std::tie(EltSize, ScalarLoadOp) =
      getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
  }

  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

  // SubReg carries the "Kill" flag when SubReg == SuperReg.
  int64_t FrOffset = FrameInfo.getObjectOffset(Index);

  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, SplitParts[i]);

    if (SpillToSMEM) {
      // FIXME: Size may be > 4 but extra bytes wasted.
      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                   EltSize, MinAlign(Align, EltSize * i));

      // Add i * 4 offset
      int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
      if (Offset != 0) {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
          .addReg(MFI->getFrameOffsetReg())
          .addImm(Offset);
      } else {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
          .addReg(MFI->getFrameOffsetReg());
      }

      auto MIB =
        BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
          .addReg(MFI->getScratchRSrcReg())  // sbase
          .addReg(OffsetReg, RegState::Kill) // soff
          .addImm(0)                         // glc
          .addMemOperand(MMO);

      if (NumSubRegs > 1)
        MIB.addReg(SuperReg, RegState::ImplicitDefine);

      continue;
    }

    if (SpillToVGPR) {
      SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
      auto MIB =
        BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
                SubReg)
          .addReg(Spill.VGPR)
          .addImm(Spill.Lane);

      if (NumSubRegs > 1)
        MIB.addReg(SuperReg, RegState::ImplicitDefine);
    } else {
      if (OnlyToVGPR)
        return false;

      // Restore SGPR from a stack slot.
      // FIXME: We should use S_LOAD_DWORD here for VI.
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      unsigned Align = FrameInfo.getObjectAlignment(Index);

      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);

      MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
        MachineMemOperand::MOLoad, EltSize,
        MinAlign(Align, EltSize * i));

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
        .addFrameIndex(Index)              // vaddr
        .addReg(MFI->getScratchRSrcReg())  // srsrc
        .addReg(MFI->getFrameOffsetReg())  // soffset
        .addImm(i * 4)                     // offset
        .addMemOperand(MMO);

      auto MIB =
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
          .addReg(TmpReg, RegState::Kill);

      if (NumSubRegs > 1)
        MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    }
  }

  if (M0CopyReg != AMDGPU::NoRegister) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(M0CopyReg, RegState::Kill);
  }

  MI->eraseFromParent();
  return true;
}

/// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
/// a VGPR and the stack slot can be safely eliminated when all other users are
/// handled.
bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
  MachineBasicBlock::iterator MI,
  int FI,
  RegScavenger *RS) const {
  switch (MI->getOpcode()) {
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S32_SAVE:
    return spillSGPR(MI, FI, RS, true);
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
    return restoreSGPR(MI, FI, RS, true);
  default:
    llvm_unreachable("not an SGPR spill instruction");
  }
}

void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                         int SPAdj, unsigned FIOperandNum,
                                         RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  DebugLoc DL = MI->getDebugLoc();

  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
  int Index = MI->getOperand(FIOperandNum).getIndex();

  switch (MI->getOpcode()) {
  // SGPR register spill
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S32_SAVE: {
    spillSGPR(MI, Index, RS);
    break;
  }

  // SGPR register restore
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE: {
    restoreSGPR(MI, Index, RS);
    break;
  }

  // VGPR register spill
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V32_SAVE: {
    const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                       AMDGPU::OpName::vdata);
    buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
          Index,
          VData->getReg(), VData->isKill(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
          *MI->memoperands_begin(),
          RS);
    MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
    MI->eraseFromParent();
    break;
  }
  case AMDGPU::SI_SPILL_V32_RESTORE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
  case AMDGPU::SI_SPILL_V512_RESTORE: {
    const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                       AMDGPU::OpName::vdata);

    buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
          Index,
          VData->getReg(), VData->isKill(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
          *MI->memoperands_begin(),
          RS);
    MI->eraseFromParent();
    break;
  }
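
  // Any other instruction that references a frame index is rewritten in place
  // below.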
  default: {
    const DebugLoc &DL = MI->getDebugLoc();
    bool IsMUBUF = TII->isMUBUF(*MI);

    if (!IsMUBUF &&
        MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
      // Convert to an absolute stack address by finding the offset from the
      // scratch wave base and scaling by the wave size.
      //
      // In an entry function/kernel the stack address is already the
      // absolute address relative to the scratch wave offset.

      unsigned DiffReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

      bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
      unsigned ResultReg = IsCopy ?
        MI->getOperand(0).getReg() :
        MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
        .addReg(MFI->getFrameOffsetReg())
        .addReg(MFI->getScratchWaveOffsetReg());

      int64_t Offset = FrameInfo.getObjectOffset(Index);
      if (Offset == 0) {
        // XXX - This never happens because of emergency scavenging slot at 0?
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
          .addImm(Log2_32(ST.getWavefrontSize()))
          .addReg(DiffReg);
      } else {
        unsigned ScaledReg
          = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
          .addImm(Log2_32(ST.getWavefrontSize()))
          .addReg(DiffReg, RegState::Kill);

        // TODO: Fold if use instruction is another add of a constant.
        if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
          TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
            .addImm(Offset)
            .addReg(ScaledReg, RegState::Kill);
        } else {
          unsigned ConstOffsetReg
            = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
            .addImm(Offset);
          TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
            .addReg(ConstOffsetReg, RegState::Kill)
            .addReg(ScaledReg, RegState::Kill);
        }
      }

      // Don't introduce an extra copy if we're just materializing in a mov.
      if (IsCopy)
        MI->eraseFromParent();
      else
        FIOp.ChangeToRegister(ResultReg, false, false, true);
      return;
    }

    if (IsMUBUF) {
      // Disable offen so we don't need a 0 vgpr base.
      assert(static_cast<int>(FIOperandNum) ==
             AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                        AMDGPU::OpName::vaddr));

      assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
             == MFI->getFrameOffsetReg());

      int64_t Offset = FrameInfo.getObjectOffset(Index);
      int64_t OldImm
        = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
      int64_t NewOffset = OldImm + Offset;

      if (isUInt<12>(NewOffset) &&
          buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
        MI->eraseFromParent();
        return;
      }
    }

    // If the offset is simply too big, don't convert to a scratch wave offset
    // relative index.
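    // Instead, materialize the raw frame offset as an immediate, moving it
    // into a temporary VGPR below if it is not a legal immediate for this
    // operand.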
    int64_t Offset = FrameInfo.getObjectOffset(Index);
    FIOp.ChangeToImmediate(Offset);
    if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
        .addImm(Offset);
      FIOp.ChangeToRegister(TmpReg, false, false, true);
    }
  }
  }
}

StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
#define AMDGPU_REG_ASM_NAMES
#include "AMDGPURegAsmNames.inc.cpp"

#define REG_RANGE(BeginReg, EndReg, RegTable)            \
  if (Reg >= BeginReg && Reg <= EndReg) {                \
    unsigned Index = Reg - BeginReg;                     \
    assert(Index < array_lengthof(RegTable));            \
    return RegTable[Index];                              \
  }

  REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
  REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
  REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
  REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
            VGPR96RegNames);

  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
            AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
            VGPR128RegNames);
  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
            AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
            SGPR128RegNames);

  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
            AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
            VGPR256RegNames);

  REG_RANGE(
    AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
    AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
    VGPR512RegNames);

  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
            AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
            SGPR256RegNames);

  REG_RANGE(
    AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
    AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
    SGPR512RegNames
  );

#undef REG_RANGE

  // FIXME: Rename flat_scr so we don't need to special case this.
  switch (Reg) {
  case AMDGPU::FLAT_SCR:
    return "flat_scratch";
  case AMDGPU::FLAT_SCR_LO:
    return "flat_scratch_lo";
  case AMDGPU::FLAT_SCR_HI:
    return "flat_scratch_hi";
  default:
    // For the special named registers the default is fine.
    return TargetRegisterInfo::getRegAsmName(Reg);
  }
}

// FIXME: This is very slow. It might be worth creating a map from physreg to
// register class.
const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
  assert(!TargetRegisterInfo::isVirtualRegister(Reg));

  static const TargetRegisterClass *const BaseClasses[] = {
    &AMDGPU::VGPR_32RegClass,
    &AMDGPU::SReg_32RegClass,
    &AMDGPU::VReg_64RegClass,
    &AMDGPU::SReg_64RegClass,
    &AMDGPU::VReg_96RegClass,
    &AMDGPU::VReg_128RegClass,
    &AMDGPU::SReg_128RegClass,
    &AMDGPU::VReg_256RegClass,
    &AMDGPU::SReg_256RegClass,
    &AMDGPU::VReg_512RegClass,
    &AMDGPU::SReg_512RegClass,
    &AMDGPU::SCC_CLASSRegClass,
    &AMDGPU::Pseudo_SReg_32RegClass,
    &AMDGPU::Pseudo_SReg_128RegClass,
  };

  for (const TargetRegisterClass *BaseClass : BaseClasses) {
    if (BaseClass->contains(Reg)) {
      return BaseClass;
    }
  }
  return nullptr;
}

// TODO: It might be helpful to have some target specific flags in
// TargetRegisterClass to mark which classes are VGPRs to make this trivial.
bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
  unsigned Size = getRegSizeInBits(*RC);
  if (Size < 32)
    return false;
  switch (Size) {
  case 32:
    return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
  case 64:
    return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
  case 96:
    return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
  case 128:
    return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
  case 256:
    return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
  case 512:
    return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
  default:
    llvm_unreachable("Invalid register class size");
  }
}

const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
                                        const TargetRegisterClass *SRC) const {
  switch (getRegSizeInBits(*SRC)) {
  case 32:
    return &AMDGPU::VGPR_32RegClass;
  case 64:
    return &AMDGPU::VReg_64RegClass;
  case 96:
    return &AMDGPU::VReg_96RegClass;
  case 128:
    return &AMDGPU::VReg_128RegClass;
  case 256:
    return &AMDGPU::VReg_256RegClass;
  case 512:
    return &AMDGPU::VReg_512RegClass;
  default:
    llvm_unreachable("Invalid register class size");
  }
}

const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
                                        const TargetRegisterClass *VRC) const {
  switch (getRegSizeInBits(*VRC)) {
  case 32:
    return &AMDGPU::SGPR_32RegClass;
  case 64:
    return &AMDGPU::SReg_64RegClass;
  case 128:
    return &AMDGPU::SReg_128RegClass;
  case 256:
    return &AMDGPU::SReg_256RegClass;
  case 512:
    return &AMDGPU::SReg_512RegClass;
  default:
    llvm_unreachable("Invalid register class size");
  }
}

const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
  const TargetRegisterClass *RC, unsigned SubIdx) const {
  if (SubIdx == AMDGPU::NoSubRegister)
    return RC;

  // We can assume that each lane corresponds to one 32-bit register.
  unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
  if (isSGPRClass(RC)) {
    switch (Count) {
    case 1:
      return &AMDGPU::SGPR_32RegClass;
    case 2:
      return &AMDGPU::SReg_64RegClass;
    case 4:
      return &AMDGPU::SReg_128RegClass;
    case 8:
      return &AMDGPU::SReg_256RegClass;
    case 16: /* fall-through */
    default:
      llvm_unreachable("Invalid sub-register class size");
    }
  } else {
    switch (Count) {
    case 1:
      return &AMDGPU::VGPR_32RegClass;
    case 2:
      return &AMDGPU::VReg_64RegClass;
    case 3:
      return &AMDGPU::VReg_96RegClass;
    case 4:
      return &AMDGPU::VReg_128RegClass;
    case 8:
      return &AMDGPU::VReg_256RegClass;
    case 16: /* fall-through */
    default:
      llvm_unreachable("Invalid sub-register class size");
    }
  }
}

bool SIRegisterInfo::shouldRewriteCopySrc(
  const TargetRegisterClass *DefRC,
  unsigned DefSubReg,
  const TargetRegisterClass *SrcRC,
  unsigned SrcSubReg) const {
  // We want to prefer the smallest register class possible, so we don't want
  // to stop and rewrite on anything that looks like a subregister
  // extract. Operations mostly don't care about the super register class, so
  // we only want to stop on the most basic of copies between the same register
  // class.
  //
  // e.g. if we have something like
  // %0 = ...
  // %1 = ...
  // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
  // %3 = COPY %2, sub0
  //
  // We want to look through the COPY to find:
  // => %3 = COPY %0

  // Plain copy.
  return getCommonSubClass(DefRC, SrcRC) != nullptr;
}

/// Returns a register that is not used at any point in the function.
/// If all registers are used, then this function will return
/// AMDGPU::NoRegister.
unsigned
SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
                                   const TargetRegisterClass *RC,
                                   const MachineFunction &MF) const {

  for (unsigned Reg : *RC)
    if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
      return Reg;
  return AMDGPU::NoRegister;
}

ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
                                                   unsigned EltSize) const {
  if (EltSize == 4) {
    static const int16_t Sub0_15[] = {
      AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
      AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
      AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
      AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
    };

    static const int16_t Sub0_7[] = {
      AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
      AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    };

    static const int16_t Sub0_3[] = {
      AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    };

    static const int16_t Sub0_2[] = {
      AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
    };

    static const int16_t Sub0_1[] = {
      AMDGPU::sub0, AMDGPU::sub1,
    };

    switch (AMDGPU::getRegBitWidth(*RC->MC)) {
    case 32:
      return {};
    case 64:
      return makeArrayRef(Sub0_1);
    case 96:
      return makeArrayRef(Sub0_2);
    case 128:
      return makeArrayRef(Sub0_3);
    case 256:
      return makeArrayRef(Sub0_7);
    case 512:
      return makeArrayRef(Sub0_15);
    default:
      llvm_unreachable("unhandled register size");
    }
  }

  if (EltSize == 8) {
    static const int16_t Sub0_15_64[] = {
      AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
      AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
      AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
      AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
    };

    static const int16_t Sub0_7_64[] = {
      AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
      AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
    };

    static const int16_t Sub0_3_64[] = {
      AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
    };

    switch (AMDGPU::getRegBitWidth(*RC->MC)) {
    case 64:
      return {};
    case 128:
      return makeArrayRef(Sub0_3_64);
    case 256:
      return makeArrayRef(Sub0_7_64);
    case 512:
      return makeArrayRef(Sub0_15_64);
    default:
      llvm_unreachable("unhandled register size");
    }
  }

  assert(EltSize == 16 && "unhandled register spill split size");

  static const int16_t Sub0_15_128[] = {
    AMDGPU::sub0_sub1_sub2_sub3,
    AMDGPU::sub4_sub5_sub6_sub7,
    AMDGPU::sub8_sub9_sub10_sub11,
    AMDGPU::sub12_sub13_sub14_sub15
  };

  static const int16_t Sub0_7_128[] = {
    AMDGPU::sub0_sub1_sub2_sub3,
    AMDGPU::sub4_sub5_sub6_sub7
  };

  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
  case 128:
    return {};
  case 256:
    return makeArrayRef(Sub0_7_128);
  case 512:
    return makeArrayRef(Sub0_15_128);
  default:
    llvm_unreachable("unhandled register size");
  }
}

const TargetRegisterClass*
SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
                                  unsigned Reg) const {
  if (TargetRegisterInfo::isVirtualRegister(Reg))
    return MRI.getRegClass(Reg);

  return getPhysRegClass(Reg);
}

bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
                            unsigned Reg) const {
  const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
assert(RC && "Register class for the reg not found"); 1508 return hasVGPRs(RC); 1509 } 1510 1511 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI, 1512 const TargetRegisterClass *SrcRC, 1513 unsigned SubReg, 1514 const TargetRegisterClass *DstRC, 1515 unsigned DstSubReg, 1516 const TargetRegisterClass *NewRC, 1517 LiveIntervals &LIS) const { 1518 unsigned SrcSize = getRegSizeInBits(*SrcRC); 1519 unsigned DstSize = getRegSizeInBits(*DstRC); 1520 unsigned NewSize = getRegSizeInBits(*NewRC); 1521 1522 // Do not increase size of registers beyond dword, we would need to allocate 1523 // adjacent registers and constraint regalloc more than needed. 1524 1525 // Always allow dword coalescing. 1526 if (SrcSize <= 32 || DstSize <= 32) 1527 return true; 1528 1529 return NewSize <= DstSize || NewSize <= SrcSize; 1530 } 1531 1532 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, 1533 MachineFunction &MF) const { 1534 1535 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 1536 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 1537 1538 unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(), 1539 MF.getFunction()); 1540 switch (RC->getID()) { 1541 default: 1542 return AMDGPURegisterInfo::getRegPressureLimit(RC, MF); 1543 case AMDGPU::VGPR_32RegClassID: 1544 return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF)); 1545 case AMDGPU::SGPR_32RegClassID: 1546 return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF)); 1547 } 1548 } 1549 1550 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF, 1551 unsigned Idx) const { 1552 if (Idx == getVGPRPressureSet()) 1553 return getRegPressureLimit(&AMDGPU::VGPR_32RegClass, 1554 const_cast<MachineFunction &>(MF)); 1555 1556 if (Idx == getSGPRPressureSet()) 1557 return getRegPressureLimit(&AMDGPU::SGPR_32RegClass, 1558 const_cast<MachineFunction &>(MF)); 1559 1560 return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx); 1561 } 1562 1563 const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const { 1564 static const int Empty[] = { -1 }; 1565 1566 if (hasRegUnit(AMDGPU::M0, RegUnit)) 1567 return Empty; 1568 return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit); 1569 } 1570 1571 unsigned SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const { 1572 // Not a callee saved register. 1573 return AMDGPU::SGPR30_SGPR31; 1574 } 1575 1576 const TargetRegisterClass * 1577 SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO, 1578 const MachineRegisterInfo &MRI) const { 1579 unsigned Size = getRegSizeInBits(MO.getReg(), MRI); 1580 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg()); 1581 if (!RB) 1582 return nullptr; 1583 1584 switch (Size) { 1585 case 32: 1586 return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass : 1587 &AMDGPU::SReg_32_XM0RegClass; 1588 case 64: 1589 return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass : 1590 &AMDGPU::SReg_64_XEXECRegClass; 1591 case 96: 1592 return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass : 1593 nullptr; 1594 case 128: 1595 return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass : 1596 &AMDGPU::SReg_128RegClass; 1597 default: 1598 llvm_unreachable("not implemented"); 1599 } 1600 } 1601