//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief SI implementation of the TargetRegisterInfo class.
//
//===----------------------------------------------------------------------===//

#include "SIRegisterInfo.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo() {}

void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved,
                                           unsigned Reg) const {
  MCRegAliasIterator R(Reg, this, true);

  for (; R.isValid(); ++R)
    Reserved.set(*R);
}

unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
  const MachineFunction &MF) const {
  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
  if (ST.hasSGPRInitBug()) {
    // Pick the SGPR128 immediately below the four SGPRs kept for FLAT_SCRATCH
    // and VCC (see getReservedRegs).
    unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 4;
    unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
    return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
  }

  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
    // 98/99 need to be reserved for flat_scr, and 100/101 for vcc. This is the
    // next sgpr128 down.
    return AMDGPU::SGPR92_SGPR93_SGPR94_SGPR95;
  }

  return AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99;
}

unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
  const MachineFunction &MF) const {
  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
  if (ST.hasSGPRInitBug()) {
    // The SGPR immediately below the reserved scratch resource descriptor.
    unsigned Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5;
    return AMDGPU::SGPR_32RegClass.getRegister(Idx);
  }

  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
    // Next register before reservations for flat_scr and vcc.
    return AMDGPU::SGPR97;
  }

  return AMDGPU::SGPR95;
}

BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);

  // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
  // this seems likely to result in bugs, so I'm marking them as reserved.
  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);

  // Reserve the last 2 registers so we will always have at least 2 more that
  // will physically contain VCC.
  reserveRegisterTuples(Reserved, AMDGPU::SGPR102_SGPR103);

  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();

  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
    // SI/CI have 104 SGPRs. VI has 102. We need to shift down the reservation
    // for VCC/FLAT_SCR.
    reserveRegisterTuples(Reserved, AMDGPU::SGPR98_SGPR99);
    reserveRegisterTuples(Reserved, AMDGPU::SGPR100_SGPR101);
  }

  // Tonga and Iceland can only allocate a fixed number of SGPRs due
  // to a hw bug.
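  // (The bug requires a fixed SGPR allocation size, so SGPRs beyond that
  // fixed count can never be used and must stay reserved.)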
  if (ST.hasSGPRInitBug()) {
    unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
    // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
    // Assume XNACK_MASK is unused.
    unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4;

    for (unsigned i = Limit; i < NumSGPRs; ++i) {
      unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
      reserveRegisterTuples(Reserved, Reg);
    }
  }

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
  if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
    // Reserve 1 SGPR for scratch wave offset in case we need to spill.
    reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
  }

  unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
  if (ScratchRSrcReg != AMDGPU::NoRegister) {
    // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we
    // need to spill.
    // TODO: May need to reserve a VGPR if doing LDS spilling.
    reserveRegisterTuples(Reserved, ScratchRSrcReg);
    assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
  }

  return Reserved;
}

unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
                                                unsigned Idx) const {
  const AMDGPUSubtarget &STI = MF.getSubtarget<AMDGPUSubtarget>();
  // FIXME: We should adjust the max number of waves based on LDS size.
  unsigned SGPRLimit = getNumSGPRsAllowed(STI.getGeneration(),
                                          STI.getMaxWavesPerCU());
  unsigned VGPRLimit = getNumVGPRsAllowed(STI.getMaxWavesPerCU());

  unsigned VSLimit = SGPRLimit + VGPRLimit;

  for (regclass_iterator I = regclass_begin(), E = regclass_end();
       I != E; ++I) {
    const TargetRegisterClass *RC = *I;

    unsigned NumSubRegs = std::max((int)RC->getSize() / 4, 1);
    unsigned Limit;

    if (isPseudoRegClass(RC)) {
      // FIXME: This is a hack. We should never be considering the pressure of
      // these since no virtual register should ever have this class.
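      // A pseudo class can hold either SGPRs or VGPRs, so give it the
      // combined budget computed above.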
      Limit = VSLimit;
    } else if (isSGPRClass(RC)) {
      Limit = SGPRLimit / NumSubRegs;
    } else {
      Limit = VGPRLimit / NumSubRegs;
    }

    const int *Sets = getRegClassPressureSets(RC);
    assert(Sets);
    for (unsigned i = 0; Sets[i] != -1; ++i) {
      if (Sets[i] == (int)Idx)
        return Limit;
    }
  }
  return 256;
}

bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
  return Fn.getFrameInfo()->hasStackObjects();
}

static unsigned getNumSubRegsForSpillOp(unsigned Op) {
  switch (Op) {
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V512_RESTORE:
    return 16;
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
    return 8;
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
    return 4;
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
    return 3;
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
    return 2;
  case AMDGPU::SI_SPILL_S32_SAVE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
  case AMDGPU::SI_SPILL_V32_SAVE:
  case AMDGPU::SI_SPILL_V32_RESTORE:
    return 1;
  default: llvm_unreachable("Invalid spill opcode");
  }
}

void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
                                           unsigned LoadStoreOp,
                                           unsigned Value,
                                           unsigned ScratchRsrcReg,
                                           unsigned ScratchOffset,
                                           int64_t Offset,
                                           RegScavenger *RS) const {
  MachineBasicBlock *MBB = MI->getParent();
  const MachineFunction *MF = MI->getParent()->getParent();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
  LLVMContext &Ctx = MF->getFunction()->getContext();
  DebugLoc DL = MI->getDebugLoc();
  bool IsLoad = TII->get(LoadStoreOp).mayLoad();

  bool RanOutOfSGPRs = false;
  unsigned SOffset = ScratchOffset;

  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
  unsigned Size = NumSubRegs * 4;

  // If the whole spill range does not fit in the 12-bit immediate offset,
  // fold the offset into a scavenged SGPR and use a zero immediate instead.
  if (!isUInt<12>(Offset + Size)) {
    SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
    if (SOffset == AMDGPU::NoRegister) {
      RanOutOfSGPRs = true;
      SOffset = AMDGPU::SGPR0;
    }
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
      .addReg(ScratchOffset)
      .addImm(Offset);
    Offset = 0;
  }

  if (RanOutOfSGPRs)
    Ctx.emitError("Ran out of SGPRs for spilling VGPRs");

  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += 4) {
    unsigned SubReg = NumSubRegs > 1 ?
      getPhysRegSubReg(Value, &AMDGPU::VGPR_32RegClass, i) :
      Value;

    BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
      .addReg(SubReg, getDefRegState(IsLoad))
      .addReg(ScratchRsrcReg)
      .addReg(SOffset)
      .addImm(Offset)
      .addImm(0) // glc
      .addImm(0) // slc
      .addImm(0) // tfe
      .addReg(Value, RegState::Implicit | getDefRegState(IsLoad))
      .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
  }
}

void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                         int SPAdj, unsigned FIOperandNum,
                                         RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
  DebugLoc DL = MI->getDebugLoc();

  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
  int Index = MI->getOperand(FIOperandNum).getIndex();

  switch (MI->getOpcode()) {
  // SGPR register spill
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S32_SAVE: {
    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());

    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
      unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
                                         &AMDGPU::SGPR_32RegClass, i);
      struct SIMachineFunctionInfo::SpilledReg Spill =
          MFI->getSpilledReg(MF, Index, i);

      if (Spill.VGPR == AMDGPU::NoRegister) {
        LLVMContext &Ctx = MF->getFunction()->getContext();
        Ctx.emitError("Ran out of VGPRs for spilling SGPR");
      }

      // SGPRs are spilled into lanes of a VGPR rather than to memory.
      BuildMI(*MBB, MI, DL,
              TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
              Spill.VGPR)
        .addReg(SubReg)
        .addImm(Spill.Lane);

      // FIXME: Since this spills to another register instead of an actual
      // frame index, we should delete the frame index when all references to
      // it are fixed.
    }
    MI->eraseFromParent();
    break;
  }

  // SGPR register restore
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE: {
    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());

    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
      unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
                                         &AMDGPU::SGPR_32RegClass, i);
      struct SIMachineFunctionInfo::SpilledReg Spill =
          MFI->getSpilledReg(MF, Index, i);

      if (Spill.VGPR == AMDGPU::NoRegister) {
        LLVMContext &Ctx = MF->getFunction()->getContext();
        Ctx.emitError("Ran out of VGPRs for spilling SGPR");
      }

      BuildMI(*MBB, MI, DL,
              TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
              SubReg)
        .addReg(Spill.VGPR)
        .addImm(Spill.Lane)
        .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    }

    // TODO: only do this when it is needed
    switch (MF->getSubtarget<AMDGPUSubtarget>().getGeneration()) {
    case AMDGPUSubtarget::SOUTHERN_ISLANDS:
      // "VALU writes SGPR" -> "SMRD reads that SGPR" needs 4 wait states
      // ("S_NOP 3") on SI
      TII->insertWaitStates(MI, 4);
      break;
    case AMDGPUSubtarget::SEA_ISLANDS:
      break;
    default: // VOLCANIC_ISLANDS and later
      // "VALU writes SGPR -> VMEM reads that SGPR" needs 5 wait states
      // ("S_NOP 4") on VI and later. This also applies to VALUs which write
      // VCC, but we're unlikely to see VMEM use VCC.
      TII->insertWaitStates(MI, 5);
    }

    MI->eraseFromParent();
    break;
  }

  // VGPR register spill
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V32_SAVE:
    buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
          TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
          FrameInfo->getObjectOffset(Index), RS);
    MI->eraseFromParent();
    break;
  case AMDGPU::SI_SPILL_V32_RESTORE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
  case AMDGPU::SI_SPILL_V512_RESTORE: {
    buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
          TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
          FrameInfo->getObjectOffset(Index), RS);
    MI->eraseFromParent();
    break;
  }

  default: {
    int64_t Offset = FrameInfo->getObjectOffset(Index);
    FIOp.ChangeToImmediate(Offset);
    if (!TII->isImmOperandLegal(MI, FIOperandNum, FIOp)) {
      unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, SPAdj);
      BuildMI(*MBB, MI, MI->getDebugLoc(),
              TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
        .addImm(Offset);
      FIOp.ChangeToRegister(TmpReg, false, false, true);
    }
  }
  }
}

unsigned SIRegisterInfo::getHWRegIndex(unsigned Reg) const {
  return getEncodingValue(Reg) & 0xff;
}

// FIXME: This is very slow. It might be worth creating a map from physreg to
// register class.
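// Returns the smallest base register class that contains Reg, or nullptr if
// Reg is not a member of any base class.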
const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
  assert(!TargetRegisterInfo::isVirtualRegister(Reg));

  static const TargetRegisterClass *const BaseClasses[] = {
    &AMDGPU::VGPR_32RegClass,
    &AMDGPU::SReg_32RegClass,
    &AMDGPU::VReg_64RegClass,
    &AMDGPU::SReg_64RegClass,
    &AMDGPU::VReg_96RegClass,
    &AMDGPU::VReg_128RegClass,
    &AMDGPU::SReg_128RegClass,
    &AMDGPU::VReg_256RegClass,
    &AMDGPU::SReg_256RegClass,
    &AMDGPU::VReg_512RegClass,
    &AMDGPU::SReg_512RegClass
  };

  for (const TargetRegisterClass *BaseClass : BaseClasses) {
    if (BaseClass->contains(Reg)) {
      return BaseClass;
    }
  }
  return nullptr;
}

// TODO: It might be helpful to have some target specific flags in
// TargetRegisterClass to mark which classes are VGPRs to make this trivial.
bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
  switch (RC->getSize()) {
  case 4:
    return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
  case 8:
    return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
  case 12:
    return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
  case 16:
    return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
  case 32:
    return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
  case 64:
    return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
  default:
    llvm_unreachable("Invalid register class size");
  }
}

const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
                                        const TargetRegisterClass *SRC) const {
  switch (SRC->getSize()) {
  case 4:
    return &AMDGPU::VGPR_32RegClass;
  case 8:
    return &AMDGPU::VReg_64RegClass;
  case 12:
    return &AMDGPU::VReg_96RegClass;
  case 16:
    return &AMDGPU::VReg_128RegClass;
  case 32:
    return &AMDGPU::VReg_256RegClass;
  case 64:
    return &AMDGPU::VReg_512RegClass;
  default:
    llvm_unreachable("Invalid register class size");
  }
}

const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
  const TargetRegisterClass *RC, unsigned SubIdx) const {
  if (SubIdx == AMDGPU::NoSubRegister)
    return RC;

  // If this register has a sub-register, we can safely assume it is a 32-bit
  // register, because all of SI's sub-registers are 32-bit.
  if (isSGPRClass(RC)) {
    return &AMDGPU::SGPR_32RegClass;
  } else {
    return &AMDGPU::VGPR_32RegClass;
  }
}

bool SIRegisterInfo::shouldRewriteCopySrc(
  const TargetRegisterClass *DefRC,
  unsigned DefSubReg,
  const TargetRegisterClass *SrcRC,
  unsigned SrcSubReg) const {
  // We want to prefer the smallest register class possible, so we don't want
  // to stop and rewrite on anything that looks like a subregister
  // extract. Operations mostly don't care about the super register class, so
  // we only want to stop on the most basic of copies between the same register
  // class.
  //
  // e.g. if we have something like
  // vreg0 = ...
  // vreg1 = ...
  // vreg2 = REG_SEQUENCE vreg0, sub0, vreg1, sub1
  // vreg3 = COPY vreg2, sub0
  //
  // We want to look through the COPY to find:
  //  => vreg3 = COPY vreg0

  // Plain copy.
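  // The subregister indices are not inspected here; any overlap between the
  // two register classes is enough to allow the rewrite.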
  return getCommonSubClass(DefRC, SrcRC) != nullptr;
}

unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg,
                                          const TargetRegisterClass *SubRC,
                                          unsigned Channel) const {
  switch (Reg) {
  case AMDGPU::VCC:
    switch (Channel) {
    case 0: return AMDGPU::VCC_LO;
    case 1: return AMDGPU::VCC_HI;
    default: llvm_unreachable("Invalid SubIdx for VCC");
    }

  case AMDGPU::FLAT_SCR:
    switch (Channel) {
    case 0:
      return AMDGPU::FLAT_SCR_LO;
    case 1:
      return AMDGPU::FLAT_SCR_HI;
    default:
      llvm_unreachable("Invalid SubIdx for FLAT_SCR");
    }
    break;

  case AMDGPU::EXEC:
    switch (Channel) {
    case 0:
      return AMDGPU::EXEC_LO;
    case 1:
      return AMDGPU::EXEC_HI;
    default:
      llvm_unreachable("Invalid SubIdx for EXEC");
    }
    break;
  }

  const TargetRegisterClass *RC = getPhysRegClass(Reg);
  // 32-bit registers don't have sub-registers, so we can just return the
  // Reg. We need to have this check here, because the calculation below
  // using getHWRegIndex() will fail with special 32-bit registers like
  // VCC_LO, VCC_HI, EXEC_LO, EXEC_HI and M0.
  if (RC->getSize() == 4) {
    assert(Channel == 0);
    return Reg;
  }

  unsigned Index = getHWRegIndex(Reg);
  return SubRC->getRegister(Index + Channel);
}

bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
  return OpType == AMDGPU::OPERAND_REG_IMM32;
}

bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
  if (opCanUseLiteralConstant(OpType))
    return true;

  return OpType == AMDGPU::OPERAND_REG_INLINE_C;
}

// FIXME: Most of these are flexible with HSA and we don't need to reserve them
// as input registers if unused. Whether the dispatch ptr is necessary should
// be easy to detect from used intrinsics. Scratch setup is harder to know.
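// Returns the register holding the requested preloaded value when the
// function starts executing.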
unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
                                           enum PreloadedValue Value) const {
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
  (void)ST;
  switch (Value) {
  case SIRegisterInfo::WORKGROUP_ID_X:
    assert(MFI->hasWorkGroupIDX());
    return MFI->WorkGroupIDXSystemSGPR;
  case SIRegisterInfo::WORKGROUP_ID_Y:
    assert(MFI->hasWorkGroupIDY());
    return MFI->WorkGroupIDYSystemSGPR;
  case SIRegisterInfo::WORKGROUP_ID_Z:
    assert(MFI->hasWorkGroupIDZ());
    return MFI->WorkGroupIDZSystemSGPR;
  case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
    return MFI->PrivateSegmentWaveByteOffsetSystemSGPR;
  case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER:
    assert(ST.isAmdHsaOS() && "Non-HSA ABI currently uses relocations");
    assert(MFI->hasPrivateSegmentBuffer());
    return MFI->PrivateSegmentBufferUserSGPR;
  case SIRegisterInfo::KERNARG_SEGMENT_PTR:
    assert(MFI->hasKernargSegmentPtr());
    return MFI->KernargSegmentPtrUserSGPR;
  case SIRegisterInfo::DISPATCH_PTR:
    assert(MFI->hasDispatchPtr());
    return MFI->DispatchPtrUserSGPR;
  case SIRegisterInfo::QUEUE_PTR:
    llvm_unreachable("not implemented");
  case SIRegisterInfo::WORKITEM_ID_X:
    assert(MFI->hasWorkItemIDX());
    return AMDGPU::VGPR0;
  case SIRegisterInfo::WORKITEM_ID_Y:
    assert(MFI->hasWorkItemIDY());
    return AMDGPU::VGPR1;
  case SIRegisterInfo::WORKITEM_ID_Z:
    assert(MFI->hasWorkItemIDZ());
    return AMDGPU::VGPR2;
  }
  llvm_unreachable("unexpected preloaded value type");
}

/// \brief Returns a register that is not used at any point in the function.
/// If all registers are used, then this function will return
/// AMDGPU::NoRegister.
unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
                                            const TargetRegisterClass *RC) const {
  for (unsigned Reg : *RC)
    if (!MRI.isPhysRegUsed(Reg))
      return Reg;
  return AMDGPU::NoRegister;
}

unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const {
  switch (WaveCount) {
  case 10: return 24;
  case 9: return 28;
  case 8: return 32;
  case 7: return 36;
  case 6: return 40;
  case 5: return 48;
  case 4: return 64;
  case 3: return 84;
  case 2: return 128;
  default: return 256;
  }
}

unsigned SIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
                                            unsigned WaveCount) const {
  if (gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
    switch (WaveCount) {
    case 10: return 80;
    case 9: return 80;
    case 8: return 96;
    default: return 102;
    }
  } else {
    switch (WaveCount) {
    case 10: return 48;
    case 9: return 56;
    case 8: return 64;
    case 7: return 72;
    case 6: return 80;
    case 5: return 96;
    default: return 103;
    }
  }
}