      1 //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 /// \file
     11 /// SI implementation of the TargetRegisterInfo class.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "SIRegisterInfo.h"
     16 #include "AMDGPURegisterBankInfo.h"
     17 #include "AMDGPUSubtarget.h"
     18 #include "SIInstrInfo.h"
     19 #include "SIMachineFunctionInfo.h"
     20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
     21 #include "llvm/CodeGen/MachineFrameInfo.h"
     22 #include "llvm/CodeGen/MachineInstrBuilder.h"
     23 #include "llvm/CodeGen/RegisterScavenging.h"
     24 #include "llvm/IR/Function.h"
     25 #include "llvm/IR/LLVMContext.h"
     26 
     27 using namespace llvm;
     28 
     29 static bool hasPressureSet(const int *PSets, unsigned PSetID) {
     30   for (unsigned i = 0; PSets[i] != -1; ++i) {
     31     if (PSets[i] == (int)PSetID)
     32       return true;
     33   }
     34   return false;
     35 }
     36 
     37 void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
     38                                          BitVector &PressureSets) const {
     39   for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
     40     const int *PSets = getRegUnitPressureSets(*U);
     41     if (hasPressureSet(PSets, PSetID)) {
     42       PressureSets.set(PSetID);
     43       break;
     44     }
     45   }
     46 }
     47 
     48 static cl::opt<bool> EnableSpillSGPRToSMEM(
     49   "amdgpu-spill-sgpr-to-smem",
     50   cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
     51   cl::init(false));
     52 
     53 static cl::opt<bool> EnableSpillSGPRToVGPR(
     54   "amdgpu-spill-sgpr-to-vgpr",
     55   cl::desc("Enable spilling SGPRs to VGPRs"),
     56   cl::ReallyHidden,
     57   cl::init(true));
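        // Both flags can be toggled on the llc command line, e.g.
        // "llc -march=amdgcn ... -amdgpu-spill-sgpr-to-smem"; the constructor below
        // only honors the SMEM variant when the subtarget has scalar stores.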
     58 
     59 SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
     60   AMDGPURegisterInfo(),
     61   SGPRPressureSets(getNumRegPressureSets()),
     62   VGPRPressureSets(getNumRegPressureSets()),
     63   SpillSGPRToVGPR(false),
     64   SpillSGPRToSMEM(false) {
     65   if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
     66     SpillSGPRToSMEM = true;
     67   else if (EnableSpillSGPRToVGPR)
     68     SpillSGPRToVGPR = true;
     69 
     70   unsigned NumRegPressureSets = getNumRegPressureSets();
     71 
     72   SGPRSetID = NumRegPressureSets;
     73   VGPRSetID = NumRegPressureSets;
     74 
     75   for (unsigned i = 0; i < NumRegPressureSets; ++i) {
     76     classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
     77     classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
     78   }
     79 
     80   // Determine the number of reg units for each pressure set.
     81   std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
     82   for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
     83     const int *PSets = getRegUnitPressureSets(i);
     84     for (unsigned j = 0; PSets[j] != -1; ++j) {
     85       ++PressureSetRegUnits[PSets[j]];
     86     }
     87   }
     88 
     89   unsigned VGPRMax = 0, SGPRMax = 0;
     90   for (unsigned i = 0; i < NumRegPressureSets; ++i) {
     91     if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
     92       VGPRSetID = i;
     93       VGPRMax = PressureSetRegUnits[i];
     94       continue;
     95     }
     96     if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
     97       SGPRSetID = i;
     98       SGPRMax = PressureSetRegUnits[i];
     99     }
    100   }
    101 
    102   assert(SGPRSetID < NumRegPressureSets &&
    103          VGPRSetID < NumRegPressureSets);
    104 }
    105 
    106 unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
    107   const MachineFunction &MF) const {
    108 
    109   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
    110   unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
    111   unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
    112   return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
    113 }
    114 
    115 static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
    116   unsigned Reg;
    117 
    118   // Try to place it in a hole after PrivateSegmentBufferReg.
    119   if (RegCount & 3) {
    120     // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
    121     // alignment constraints, so we have a hole where we can put the wave offset.
    122     Reg = RegCount - 1;
    123   } else {
    124     // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
    125     // wave offset before it.
    126     Reg = RegCount - 5;
    127   }
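          // For example, with 102 SGPRs available the segment buffer is reserved in
          // s[96:99], leaving s101 (RegCount - 1) free for the wave offset; with 104
          // SGPRs the buffer occupies s[100:103], so the offset goes just below it
          // at s99 (RegCount - 5).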
    128 
    129   return Reg;
    130 }
    131 
    132 unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
    133   const MachineFunction &MF) const {
    134   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
    135   unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
    136   return AMDGPU::SGPR_32RegClass.getRegister(Reg);
    137 }
    138 
    139 unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
    140   const MachineFunction &MF) const {
    141   return AMDGPU::SGPR32;
    142 }
    143 
    144 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
    145   BitVector Reserved(getNumRegs());
    146 
    147   // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
    148   // this seems likely to result in bugs, so I'm marking them as reserved.
    149   reserveRegisterTuples(Reserved, AMDGPU::EXEC);
    150   reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
    151 
    152   // M0 has to be reserved so that llvm accepts it as a live-in into a block.
    153   reserveRegisterTuples(Reserved, AMDGPU::M0);
    154 
    155   // Reserve the memory aperture registers.
    156   reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
    157   reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
    158   reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
    159   reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
    160 
    161   // Reserve xnack_mask registers - support is not implemented in Codegen.
    162   reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
    163 
    164   // Reserve Trap Handler registers - support is not implemented in Codegen.
    165   reserveRegisterTuples(Reserved, AMDGPU::TBA);
    166   reserveRegisterTuples(Reserved, AMDGPU::TMA);
    167   reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
    168   reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
    169   reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
    170   reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
    171   reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
    172   reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
    173   reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
    174   reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
    175 
    176   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
    177 
    178   unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
    179   unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
    180   for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
    181     unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
    182     reserveRegisterTuples(Reserved, Reg);
    183   }
    184 
    185   unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
    186   unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
    187   for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
    188     unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
    189     reserveRegisterTuples(Reserved, Reg);
    190   }
    191 
    192   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
    193 
    194   unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
    195   if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
    196     // Reserve 1 SGPR for scratch wave offset in case we need to spill.
    197     reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
    198   }
    199 
    200   unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
    201   if (ScratchRSrcReg != AMDGPU::NoRegister) {
    202     // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
    203     // to spill.
    204     // TODO: May need to reserve a VGPR if doing LDS spilling.
    205     reserveRegisterTuples(Reserved, ScratchRSrcReg);
    206     assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
    207   }
    208 
    209   // We have to assume the SP is needed in case there are calls in the function,
    210   // which is detected after the function is lowered. If we aren't really going
    211   // to need SP, don't bother reserving it.
    212   unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
    213 
    214   if (StackPtrReg != AMDGPU::NoRegister) {
    215     reserveRegisterTuples(Reserved, StackPtrReg);
    216     assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
    217   }
    218 
    219   unsigned FrameReg = MFI->getFrameOffsetReg();
    220   if (FrameReg != AMDGPU::NoRegister) {
    221     reserveRegisterTuples(Reserved, FrameReg);
    222     assert(!isSubRegister(ScratchRSrcReg, FrameReg));
    223   }
    224 
    225   return Reserved;
    226 }
    227 
    228 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
    229   const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
    230   if (Info->isEntryFunction()) {
    231     const MachineFrameInfo &MFI = Fn.getFrameInfo();
    232     return MFI.hasStackObjects() || MFI.hasCalls();
    233   }
    234 
    235   // May need scavenger for dealing with callee saved registers.
    236   return true;
    237 }
    238 
    239 bool SIRegisterInfo::requiresFrameIndexScavenging(
    240   const MachineFunction &MF) const {
    241   const MachineFrameInfo &MFI = MF.getFrameInfo();
    242   if (MFI.hasStackObjects())
    243     return true;
    244 
    245   // May need to deal with callee saved registers.
    246   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
    247   return !Info->isEntryFunction();
    248 }
    249 
    250 bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
    251   const MachineFunction &MF) const {
    252   // m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
    253   // create a virtual register for it during frame index elimination, so the
    254   // scavenger is directly needed.
    255   return MF.getFrameInfo().hasStackObjects() &&
    256          MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
    257          MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
    258 }
    259 
    260 bool SIRegisterInfo::requiresVirtualBaseRegisters(
    261   const MachineFunction &) const {
    262   // There are no special dedicated stack or frame pointers.
    263   return true;
    264 }
    265 
    266 bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
    267   // This helps catch bugs as verifier errors.
    268   return true;
    269 }
    270 
    271 int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
    272   assert(SIInstrInfo::isMUBUF(*MI));
    273 
    274   int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
    275                                           AMDGPU::OpName::offset);
    276   return MI->getOperand(OffIdx).getImm();
    277 }
    278 
    279 int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
    280                                                  int Idx) const {
    281   if (!SIInstrInfo::isMUBUF(*MI))
    282     return 0;
    283 
    284   assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
    285                                            AMDGPU::OpName::vaddr) &&
    286          "Should never see frame index on non-address operand");
    287 
    288   return getMUBUFInstrOffset(MI);
    289 }
    290 
    291 bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
    292   if (!MI->mayLoadOrStore())
    293     return false;
    294 
    295   int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
    296 
    297   return !isUInt<12>(FullOffset);
    298 }
    299 
    300 void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
    301                                                   unsigned BaseReg,
    302                                                   int FrameIdx,
    303                                                   int64_t Offset) const {
    304   MachineBasicBlock::iterator Ins = MBB->begin();
    305   DebugLoc DL; // Defaults to "unknown"
    306 
    307   if (Ins != MBB->end())
    308     DL = Ins->getDebugLoc();
    309 
    310   MachineFunction *MF = MBB->getParent();
    311   const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
    312   const SIInstrInfo *TII = Subtarget.getInstrInfo();
    313 
    314   if (Offset == 0) {
    315     BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
    316       .addFrameIndex(FrameIdx);
    317     return;
    318   }
    319 
    320   MachineRegisterInfo &MRI = MF->getRegInfo();
    321   unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
    322 
    323   unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    324 
    325   BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
    326     .addImm(Offset);
    327   BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
    328     .addFrameIndex(FrameIdx);
    329 
    330   TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
    331     .addReg(OffsetReg, RegState::Kill)
    332     .addReg(FIReg);
    333 }
    334 
    335 void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
    336                                        int64_t Offset) const {
    337 
    338   MachineBasicBlock *MBB = MI.getParent();
    339   MachineFunction *MF = MBB->getParent();
    340   const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
    341   const SIInstrInfo *TII = Subtarget.getInstrInfo();
    342 
    343 #ifndef NDEBUG
    344   // FIXME: Is it possible to be storing a frame index to itself?
    345   bool SeenFI = false;
    346   for (const MachineOperand &MO: MI.operands()) {
    347     if (MO.isFI()) {
    348       if (SeenFI)
    349         llvm_unreachable("should not see multiple frame indices");
    350 
    351       SeenFI = true;
    352     }
    353   }
    354 #endif
    355 
    356   MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
    357   assert(FIOp && FIOp->isFI() && "frame index must be address operand");
    358   assert(TII->isMUBUF(MI));
    359   assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
    360          MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
    361          "should only be seeing frame offset relative FrameIndex");
    362 
    363 
    364   MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
    365   int64_t NewOffset = OffsetOp->getImm() + Offset;
    366   assert(isUInt<12>(NewOffset) && "offset should be legal");
    367 
    368   FIOp->ChangeToRegister(BaseReg, false);
    369   OffsetOp->setImm(NewOffset);
    370 }
    371 
    372 bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
    373                                         unsigned BaseReg,
    374                                         int64_t Offset) const {
    375   if (!SIInstrInfo::isMUBUF(*MI))
    376     return false;
    377 
    378   int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
    379 
    380   return isUInt<12>(NewOffset);
    381 }
    382 
    383 const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
    384   const MachineFunction &MF, unsigned Kind) const {
    385   // This is inaccurate. It depends on the instruction and address space. The
    386   // only place where we should hit this is for dealing with frame indexes /
    387   // private accesses, so this is correct in that case.
    388   return &AMDGPU::VGPR_32RegClass;
    389 }
    390 
    391 static unsigned getNumSubRegsForSpillOp(unsigned Op) {
    392 
    393   switch (Op) {
    394   case AMDGPU::SI_SPILL_S512_SAVE:
    395   case AMDGPU::SI_SPILL_S512_RESTORE:
    396   case AMDGPU::SI_SPILL_V512_SAVE:
    397   case AMDGPU::SI_SPILL_V512_RESTORE:
    398     return 16;
    399   case AMDGPU::SI_SPILL_S256_SAVE:
    400   case AMDGPU::SI_SPILL_S256_RESTORE:
    401   case AMDGPU::SI_SPILL_V256_SAVE:
    402   case AMDGPU::SI_SPILL_V256_RESTORE:
    403     return 8;
    404   case AMDGPU::SI_SPILL_S128_SAVE:
    405   case AMDGPU::SI_SPILL_S128_RESTORE:
    406   case AMDGPU::SI_SPILL_V128_SAVE:
    407   case AMDGPU::SI_SPILL_V128_RESTORE:
    408     return 4;
    409   case AMDGPU::SI_SPILL_V96_SAVE:
    410   case AMDGPU::SI_SPILL_V96_RESTORE:
    411     return 3;
    412   case AMDGPU::SI_SPILL_S64_SAVE:
    413   case AMDGPU::SI_SPILL_S64_RESTORE:
    414   case AMDGPU::SI_SPILL_V64_SAVE:
    415   case AMDGPU::SI_SPILL_V64_RESTORE:
    416     return 2;
    417   case AMDGPU::SI_SPILL_S32_SAVE:
    418   case AMDGPU::SI_SPILL_S32_RESTORE:
    419   case AMDGPU::SI_SPILL_V32_SAVE:
    420   case AMDGPU::SI_SPILL_V32_RESTORE:
    421     return 1;
    422   default: llvm_unreachable("Invalid spill opcode");
    423   }
    424 }
    425 
    426 static int getOffsetMUBUFStore(unsigned Opc) {
    427   switch (Opc) {
    428   case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
    429     return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
    430   case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
    431     return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
    432   case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
    433     return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
    434   case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
    435     return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
    436   case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
    437     return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
    438   case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
    439     return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
    440   case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
    441     return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
    442   default:
    443     return -1;
    444   }
    445 }
    446 
    447 static int getOffsetMUBUFLoad(unsigned Opc) {
    448   switch (Opc) {
    449   case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
    450     return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
    451   case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
    452     return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
    453   case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
    454     return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
    455   case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
    456     return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
    457   case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
    458     return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
    459   case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
    460     return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
    461   case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
    462     return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
    463   case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
    464     return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
    465   case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
    466     return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
    467   case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
    468     return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
    469   case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
    470     return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
    471   case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
    472     return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
    473   case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
    474     return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
    475   default:
    476     return -1;
    477   }
    478 }
    479 
    480 // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
    481 // need to handle the case where an SGPR may need to be spilled while spilling.
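        // For example, a BUFFER_STORE_DWORD_OFFEN whose frame index has been resolved
        // to an immediate offset is rewritten here as BUFFER_STORE_DWORD_OFFSET with
        // the folded offset, dropping the VGPR address operand entirely.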
    482 static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
    483                                       MachineFrameInfo &MFI,
    484                                       MachineBasicBlock::iterator MI,
    485                                       int Index,
    486                                       int64_t Offset) {
    487   MachineBasicBlock *MBB = MI->getParent();
    488   const DebugLoc &DL = MI->getDebugLoc();
    489   bool IsStore = MI->mayStore();
    490 
    491   unsigned Opc = MI->getOpcode();
    492   int LoadStoreOp = IsStore ?
    493     getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
    494   if (LoadStoreOp == -1)
    495     return false;
    496 
    497   const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
    498   MachineInstrBuilder NewMI = BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
    499     .add(*Reg)
    500     .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
    501     .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
    502     .addImm(Offset)
    503     .addImm(0) // glc
    504     .addImm(0) // slc
    505     .addImm(0) // tfe
    506     .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
    507 
    508   const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
    509                                                        AMDGPU::OpName::vdata_in);
    510   if (VDataIn)
    511     NewMI.add(*VDataIn);
    512   return true;
    513 }
    514 
    515 void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
    516                                          unsigned LoadStoreOp,
    517                                          int Index,
    518                                          unsigned ValueReg,
    519                                          bool IsKill,
    520                                          unsigned ScratchRsrcReg,
    521                                          unsigned ScratchOffsetReg,
    522                                          int64_t InstOffset,
    523                                          MachineMemOperand *MMO,
    524                                          RegScavenger *RS) const {
    525   MachineBasicBlock *MBB = MI->getParent();
    526   MachineFunction *MF = MI->getParent()->getParent();
    527   const GCNSubtarget &ST =  MF->getSubtarget<GCNSubtarget>();
    528   const SIInstrInfo *TII = ST.getInstrInfo();
    529   const MachineFrameInfo &MFI = MF->getFrameInfo();
    530 
    531   const MCInstrDesc &Desc = TII->get(LoadStoreOp);
    532   const DebugLoc &DL = MI->getDebugLoc();
    533   bool IsStore = Desc.mayStore();
    534 
    535   bool Scavenged = false;
    536   unsigned SOffset = ScratchOffsetReg;
    537 
    538   const unsigned EltSize = 4;
    539   const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
    540   unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / (EltSize * CHAR_BIT);
    541   unsigned Size = NumSubRegs * EltSize;
    542   int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
    543   int64_t ScratchOffsetRegDelta = 0;
    544 
    545   unsigned Align = MFI.getObjectAlignment(Index);
    546   const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
    547 
    548   assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");
    549 
    550   if (!isUInt<12>(Offset + Size - EltSize)) {
    551     SOffset = AMDGPU::NoRegister;
    552 
    553     // We currently only support spilling VGPRs to EltSize boundaries, meaning
    554     // we can simplify the adjustment of Offset here to just scale with
    555     // WavefrontSize.
    556     Offset *= ST.getWavefrontSize();
    557 
    558     // We don't have access to the register scavenger if this function is called
    559     // during PEI::scavengeFrameVirtualRegs().
    560     if (RS)
    561       SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);
    562 
    563     if (SOffset == AMDGPU::NoRegister) {
    564       // There are no free SGPRs, and we are already in the process of spilling
    565       // VGPRs.  Since we need a VGPR in order to spill SGPRs (this is true on
    566       // SI/CI, and on VI it is true until we implement spilling using scalar
    567       // stores), we have no way to free up an SGPR.  Our solution here is to
    568       // add the offset directly to the ScratchOffset register, and then
    569       // subtract the offset after the spill to return ScratchOffset to its
    570       // original value.
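              // The emitted sequence then brackets the spill, roughly:
              //   s_add_u32 <soffset>, <soffset>, <scaled offset>
              //   ...per-element buffer loads/stores built below...
              //   s_sub_u32 <soffset>, <soffset>, <scaled offset>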
    571       SOffset = ScratchOffsetReg;
    572       ScratchOffsetRegDelta = Offset;
    573     } else {
    574       Scavenged = true;
    575     }
    576 
    577     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
    578       .addReg(ScratchOffsetReg)
    579       .addImm(Offset);
    580 
    581     Offset = 0;
    582   }
    583 
    584   for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
    585     unsigned SubReg = NumSubRegs == 1 ?
    586       ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));
    587 
    588     unsigned SOffsetRegState = 0;
    589     unsigned SrcDstRegState = getDefRegState(!IsStore);
    590     if (i + 1 == e) {
    591       SOffsetRegState |= getKillRegState(Scavenged);
    592       // The last implicit use carries the "Kill" flag.
    593       SrcDstRegState |= getKillRegState(IsKill);
    594     }
    595 
    596     MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
    597     MachineMemOperand *NewMMO
    598       = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
    599                                  EltSize, MinAlign(Align, EltSize * i));
    600 
    601     auto MIB = BuildMI(*MBB, MI, DL, Desc)
    602       .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
    603       .addReg(ScratchRsrcReg)
    604       .addReg(SOffset, SOffsetRegState)
    605       .addImm(Offset)
    606       .addImm(0) // glc
    607       .addImm(0) // slc
    608       .addImm(0) // tfe
    609       .addMemOperand(NewMMO);
    610 
    611     if (NumSubRegs > 1)
    612       MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
    613   }
    614 
    615   if (ScratchOffsetRegDelta != 0) {
    616     // Subtract the offset we added to the ScratchOffset register.
    617     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
    618         .addReg(ScratchOffsetReg)
    619         .addImm(ScratchOffsetRegDelta);
    620   }
    621 }
    622 
    623 static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
    624                                                      bool Store) {
    625   if (SuperRegSize % 16 == 0) {
    626     return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
    627                          AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
    628   }
    629 
    630   if (SuperRegSize % 8 == 0) {
    631     return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
    632                         AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
    633   }
    634 
    635   return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
    636                       AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
    637 }
    638 
    639 bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
    640                                int Index,
    641                                RegScavenger *RS,
    642                                bool OnlyToVGPR) const {
    643   MachineBasicBlock *MBB = MI->getParent();
    644   MachineFunction *MF = MBB->getParent();
    645   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
    646   DenseSet<unsigned> SGPRSpillVGPRDefinedSet;
    647 
    648   ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
    649     = MFI->getSGPRToVGPRSpills(Index);
    650   bool SpillToVGPR = !VGPRSpills.empty();
    651   if (OnlyToVGPR && !SpillToVGPR)
    652     return false;
    653 
    654   MachineRegisterInfo &MRI = MF->getRegInfo();
    655   const GCNSubtarget &ST =  MF->getSubtarget<GCNSubtarget>();
    656   const SIInstrInfo *TII = ST.getInstrInfo();
    657 
    658   unsigned SuperReg = MI->getOperand(0).getReg();
    659   bool IsKill = MI->getOperand(0).isKill();
    660   const DebugLoc &DL = MI->getDebugLoc();
    661 
    662   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
    663 
    664   bool SpillToSMEM = spillSGPRToSMEM();
    665   if (SpillToSMEM && OnlyToVGPR)
    666     return false;
    667 
    668   assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
    669                          SuperReg != MFI->getFrameOffsetReg() &&
    670                          SuperReg != MFI->getScratchWaveOffsetReg()));
    671 
    672   assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
    673 
    674   unsigned OffsetReg = AMDGPU::M0;
    675   unsigned M0CopyReg = AMDGPU::NoRegister;
    676 
    677   if (SpillToSMEM) {
    678     if (RS->isRegUsed(AMDGPU::M0)) {
    679       M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
    680       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
    681         .addReg(AMDGPU::M0);
    682     }
    683   }
    684 
    685   unsigned ScalarStoreOp;
    686   unsigned EltSize = 4;
    687   const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
    688   if (SpillToSMEM && isSGPRClass(RC)) {
    689     // XXX - if private_element_size is larger than 4 it might be useful to be
    690     // able to emit wider vmem spills.
    691     std::tie(EltSize, ScalarStoreOp) =
    692           getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
    693   }
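          // For example, a 512-bit SGPR tuple (64 bytes) yields a 16-byte element
          // size here, so the loop below emits four S_BUFFER_STORE_DWORDX4_SGPR
          // stores, one per 128-bit piece.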
    694 
    695   ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
    696   unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
    697 
    698   // SubReg carries the "Kill" flag when SubReg == SuperReg.
    699   unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
    700   for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    701     unsigned SubReg = NumSubRegs == 1 ?
    702       SuperReg : getSubReg(SuperReg, SplitParts[i]);
    703 
    704     if (SpillToSMEM) {
    705       int64_t FrOffset = FrameInfo.getObjectOffset(Index);
    706 
    707       // The allocated memory size is really the wavefront size * the frame
    708       // index size. The widest register class is 64 bytes, so a 4-byte scratch
    709       // allocation is enough to spill this in a single stack object.
    710       //
    711       // FIXME: Frame size/offsets are computed earlier than this, so the extra
    712       // space is still unnecessarily allocated.
    713 
    714       unsigned Align = FrameInfo.getObjectAlignment(Index);
    715       MachinePointerInfo PtrInfo
    716         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
    717       MachineMemOperand *MMO
    718         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
    719                                    EltSize, MinAlign(Align, EltSize * i));
    720 
    721       // SMEM instructions only support a single offset, so increment the wave
    722       // offset.
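              // For example, with a 64-wide wavefront and FrOffset == 4, element i of
              // this spill lands at byte offset 256 + EltSize * i in scratch.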
    723 
    724       int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
    725       if (Offset != 0) {
    726         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
    727           .addReg(MFI->getFrameOffsetReg())
    728           .addImm(Offset);
    729       } else {
    730         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
    731           .addReg(MFI->getFrameOffsetReg());
    732       }
    733 
    734       BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
    735         .addReg(SubReg, getKillRegState(IsKill)) // sdata
    736         .addReg(MFI->getScratchRSrcReg())        // sbase
    737         .addReg(OffsetReg, RegState::Kill)       // soff
    738         .addImm(0)                               // glc
    739         .addMemOperand(MMO);
    740 
    741       continue;
    742     }
    743 
    744     if (SpillToVGPR) {
    745       SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
    746 
    747       // During SGPR spilling to VGPR, determine if the VGPR is defined. The
    748       // only circumstance in which we say it is undefined is when it is the
    749       // first spill to this VGPR in the first basic block.
    750       bool VGPRDefined = true;
    751       if (MBB == &MF->front())
    752         VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;
    753 
    754       // Mark the "old value of vgpr" input undef only if this is the first sgpr
    755       // spill to this specific vgpr in the first basic block.
    756       BuildMI(*MBB, MI, DL,
    757               TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
    758               Spill.VGPR)
    759         .addReg(SubReg, getKillRegState(IsKill))
    760         .addImm(Spill.Lane)
    761         .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);
    762 
    763       // FIXME: Since this spills to another register instead of an actual
    764       // frame index, we should delete the frame index when all references to
    765       // it are fixed.
    766     } else {
    767       // XXX - Can the spill to VGPR fail for some subregisters but not others?
    768       if (OnlyToVGPR)
    769         return false;
    770 
    771       // Spill SGPR to a frame index.
    772       // TODO: Should VI try to spill to VGPR and then spill to SMEM?
    773       unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    775 
    776       MachineInstrBuilder Mov
    777         = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
    778         .addReg(SubReg, SubKillState);
    779 
    780 
    781       // There could be undef components of a spilled super register.
    782       // TODO: Can we detect this and skip the spill?
    783       if (NumSubRegs > 1) {
    784         // The last implicit use of the SuperReg carries the "Kill" flag.
    785         unsigned SuperKillState = 0;
    786         if (i + 1 == e)
    787           SuperKillState |= getKillRegState(IsKill);
    788         Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
    789       }
    790 
    791       unsigned Align = FrameInfo.getObjectAlignment(Index);
    792       MachinePointerInfo PtrInfo
    793         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
    794       MachineMemOperand *MMO
    795         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
    796                                    EltSize, MinAlign(Align, EltSize * i));
    797       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
    798         .addReg(TmpReg, RegState::Kill)    // src
    799         .addFrameIndex(Index)              // vaddr
    800         .addReg(MFI->getScratchRSrcReg())  // srsrc
    801         .addReg(MFI->getFrameOffsetReg())  // soffset
    802         .addImm(i * 4)                     // offset
    803         .addMemOperand(MMO);
    804     }
    805   }
    806 
    807   if (M0CopyReg != AMDGPU::NoRegister) {
    808     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
    809       .addReg(M0CopyReg, RegState::Kill);
    810   }
    811 
    812   MI->eraseFromParent();
    813   MFI->addToSpilledSGPRs(NumSubRegs);
    814   return true;
    815 }
    816 
    817 bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
    818                                  int Index,
    819                                  RegScavenger *RS,
    820                                  bool OnlyToVGPR) const {
    821   MachineFunction *MF = MI->getParent()->getParent();
    822   MachineRegisterInfo &MRI = MF->getRegInfo();
    823   MachineBasicBlock *MBB = MI->getParent();
    824   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
    825 
    826   ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
    827     = MFI->getSGPRToVGPRSpills(Index);
    828   bool SpillToVGPR = !VGPRSpills.empty();
    829   if (OnlyToVGPR && !SpillToVGPR)
    830     return false;
    831 
    832   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
    833   const GCNSubtarget &ST =  MF->getSubtarget<GCNSubtarget>();
    834   const SIInstrInfo *TII = ST.getInstrInfo();
    835   const DebugLoc &DL = MI->getDebugLoc();
    836 
    837   unsigned SuperReg = MI->getOperand(0).getReg();
    838   bool SpillToSMEM = spillSGPRToSMEM();
    839   if (SpillToSMEM && OnlyToVGPR)
    840     return false;
    841 
    842   assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
    843 
    844   unsigned OffsetReg = AMDGPU::M0;
    845   unsigned M0CopyReg = AMDGPU::NoRegister;
    846 
    847   if (SpillToSMEM) {
    848     if (RS->isRegUsed(AMDGPU::M0)) {
    849       M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
    850       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
    851         .addReg(AMDGPU::M0);
    852     }
    853   }
    854 
    855   unsigned EltSize = 4;
    856   unsigned ScalarLoadOp;
    857 
    858   const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
    859   if (SpillToSMEM && isSGPRClass(RC)) {
    860     // XXX - if private_element_size is larger than 4 it might be useful to be
    861     // able to emit wider vmem spills.
    862     std::tie(EltSize, ScalarLoadOp) =
    863           getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
    864   }
    865 
    866   ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
    867   unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
    868 
    869   // SubReg carries the "Kill" flag when SubReg == SuperReg.
    870   int64_t FrOffset = FrameInfo.getObjectOffset(Index);
    871 
    872   for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    873     unsigned SubReg = NumSubRegs == 1 ?
    874       SuperReg : getSubReg(SuperReg, SplitParts[i]);
    875 
    876     if (SpillToSMEM) {
    877       // FIXME: Size may be > 4 but the extra bytes are wasted.
    878       unsigned Align = FrameInfo.getObjectAlignment(Index);
    879       MachinePointerInfo PtrInfo
    880         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
    881       MachineMemOperand *MMO
    882         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
    883                                    EltSize, MinAlign(Align, EltSize * i));
    884 
    885       // Add i * 4 offset
    886       int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
    887       if (Offset != 0) {
    888         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
    889           .addReg(MFI->getFrameOffsetReg())
    890           .addImm(Offset);
    891       } else {
    892         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
    893           .addReg(MFI->getFrameOffsetReg());
    894       }
    895 
    896       auto MIB =
    897         BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
    898         .addReg(MFI->getScratchRSrcReg()) // sbase
    899         .addReg(OffsetReg, RegState::Kill)                // soff
    900         .addImm(0)                        // glc
    901         .addMemOperand(MMO);
    902 
    903       if (NumSubRegs > 1)
    904         MIB.addReg(SuperReg, RegState::ImplicitDefine);
    905 
    906       continue;
    907     }
    908 
    909     if (SpillToVGPR) {
    910       SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
    911       auto MIB =
    912         BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
    913                 SubReg)
    914         .addReg(Spill.VGPR)
    915         .addImm(Spill.Lane);
    916 
    917       if (NumSubRegs > 1)
    918         MIB.addReg(SuperReg, RegState::ImplicitDefine);
    919     } else {
    920       if (OnlyToVGPR)
    921         return false;
    922 
    923       // Restore SGPR from a stack slot.
    924       // FIXME: We should use S_LOAD_DWORD here for VI.
    925       unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    926       unsigned Align = FrameInfo.getObjectAlignment(Index);
    927 
    928       MachinePointerInfo PtrInfo
    929         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
    930 
    931       MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
    932         MachineMemOperand::MOLoad, EltSize,
    933         MinAlign(Align, EltSize * i));
    934 
    935       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
    936         .addFrameIndex(Index)              // vaddr
    937         .addReg(MFI->getScratchRSrcReg())  // srsrc
    938         .addReg(MFI->getFrameOffsetReg())  // soffset
    939         .addImm(i * 4)                     // offset
    940         .addMemOperand(MMO);
    941 
    942       auto MIB =
    943         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
    944         .addReg(TmpReg, RegState::Kill);
    945 
    946       if (NumSubRegs > 1)
    947         MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    948     }
    949   }
    950 
    951   if (M0CopyReg != AMDGPU::NoRegister) {
    952     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
    953       .addReg(M0CopyReg, RegState::Kill);
    954   }
    955 
    956   MI->eraseFromParent();
    957   return true;
    958 }
    959 
    960 /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
    961 /// a VGPR and the stack slot can be safely eliminated when all other users are
    962 /// handled.
    963 bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
    964   MachineBasicBlock::iterator MI,
    965   int FI,
    966   RegScavenger *RS) const {
    967   switch (MI->getOpcode()) {
    968   case AMDGPU::SI_SPILL_S512_SAVE:
    969   case AMDGPU::SI_SPILL_S256_SAVE:
    970   case AMDGPU::SI_SPILL_S128_SAVE:
    971   case AMDGPU::SI_SPILL_S64_SAVE:
    972   case AMDGPU::SI_SPILL_S32_SAVE:
    973     return spillSGPR(MI, FI, RS, true);
    974   case AMDGPU::SI_SPILL_S512_RESTORE:
    975   case AMDGPU::SI_SPILL_S256_RESTORE:
    976   case AMDGPU::SI_SPILL_S128_RESTORE:
    977   case AMDGPU::SI_SPILL_S64_RESTORE:
    978   case AMDGPU::SI_SPILL_S32_RESTORE:
    979     return restoreSGPR(MI, FI, RS, true);
    980   default:
    981     llvm_unreachable("not an SGPR spill instruction");
    982   }
    983 }
    984 
    985 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
    986                                         int SPAdj, unsigned FIOperandNum,
    987                                         RegScavenger *RS) const {
    988   MachineFunction *MF = MI->getParent()->getParent();
    989   MachineRegisterInfo &MRI = MF->getRegInfo();
    990   MachineBasicBlock *MBB = MI->getParent();
    991   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
    992   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
    993   const GCNSubtarget &ST =  MF->getSubtarget<GCNSubtarget>();
    994   const SIInstrInfo *TII = ST.getInstrInfo();
    995   DebugLoc DL = MI->getDebugLoc();
    996 
    997   MachineOperand &FIOp = MI->getOperand(FIOperandNum);
    998   int Index = MI->getOperand(FIOperandNum).getIndex();
    999 
   1000   switch (MI->getOpcode()) {
   1001     // SGPR register spill
   1002     case AMDGPU::SI_SPILL_S512_SAVE:
   1003     case AMDGPU::SI_SPILL_S256_SAVE:
   1004     case AMDGPU::SI_SPILL_S128_SAVE:
   1005     case AMDGPU::SI_SPILL_S64_SAVE:
   1006     case AMDGPU::SI_SPILL_S32_SAVE: {
   1007       spillSGPR(MI, Index, RS);
   1008       break;
   1009     }
   1010 
   1011     // SGPR register restore
   1012     case AMDGPU::SI_SPILL_S512_RESTORE:
   1013     case AMDGPU::SI_SPILL_S256_RESTORE:
   1014     case AMDGPU::SI_SPILL_S128_RESTORE:
   1015     case AMDGPU::SI_SPILL_S64_RESTORE:
   1016     case AMDGPU::SI_SPILL_S32_RESTORE: {
   1017       restoreSGPR(MI, Index, RS);
   1018       break;
   1019     }
   1020 
   1021     // VGPR register spill
   1022     case AMDGPU::SI_SPILL_V512_SAVE:
   1023     case AMDGPU::SI_SPILL_V256_SAVE:
   1024     case AMDGPU::SI_SPILL_V128_SAVE:
   1025     case AMDGPU::SI_SPILL_V96_SAVE:
   1026     case AMDGPU::SI_SPILL_V64_SAVE:
   1027     case AMDGPU::SI_SPILL_V32_SAVE: {
   1028       const MachineOperand *VData = TII->getNamedOperand(*MI,
   1029                                                          AMDGPU::OpName::vdata);
   1030       buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
   1031             Index,
   1032             VData->getReg(), VData->isKill(),
   1033             TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
   1034             TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
   1035             TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
   1036             *MI->memoperands_begin(),
   1037             RS);
   1038       MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
   1039       MI->eraseFromParent();
   1040       break;
   1041     }
   1042     case AMDGPU::SI_SPILL_V32_RESTORE:
   1043     case AMDGPU::SI_SPILL_V64_RESTORE:
   1044     case AMDGPU::SI_SPILL_V96_RESTORE:
   1045     case AMDGPU::SI_SPILL_V128_RESTORE:
   1046     case AMDGPU::SI_SPILL_V256_RESTORE:
   1047     case AMDGPU::SI_SPILL_V512_RESTORE: {
   1048       const MachineOperand *VData = TII->getNamedOperand(*MI,
   1049                                                          AMDGPU::OpName::vdata);
   1050 
   1051       buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
   1052             Index,
   1053             VData->getReg(), VData->isKill(),
   1054             TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
   1055             TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
   1056             TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
   1057             *MI->memoperands_begin(),
   1058             RS);
   1059       MI->eraseFromParent();
   1060       break;
   1061     }
   1062 
   1063     default: {
   1064       const DebugLoc &DL = MI->getDebugLoc();
   1065       bool IsMUBUF = TII->isMUBUF(*MI);
   1066 
   1067       if (!IsMUBUF &&
   1068           MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
   1069         // Convert to an absolute stack address by finding the offset from the
   1070         // scratch wave base and scaling by the wave size.
   1071         //
   1072         // In an entry function/kernel the stack address is already the
   1073         // absolute address relative to the scratch wave offset.
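                // Concretely, the per-lane address computed below is
                //   (FrameOffsetReg - ScratchWaveOffsetReg) / WavefrontSize + ObjectOffset
                // using an S_SUB_U32, a V_LSHRREV_B32 and an add.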
   1074 
   1075         unsigned DiffReg
   1076           = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
   1077 
   1078         bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
   1079         unsigned ResultReg = IsCopy ?
   1080           MI->getOperand(0).getReg() :
   1081           MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
   1082 
   1083         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
   1084           .addReg(MFI->getFrameOffsetReg())
   1085           .addReg(MFI->getScratchWaveOffsetReg());
   1086 
   1087         int64_t Offset = FrameInfo.getObjectOffset(Index);
   1088         if (Offset == 0) {
   1089           // XXX - This never happens because of emergency scavenging slot at 0?
   1090           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
   1091             .addImm(Log2_32(ST.getWavefrontSize()))
   1092             .addReg(DiffReg);
   1093         } else {
   1094           unsigned ScaledReg
   1095             = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
   1096 
   1097           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
   1098             .addImm(Log2_32(ST.getWavefrontSize()))
   1099             .addReg(DiffReg, RegState::Kill);
   1100 
   1101           // TODO: Fold if use instruction is another add of a constant.
   1102           if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
   1103             TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
   1104               .addImm(Offset)
   1105               .addReg(ScaledReg, RegState::Kill);
   1106           } else {
   1107             unsigned ConstOffsetReg
   1108               = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
   1109 
   1110             BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
   1111               .addImm(Offset);
   1112             TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
   1113               .addReg(ConstOffsetReg, RegState::Kill)
   1114               .addReg(ScaledReg, RegState::Kill);
   1115           }
   1116         }
   1117 
   1118         // Don't introduce an extra copy if we're just materializing in a mov.
   1119         if (IsCopy)
   1120           MI->eraseFromParent();
   1121         else
   1122           FIOp.ChangeToRegister(ResultReg, false, false, true);
   1123         return;
   1124       }
   1125 
   1126       if (IsMUBUF) {
   1127         // Disable offen so we don't need a 0 vgpr base.
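                // If the frame object's offset folds into the 12-bit MUBUF immediate,
                // buildMUBUFOffsetLoadStore below rewrites this to the _OFFSET form.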
   1128         assert(static_cast<int>(FIOperandNum) ==
   1129                AMDGPU::getNamedOperandIdx(MI->getOpcode(),
   1130                                           AMDGPU::OpName::vaddr));
   1131 
   1132         assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
   1133                == MFI->getFrameOffsetReg());
   1134 
   1135         int64_t Offset = FrameInfo.getObjectOffset(Index);
   1136         int64_t OldImm
   1137           = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
   1138         int64_t NewOffset = OldImm + Offset;
   1139 
   1140         if (isUInt<12>(NewOffset) &&
   1141             buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
   1142           MI->eraseFromParent();
   1143           return;
   1144         }
   1145       }
   1146 
   1147       // If the offset is simply too big, don't convert to a scratch wave offset
   1148       // relative index.
   1149 
   1150       int64_t Offset = FrameInfo.getObjectOffset(Index);
   1151       FIOp.ChangeToImmediate(Offset);
   1152       if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
   1153         unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
   1154         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
   1155           .addImm(Offset);
   1156         FIOp.ChangeToRegister(TmpReg, false, false, true);
   1157       }
   1158     }
   1159   }
   1160 }
   1161 
   1162 StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
   1163   #define AMDGPU_REG_ASM_NAMES
   1164   #include "AMDGPURegAsmNames.inc.cpp"
   1165 
   1166   #define REG_RANGE(BeginReg, EndReg, RegTable)            \
   1167     if (Reg >= BeginReg && Reg <= EndReg) {                \
   1168       unsigned Index = Reg - BeginReg;                     \
   1169       assert(Index < array_lengthof(RegTable));            \
   1170       return RegTable[Index];                              \
   1171     }
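          // e.g. REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames) maps
          // AMDGPU::VGPR7 to VGPR32RegNames[7] (presumably "v7" in the generated
          // table).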
   1172 
   1173   REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
   1174   REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
   1175   REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
   1176   REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
   1177   REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
   1178             VGPR96RegNames);
   1179 
   1180   REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
   1181             AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
   1182             VGPR128RegNames);
   1183   REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
   1184             AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
   1185             SGPR128RegNames);
   1186 
   1187   REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
   1188             AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
   1189             VGPR256RegNames);
   1190 
   1191   REG_RANGE(
   1192     AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
   1193     AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
   1194     VGPR512RegNames);
   1195 
   1196   REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
   1197             AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
   1198             SGPR256RegNames);
   1199 
   1200   REG_RANGE(
   1201     AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
   1202     AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
   1203     SGPR512RegNames
   1204   );
   1205 
   1206 #undef REG_RANGE
   1207 
   1208   // FIXME: Rename flat_scr so we don't need to special case this.
   1209   switch (Reg) {
   1210   case AMDGPU::FLAT_SCR:
   1211     return "flat_scratch";
   1212   case AMDGPU::FLAT_SCR_LO:
   1213     return "flat_scratch_lo";
   1214   case AMDGPU::FLAT_SCR_HI:
   1215     return "flat_scratch_hi";
   1216   default:
   1217     // For the special named registers the default is fine.
   1218     return TargetRegisterInfo::getRegAsmName(Reg);
   1219   }
   1220 }
   1221 
   1222 // FIXME: This is very slow. It might be worth creating a map from physreg to
   1223 // register class.
   1224 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
   1225   assert(!TargetRegisterInfo::isVirtualRegister(Reg));
   1226 
   1227   static const TargetRegisterClass *const BaseClasses[] = {
   1228     &AMDGPU::VGPR_32RegClass,
   1229     &AMDGPU::SReg_32RegClass,
   1230     &AMDGPU::VReg_64RegClass,
   1231     &AMDGPU::SReg_64RegClass,
   1232     &AMDGPU::VReg_96RegClass,
   1233     &AMDGPU::VReg_128RegClass,
   1234     &AMDGPU::SReg_128RegClass,
   1235     &AMDGPU::VReg_256RegClass,
   1236     &AMDGPU::SReg_256RegClass,
   1237     &AMDGPU::VReg_512RegClass,
   1238     &AMDGPU::SReg_512RegClass,
   1239     &AMDGPU::SCC_CLASSRegClass,
   1240     &AMDGPU::Pseudo_SReg_32RegClass,
   1241     &AMDGPU::Pseudo_SReg_128RegClass,
   1242   };
   1243 
   1244   for (const TargetRegisterClass *BaseClass : BaseClasses) {
   1245     if (BaseClass->contains(Reg)) {
   1246       return BaseClass;
   1247     }
   1248   }
   1249   return nullptr;
   1250 }
   1251 
   1252 // TODO: It might be helpful to have some target specific flags in
   1253 // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
   1254 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
   1255   unsigned Size = getRegSizeInBits(*RC);
   1256   if (Size < 32)
   1257     return false;
   1258   switch (Size) {
   1259   case 32:
   1260     return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
   1261   case 64:
   1262     return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
   1263   case 96:
   1264     return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
   1265   case 128:
   1266     return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
   1267   case 256:
   1268     return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
   1269   case 512:
   1270     return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
   1271   default:
   1272     llvm_unreachable("Invalid register class size");
   1273   }
   1274 }
   1275 
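// Illustrative mapping (not in the original comments): this returns the vector
// class of the same width, e.g. &AMDGPU::SReg_64RegClass would be expected to
// map to &AMDGPU::VReg_64RegClass. Only the size of SRC matters here, not its
// contents.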
   1276 const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
   1277                                          const TargetRegisterClass *SRC) const {
   1278   switch (getRegSizeInBits(*SRC)) {
   1279   case 32:
   1280     return &AMDGPU::VGPR_32RegClass;
   1281   case 64:
   1282     return &AMDGPU::VReg_64RegClass;
   1283   case 96:
   1284     return &AMDGPU::VReg_96RegClass;
   1285   case 128:
   1286     return &AMDGPU::VReg_128RegClass;
   1287   case 256:
   1288     return &AMDGPU::VReg_256RegClass;
   1289   case 512:
   1290     return &AMDGPU::VReg_512RegClass;
   1291   default:
   1292     llvm_unreachable("Invalid register class size");
   1293   }
   1294 }
   1295 
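// Note (an observation, not an original comment): unlike the VGPR mapping
// above, there is no 96-bit scalar class in the switch below, so a 96-bit
// vector class would fall into the llvm_unreachable default.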
   1296 const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
   1297                                          const TargetRegisterClass *VRC) const {
   1298   switch (getRegSizeInBits(*VRC)) {
   1299   case 32:
   1300     return &AMDGPU::SGPR_32RegClass;
   1301   case 64:
   1302     return &AMDGPU::SReg_64RegClass;
   1303   case 128:
   1304     return &AMDGPU::SReg_128RegClass;
   1305   case 256:
   1306     return &AMDGPU::SReg_256RegClass;
   1307   case 512:
   1308     return &AMDGPU::SReg_512RegClass;
   1309   default:
   1310     llvm_unreachable("Invalid register class size");
   1311   }
   1312 }
   1313 
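// Example (illustrative, assuming the standard sub-register indices used in
// this file): getSubRegClass(&AMDGPU::SReg_256RegClass, AMDGPU::sub0_sub1)
// covers two 32-bit lanes and would be expected to return
// &AMDGPU::SReg_64RegClass.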
   1314 const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
   1315                          const TargetRegisterClass *RC, unsigned SubIdx) const {
   1316   if (SubIdx == AMDGPU::NoSubRegister)
   1317     return RC;
   1318 
   1319   // We can assume that each lane corresponds to one 32-bit register.
   1320   unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
   1321   if (isSGPRClass(RC)) {
   1322     switch (Count) {
   1323     case 1:
   1324       return &AMDGPU::SGPR_32RegClass;
   1325     case 2:
   1326       return &AMDGPU::SReg_64RegClass;
   1327     case 4:
   1328       return &AMDGPU::SReg_128RegClass;
   1329     case 8:
   1330       return &AMDGPU::SReg_256RegClass;
   1331     case 16: /* fall-through */
   1332     default:
   1333       llvm_unreachable("Invalid sub-register class size");
   1334     }
   1335   } else {
   1336     switch (Count) {
   1337     case 1:
   1338       return &AMDGPU::VGPR_32RegClass;
   1339     case 2:
   1340       return &AMDGPU::VReg_64RegClass;
   1341     case 3:
   1342       return &AMDGPU::VReg_96RegClass;
   1343     case 4:
   1344       return &AMDGPU::VReg_128RegClass;
   1345     case 8:
   1346       return &AMDGPU::VReg_256RegClass;
   1347     case 16: /* fall-through */
   1348     default:
   1349       llvm_unreachable("Invalid sub-register class size");
   1350     }
   1351   }
   1352 }
   1353 
   1354 bool SIRegisterInfo::shouldRewriteCopySrc(
   1355   const TargetRegisterClass *DefRC,
   1356   unsigned DefSubReg,
   1357   const TargetRegisterClass *SrcRC,
   1358   unsigned SrcSubReg) const {
   1359   // We want to prefer the smallest register class possible, so we don't want to
   1360   // stop and rewrite on anything that looks like a subregister
   1361   // extract. Operations mostly don't care about the super register class, so we
   1362   // only want to stop on the most basic of copies between the same register
   1363   // class.
   1364   //
   1365   // e.g. if we have something like
   1366   // %0 = ...
   1367   // %1 = ...
    1368   // %2 = REG_SEQUENCE %0, sub0, %1, sub1
   1369   // %3 = COPY %2, sub0
   1370   //
   1371   // We want to look through the COPY to find:
   1372   //  => %3 = COPY %0
   1373 
   1374   // Plain copy.
   1375   return getCommonSubClass(DefRC, SrcRC) != nullptr;
   1376 }
   1377 
    1378 /// Returns a register that is not used at any point in the function.
    1379 /// If all registers are used, then this function will return
    1380 /// AMDGPU::NoRegister.
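/// Example (illustrative): findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF)
/// walks the class in its declared order and returns the first allocatable
/// VGPR that MRI has not marked as used, or AMDGPU::NoRegister if none exists.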
   1381 unsigned
   1382 SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
   1383                                    const TargetRegisterClass *RC,
   1384                                    const MachineFunction &MF) const {
   1385 
   1386   for (unsigned Reg : *RC)
   1387     if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
   1388       return Reg;
   1389   return AMDGPU::NoRegister;
   1390 }
   1391 
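// Worked example (illustrative, not from the original comments): splitting a
// 128-bit class with EltSize == 4 yields {sub0, sub1, sub2, sub3}; the same
// class with EltSize == 8 yields {sub0_sub1, sub2_sub3}; with EltSize == 16 the
// split is trivial and an empty list is returned.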
   1392 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
   1393                                                    unsigned EltSize) const {
   1394   if (EltSize == 4) {
   1395     static const int16_t Sub0_15[] = {
   1396       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
   1397       AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
   1398       AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
   1399       AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
   1400     };
   1401 
   1402     static const int16_t Sub0_7[] = {
   1403       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
   1404       AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
   1405     };
   1406 
   1407     static const int16_t Sub0_3[] = {
   1408       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
   1409     };
   1410 
   1411     static const int16_t Sub0_2[] = {
   1412       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
   1413     };
   1414 
   1415     static const int16_t Sub0_1[] = {
   1416       AMDGPU::sub0, AMDGPU::sub1,
   1417     };
   1418 
   1419     switch (AMDGPU::getRegBitWidth(*RC->MC)) {
   1420     case 32:
   1421       return {};
   1422     case 64:
   1423       return makeArrayRef(Sub0_1);
   1424     case 96:
   1425       return makeArrayRef(Sub0_2);
   1426     case 128:
   1427       return makeArrayRef(Sub0_3);
   1428     case 256:
   1429       return makeArrayRef(Sub0_7);
   1430     case 512:
   1431       return makeArrayRef(Sub0_15);
   1432     default:
   1433       llvm_unreachable("unhandled register size");
   1434     }
   1435   }
   1436 
   1437   if (EltSize == 8) {
   1438     static const int16_t Sub0_15_64[] = {
   1439       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
   1440       AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
   1441       AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
   1442       AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
   1443     };
   1444 
   1445     static const int16_t Sub0_7_64[] = {
   1446       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
   1447       AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
   1448     };
   1449 
   1450 
   1451     static const int16_t Sub0_3_64[] = {
   1452       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
   1453     };
   1454 
   1455     switch (AMDGPU::getRegBitWidth(*RC->MC)) {
   1456     case 64:
   1457       return {};
   1458     case 128:
   1459       return makeArrayRef(Sub0_3_64);
   1460     case 256:
   1461       return makeArrayRef(Sub0_7_64);
   1462     case 512:
   1463       return makeArrayRef(Sub0_15_64);
   1464     default:
   1465       llvm_unreachable("unhandled register size");
   1466     }
   1467   }
   1468 
   1469   assert(EltSize == 16 && "unhandled register spill split size");
   1470 
   1471   static const int16_t Sub0_15_128[] = {
   1472     AMDGPU::sub0_sub1_sub2_sub3,
   1473     AMDGPU::sub4_sub5_sub6_sub7,
   1474     AMDGPU::sub8_sub9_sub10_sub11,
   1475     AMDGPU::sub12_sub13_sub14_sub15
   1476   };
   1477 
   1478   static const int16_t Sub0_7_128[] = {
   1479     AMDGPU::sub0_sub1_sub2_sub3,
   1480     AMDGPU::sub4_sub5_sub6_sub7
   1481   };
   1482 
   1483   switch (AMDGPU::getRegBitWidth(*RC->MC)) {
   1484   case 128:
   1485     return {};
   1486   case 256:
   1487     return makeArrayRef(Sub0_7_128);
   1488   case 512:
   1489     return makeArrayRef(Sub0_15_128);
   1490   default:
   1491     llvm_unreachable("unhandled register size");
   1492   }
   1493 }
   1494 
   1495 const TargetRegisterClass*
   1496 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
   1497                                   unsigned Reg) const {
   1498   if (TargetRegisterInfo::isVirtualRegister(Reg))
    1499     return MRI.getRegClass(Reg);
   1500 
   1501   return getPhysRegClass(Reg);
   1502 }
   1503 
   1504 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
   1505                             unsigned Reg) const {
    1506   const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
   1507   assert(RC && "Register class for the reg not found");
   1508   return hasVGPRs(RC);
   1509 }
   1510 
   1511 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
   1512                                     const TargetRegisterClass *SrcRC,
   1513                                     unsigned SubReg,
   1514                                     const TargetRegisterClass *DstRC,
   1515                                     unsigned DstSubReg,
   1516                                     const TargetRegisterClass *NewRC,
   1517                                     LiveIntervals &LIS) const {
   1518   unsigned SrcSize = getRegSizeInBits(*SrcRC);
   1519   unsigned DstSize = getRegSizeInBits(*DstRC);
   1520   unsigned NewSize = getRegSizeInBits(*NewRC);
   1521 
    1522   // Do not increase the size of registers beyond a dword; we would need to
    1523   // allocate adjacent registers and constrain regalloc more than needed.
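  // For instance (a numeric illustration, not part of the original comment):
  // coalescing two 64-bit intervals into a 128-bit NewRC is rejected because
  // NewSize (128) exceeds both SrcSize and DstSize, whereas any copy involving
  // a 32-bit class is accepted unconditionally below.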
   1524 
   1525   // Always allow dword coalescing.
   1526   if (SrcSize <= 32 || DstSize <= 32)
   1527     return true;
   1528 
   1529   return NewSize <= DstSize || NewSize <= SrcSize;
   1530 }
   1531 
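// Background note (a hedged summary, not from the original source): occupancy
// here is the number of waves achievable given the LDS usage; a lower occupancy
// generally allows a larger per-wave register budget, which is then capped by
// the per-function limits from getMaxNumVGPRs(MF) / getMaxNumSGPRs(MF).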
   1532 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
   1533                                              MachineFunction &MF) const {
   1534 
   1535   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   1536   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   1537 
   1538   unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
   1539                                                        MF.getFunction());
   1540   switch (RC->getID()) {
   1541   default:
   1542     return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
   1543   case AMDGPU::VGPR_32RegClassID:
   1544     return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
   1545   case AMDGPU::SGPR_32RegClassID:
   1546     return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
   1547   }
   1548 }
   1549 
   1550 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
   1551                                                 unsigned Idx) const {
   1552   if (Idx == getVGPRPressureSet())
   1553     return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
   1554                                const_cast<MachineFunction &>(MF));
   1555 
   1556   if (Idx == getSGPRPressureSet())
   1557     return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
   1558                                const_cast<MachineFunction &>(MF));
   1559 
   1560   return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
   1561 }
   1562 
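// Note (an interpretation, not an original comment): returning an empty set for
// M0's register unit keeps M0 from being counted against any pressure set, so
// it does not inflate SGPR pressure estimates.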
   1563 const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
   1564   static const int Empty[] = { -1 };
   1565 
   1566   if (hasRegUnit(AMDGPU::M0, RegUnit))
   1567     return Empty;
   1568   return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
   1569 }
   1570 
   1571 unsigned SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
    1572   // Not a callee-saved register.
   1573   return AMDGPU::SGPR30_SGPR31;
   1574 }
   1575 
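// Note (observational): when the operand's register has no register bank
// recorded this returns nullptr, and the 96-bit scalar case also yields
// nullptr since no 96-bit SGPR class is available here.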
   1576 const TargetRegisterClass *
   1577 SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
   1578                                          const MachineRegisterInfo &MRI) const {
   1579   unsigned Size = getRegSizeInBits(MO.getReg(), MRI);
   1580   const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
   1581   if (!RB)
   1582     return nullptr;
   1583 
   1584   switch (Size) {
   1585   case 32:
   1586     return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
   1587                                                   &AMDGPU::SReg_32_XM0RegClass;
   1588   case 64:
   1589     return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
   1590                                                    &AMDGPU::SReg_64_XEXECRegClass;
   1591   case 96:
   1592     return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
   1593                                                   nullptr;
   1594   case 128:
   1595     return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
   1596                                                   &AMDGPU::SReg_128RegClass;
   1597   default:
   1598     llvm_unreachable("not implemented");
   1599   }
   1600 }
   1601