Home | History | Annotate | Download | only in AMDGPU
      1 //===----------------------- SIFrameLowering.cpp --------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //==-----------------------------------------------------------------------===//
      9 
     10 #include "SIFrameLowering.h"
     11 #include "SIInstrInfo.h"
     12 #include "SIMachineFunctionInfo.h"
     13 #include "SIRegisterInfo.h"
     14 #include "llvm/CodeGen/MachineFrameInfo.h"
     15 #include "llvm/CodeGen/MachineFunction.h"
     16 #include "llvm/CodeGen/MachineInstrBuilder.h"
     17 #include "llvm/CodeGen/RegisterScavenging.h"
     18 
     19 using namespace llvm;
     20 
     21 
     22 static bool hasOnlySGPRSpills(const SIMachineFunctionInfo *FuncInfo,
     23                               const MachineFrameInfo *FrameInfo) {
     24   if (!FuncInfo->hasSpilledSGPRs())
     25     return false;
     26 
     27   if (FuncInfo->hasSpilledVGPRs())
     28     return false;
     29 
     30   for (int I = FrameInfo->getObjectIndexBegin(),
     31          E = FrameInfo->getObjectIndexEnd(); I != E; ++I) {
     32     if (!FrameInfo->isSpillSlotObjectIndex(I))
     33       return false;
     34   }
     35 
     36   return true;
     37 }
     38 
     39 static ArrayRef<MCPhysReg> getAllSGPR128() {
     40   return makeArrayRef(AMDGPU::SReg_128RegClass.begin(),
     41                       AMDGPU::SReg_128RegClass.getNumRegs());
     42 }
     43 
     44 static ArrayRef<MCPhysReg> getAllSGPRs() {
     45   return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(),
     46                       AMDGPU::SGPR_32RegClass.getNumRegs());
     47 }
     48 
     49 void SIFrameLowering::emitPrologue(MachineFunction &MF,
     50                                    MachineBasicBlock &MBB) const {
     51   if (!MF.getFrameInfo()->hasStackObjects())
     52     return;
     53 
     54   assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
     55 
     56   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
     57 
     58   // If we only have SGPR spills, we won't actually be using scratch memory
     59   // since these spill to VGPRs.
     60   //
     61   // FIXME: We should be cleaning up these unused SGPR spill frame indices
     62   // somewhere.
     63   if (hasOnlySGPRSpills(MFI, MF.getFrameInfo()))
     64     return;
     65 
     66   const SIInstrInfo *TII =
     67       static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
     68   const SIRegisterInfo *TRI = &TII->getRegisterInfo();
     69   const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
     70 
     71   // We need to insert initialization of the scratch resource descriptor.
     72   unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
     73   assert(ScratchRsrcReg != AMDGPU::NoRegister);
     74 
     75   unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
     76   assert(ScratchWaveOffsetReg != AMDGPU::NoRegister);
     77 
     78   unsigned PreloadedScratchWaveOffsetReg = TRI->getPreloadedValue(
     79     MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
     80 
     81   unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
     82   if (ST.isAmdHsaOS()) {
     83     PreloadedPrivateBufferReg = TRI->getPreloadedValue(
     84       MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
     85   }
     86 
     87   // If we reserved the original input registers, we don't need to copy to the
     88   // reserved registers.
     89   if (ScratchRsrcReg == PreloadedPrivateBufferReg) {
     90     // We should always reserve these 5 registers at the same time.
     91     assert(ScratchWaveOffsetReg == PreloadedScratchWaveOffsetReg &&
     92            "scratch wave offset and private segment buffer inconsistent");
     93     return;
     94   }
     95 
     96 
     97   // We added live-ins during argument lowering, but since they were not used
     98   // they were deleted. We're adding the uses now, so add them back.
     99   MachineRegisterInfo &MRI = MF.getRegInfo();
    100   MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
    101   MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
    102 
    103   if (ST.isAmdHsaOS()) {
    104     MRI.addLiveIn(PreloadedPrivateBufferReg);
    105     MBB.addLiveIn(PreloadedPrivateBufferReg);
    106   }
    107 
    108   // We reserved the last registers for this. Shift it down to the end of those
    109   // which were actually used.
    110   //
    111   // FIXME: It might be safer to use a pseudoregister before replacement.
    112 
    113   // FIXME: We should be able to eliminate unused input registers. We only
    114   // cannot do this for the resources required for scratch access. For now we
    115   // skip over user SGPRs and may leave unused holes.
    116 
    117   // We find the resource first because it has an alignment requirement.
    118   if (ScratchRsrcReg == TRI->reservedPrivateSegmentBufferReg(MF)) {
    119     MachineRegisterInfo &MRI = MF.getRegInfo();
    120 
    121     unsigned NumPreloaded = MFI->getNumPreloadedSGPRs() / 4;
    122     // Skip the last 2 elements because the last one is reserved for VCC, and
    123     // this is the 2nd to last element already.
    124     for (MCPhysReg Reg : getAllSGPR128().drop_back(2).slice(NumPreloaded)) {
    125       // Pick the first unallocated one. Make sure we don't clobber the other
    126       // reserved input we needed.
    127       if (!MRI.isPhysRegUsed(Reg)) {
    128         assert(MRI.isAllocatable(Reg));
    129         MRI.replaceRegWith(ScratchRsrcReg, Reg);
    130         ScratchRsrcReg = Reg;
    131         MFI->setScratchRSrcReg(ScratchRsrcReg);
    132         break;
    133       }
    134     }
    135   }
    136 
    137   if (ScratchWaveOffsetReg == TRI->reservedPrivateSegmentWaveByteOffsetReg(MF)) {
    138     MachineRegisterInfo &MRI = MF.getRegInfo();
    139     // Skip the last 2 elements because the last one is reserved for VCC, and
    140     // this is the 2nd to last element already.
    141     unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
    142     for (MCPhysReg Reg : getAllSGPRs().drop_back(6).slice(NumPreloaded)) {
    143       // Pick the first unallocated SGPR. Be careful not to pick an alias of the
    144       // scratch descriptor, since we havent added its uses yet.
    145       if (!MRI.isPhysRegUsed(Reg)) {
    146         assert(MRI.isAllocatable(Reg) &&
    147                !TRI->isSubRegisterEq(ScratchRsrcReg, Reg));
    148 
    149         MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
    150         ScratchWaveOffsetReg = Reg;
    151         MFI->setScratchWaveOffsetReg(ScratchWaveOffsetReg);
    152         break;
    153       }
    154     }
    155   }
    156 
    157 
    158   assert(!TRI->isSubRegister(ScratchRsrcReg, ScratchWaveOffsetReg));
    159 
    160   const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
    161   MachineBasicBlock::iterator I = MBB.begin();
    162   DebugLoc DL;
    163 
    164   if (PreloadedScratchWaveOffsetReg != ScratchWaveOffsetReg) {
    165     // Make sure we emit the copy for the offset first. We may have chosen to copy
    166     // the buffer resource into a register that aliases the input offset register.
    167     BuildMI(MBB, I, DL, SMovB32, ScratchWaveOffsetReg)
    168       .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
    169   }
    170 
    171   if (ST.isAmdHsaOS()) {
    172     // Insert copies from argument register.
    173     assert(
    174       !TRI->isSubRegisterEq(PreloadedPrivateBufferReg, ScratchRsrcReg) &&
    175       !TRI->isSubRegisterEq(PreloadedPrivateBufferReg, ScratchWaveOffsetReg));
    176 
    177     unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
    178     unsigned Rsrc23 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2_sub3);
    179 
    180     unsigned Lo = TRI->getSubReg(PreloadedPrivateBufferReg, AMDGPU::sub0_sub1);
    181     unsigned Hi = TRI->getSubReg(PreloadedPrivateBufferReg, AMDGPU::sub2_sub3);
    182 
    183     const MCInstrDesc &SMovB64 = TII->get(AMDGPU::S_MOV_B64);
    184 
    185     BuildMI(MBB, I, DL, SMovB64, Rsrc01)
    186       .addReg(Lo, RegState::Kill);
    187     BuildMI(MBB, I, DL, SMovB64, Rsrc23)
    188       .addReg(Hi, RegState::Kill);
    189   } else {
    190     unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
    191     unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
    192     unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
    193     unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
    194 
    195     // Use relocations to get the pointer, and setup the other bits manually.
    196     uint64_t Rsrc23 = TII->getScratchRsrcWords23();
    197     BuildMI(MBB, I, DL, SMovB32, Rsrc0)
    198       .addExternalSymbol("SCRATCH_RSRC_DWORD0")
    199       .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    200 
    201     BuildMI(MBB, I, DL, SMovB32, Rsrc1)
    202       .addExternalSymbol("SCRATCH_RSRC_DWORD1")
    203       .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    204 
    205     BuildMI(MBB, I, DL, SMovB32, Rsrc2)
    206       .addImm(Rsrc23 & 0xffffffff)
    207       .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    208 
    209     BuildMI(MBB, I, DL, SMovB32, Rsrc3)
    210       .addImm(Rsrc23 >> 32)
    211       .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    212   }
    213 
    214   // Make the register selected live throughout the function.
    215   for (MachineBasicBlock &OtherBB : MF) {
    216     if (&OtherBB == &MBB)
    217       continue;
    218 
    219     OtherBB.addLiveIn(ScratchRsrcReg);
    220     OtherBB.addLiveIn(ScratchWaveOffsetReg);
    221   }
    222 }
    223 
    224 void SIFrameLowering::processFunctionBeforeFrameFinalized(
    225   MachineFunction &MF,
    226   RegScavenger *RS) const {
    227   MachineFrameInfo *MFI = MF.getFrameInfo();
    228 
    229   if (!MFI->hasStackObjects())
    230     return;
    231 
    232   bool MayNeedScavengingEmergencySlot = MFI->hasStackObjects();
    233 
    234   assert((RS || !MayNeedScavengingEmergencySlot) &&
    235          "RegScavenger required if spilling");
    236 
    237   if (MayNeedScavengingEmergencySlot) {
    238     int ScavengeFI = MFI->CreateSpillStackObject(
    239       AMDGPU::SGPR_32RegClass.getSize(),
    240       AMDGPU::SGPR_32RegClass.getAlignment());
    241     RS->addScavengingFrameIndex(ScavengeFI);
    242   }
    243 }
    244