Home | History | Annotate | Download | only in AMDGPU
      1 //===-- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies --------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 /// \file
     11 /// Copies from VGPR to SGPR registers are illegal and the register coalescer
     12 /// will sometimes generate these illegal copies in situations like this:
     13 ///
     14 ///  Register Class <vsrc> is the union of <vgpr> and <sgpr>
     15 ///
     16 /// BB0:
     17 ///   %vreg0 <sgpr> = SCALAR_INST
     18 ///   %vreg1 <vsrc> = COPY %vreg0 <sgpr>
     19 ///    ...
     20 ///    BRANCH %cond BB1, BB2
     21 ///  BB1:
     22 ///    %vreg2 <vgpr> = VECTOR_INST
     23 ///    %vreg3 <vsrc> = COPY %vreg2 <vgpr>
     24 ///  BB2:
     25 ///    %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vsrc>, <BB#1>
     26 ///    %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
     27 ///
     28 ///
     29 /// The coalescer will begin at BB0 and eliminate its copy, then the resulting
     30 /// code will look like this:
     31 ///
     32 /// BB0:
     33 ///   %vreg0 <sgpr> = SCALAR_INST
     34 ///    ...
     35 ///    BRANCH %cond BB1, BB2
     36 /// BB1:
     37 ///   %vreg2 <vgpr> = VECTOR_INST
     38 ///   %vreg3 <vsrc> = COPY %vreg2 <vgpr>
     39 /// BB2:
     40 ///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <vsrc>, <BB#1>
     41 ///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
     42 ///
     43 /// Now that the result of the PHI instruction is an SGPR, the register
     44 /// allocator is now forced to constrain the register class of %vreg3 to
     45 /// <sgpr> so we end up with final code like this:
     46 ///
     47 /// BB0:
     48 ///   %vreg0 <sgpr> = SCALAR_INST
     49 ///    ...
     50 ///    BRANCH %cond BB1, BB2
     51 /// BB1:
     52 ///   %vreg2 <vgpr> = VECTOR_INST
     53 ///   %vreg3 <sgpr> = COPY %vreg2 <vgpr>
     54 /// BB2:
     55 ///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1>
     56 ///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
     57 ///
     58 /// Now this code contains an illegal copy from a VGPR to an SGPR.
     59 ///
     60 /// In order to avoid this problem, this pass searches for PHI instructions
     61 /// which define a <vsrc> register and constrains its definition class to
     62 /// <vgpr> if the user of the PHI's definition register is a vector instruction.
     63 /// If the PHI's definition class is constrained to <vgpr> then the coalescer
     64 /// will be unable to perform the COPY removal from the above example  which
     65 /// ultimately led to the creation of an illegal COPY.
     66 //===----------------------------------------------------------------------===//
     67 
     68 #include "AMDGPU.h"
     69 #include "AMDGPUSubtarget.h"
     70 #include "SIInstrInfo.h"
     71 #include "llvm/CodeGen/MachineFunctionPass.h"
     72 #include "llvm/CodeGen/MachineInstrBuilder.h"
     73 #include "llvm/CodeGen/MachineRegisterInfo.h"
     74 #include "llvm/Support/Debug.h"
     75 #include "llvm/Support/raw_ostream.h"
     76 #include "llvm/Target/TargetMachine.h"
     77 
     78 using namespace llvm;
     79 
     80 #define DEBUG_TYPE "si-fix-sgpr-copies"
     81 
     82 namespace {
     83 
     84 class SIFixSGPRCopies : public MachineFunctionPass {
     85 public:
     86   static char ID;
     87 
     88   SIFixSGPRCopies() : MachineFunctionPass(ID) { }
     89 
     90   bool runOnMachineFunction(MachineFunction &MF) override;
     91 
     92   const char *getPassName() const override {
     93     return "SI Fix SGPR copies";
     94   }
     95 
     96   void getAnalysisUsage(AnalysisUsage &AU) const override {
     97     AU.setPreservesCFG();
     98     MachineFunctionPass::getAnalysisUsage(AU);
     99   }
    100 };
    101 
    102 } // End anonymous namespace
    103 
    104 INITIALIZE_PASS(SIFixSGPRCopies, DEBUG_TYPE,
    105                 "SI Fix SGPR copies", false, false)
    106 
    107 char SIFixSGPRCopies::ID = 0;
    108 
    109 char &llvm::SIFixSGPRCopiesID = SIFixSGPRCopies::ID;
    110 
/// Factory for the pass: returns a newly allocated SIFixSGPRCopies instance.
/// This function does not keep the pointer it returns.
FunctionPass *llvm::createSIFixSGPRCopiesPass() {
  return new SIFixSGPRCopies();
}
    114 
    115 static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
    116   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
    117   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    118     if (!MI.getOperand(i).isReg() ||
    119         !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
    120       continue;
    121 
    122     if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg())))
    123       return true;
    124   }
    125   return false;
    126 }
    127 
    128 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
    129 getCopyRegClasses(const MachineInstr &Copy,
    130                   const SIRegisterInfo &TRI,
    131                   const MachineRegisterInfo &MRI) {
    132   unsigned DstReg = Copy.getOperand(0).getReg();
    133   unsigned SrcReg = Copy.getOperand(1).getReg();
    134 
    135   const TargetRegisterClass *SrcRC =
    136     TargetRegisterInfo::isVirtualRegister(SrcReg) ?
    137     MRI.getRegClass(SrcReg) :
    138     TRI.getPhysRegClass(SrcReg);
    139 
    140   // We don't really care about the subregister here.
    141   // SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg());
    142 
    143   const TargetRegisterClass *DstRC =
    144     TargetRegisterInfo::isVirtualRegister(DstReg) ?
    145     MRI.getRegClass(DstReg) :
    146     TRI.getPhysRegClass(DstReg);
    147 
    148   return std::make_pair(SrcRC, DstRC);
    149 }
    150 
    151 static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
    152                              const TargetRegisterClass *DstRC,
    153                              const SIRegisterInfo &TRI) {
    154   return TRI.isSGPRClass(DstRC) && TRI.hasVGPRs(SrcRC);
    155 }
    156 
    157 static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
    158                              const TargetRegisterClass *DstRC,
    159                              const SIRegisterInfo &TRI) {
    160   return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC);
    161 }
    162 
    163 // Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE.
    164 //
    165 // SGPRx = ...
    166 // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
    167 // VGPRz = COPY SGPRy
    168 //
    169 // ==>
    170 //
    171 // VGPRx = COPY SGPRx
    172 // VGPRz = REG_SEQUENCE VGPRx, sub0
    173 //
    174 // This exposes immediate folding opportunities when materializing 64-bit
    175 // immediates.
    176 static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
    177                                         const SIRegisterInfo *TRI,
    178                                         const SIInstrInfo *TII,
    179                                         MachineRegisterInfo &MRI) {
    180   assert(MI.isRegSequence());
    181 
    182   unsigned DstReg = MI.getOperand(0).getReg();
    183   if (!TRI->isSGPRClass(MRI.getRegClass(DstReg)))
    184     return false;
    185 
    186   if (!MRI.hasOneUse(DstReg))
    187     return false;
    188 
    189   MachineInstr &CopyUse = *MRI.use_instr_begin(DstReg);
    190   if (!CopyUse.isCopy())
    191     return false;
    192 
    193   const TargetRegisterClass *SrcRC, *DstRC;
    194   std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI);
    195 
    196   if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI))
    197     return false;
    198 
    199   // TODO: Could have multiple extracts?
    200   unsigned SubReg = CopyUse.getOperand(1).getSubReg();
    201   if (SubReg != AMDGPU::NoSubRegister)
    202     return false;
    203 
    204   MRI.setRegClass(DstReg, DstRC);
    205 
    206   // SGPRx = ...
    207   // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
    208   // VGPRz = COPY SGPRy
    209 
    210   // =>
    211   // VGPRx = COPY SGPRx
    212   // VGPRz = REG_SEQUENCE VGPRx, sub0
    213 
    214   MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());
    215 
    216   for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
    217     unsigned SrcReg = MI.getOperand(I).getReg();
    218     unsigned SrcSubReg = MI.getOperand(I).getSubReg();
    219 
    220     const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
    221     assert(TRI->isSGPRClass(SrcRC) &&
    222            "Expected SGPR REG_SEQUENCE to only have SGPR inputs");
    223 
    224     SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg);
    225     const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC);
    226 
    227     unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC);
    228 
    229     BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), TmpReg)
    230       .addOperand(MI.getOperand(I));
    231 
    232     MI.getOperand(I).setReg(TmpReg);
    233   }
    234 
    235   CopyUse.eraseFromParent();
    236   return true;
    237 }
    238 
    239 bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
    240   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
    241   MachineRegisterInfo &MRI = MF.getRegInfo();
    242   const SIRegisterInfo *TRI = ST.getRegisterInfo();
    243   const SIInstrInfo *TII = ST.getInstrInfo();
    244 
    245   SmallVector<MachineInstr *, 16> Worklist;
    246 
    247   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
    248                                                   BI != BE; ++BI) {
    249 
    250     MachineBasicBlock &MBB = *BI;
    251     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
    252          I != E; ++I) {
    253       MachineInstr &MI = *I;
    254 
    255       switch (MI.getOpcode()) {
    256       default:
    257         continue;
    258       case AMDGPU::COPY: {
    259         // If the destination register is a physical register there isn't really
    260         // much we can do to fix this.
    261         if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()))
    262           continue;
    263 
    264         const TargetRegisterClass *SrcRC, *DstRC;
    265         std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
    266         if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
    267           DEBUG(dbgs() << "Fixing VGPR -> SGPR copy: " << MI);
    268           TII->moveToVALU(MI);
    269         }
    270 
    271         break;
    272       }
    273       case AMDGPU::PHI: {
    274         DEBUG(dbgs() << "Fixing PHI: " << MI);
    275         unsigned Reg = MI.getOperand(0).getReg();
    276         if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
    277           break;
    278 
    279         // If a PHI node defines an SGPR and any of its operands are VGPRs,
    280         // then we need to move it to the VALU.
    281         //
    282         // Also, if a PHI node defines an SGPR and has all SGPR operands
    283         // we must move it to the VALU, because the SGPR operands will
    284         // all end up being assigned the same register, which means
    285         // there is a potential for a conflict if different threads take
    286         // different control flow paths.
    287         //
    288         // For Example:
    289         //
    290         // sgpr0 = def;
    291         // ...
    292         // sgpr1 = def;
    293         // ...
    294         // sgpr2 = PHI sgpr0, sgpr1
    295         // use sgpr2;
    296         //
    297         // Will Become:
    298         //
    299         // sgpr2 = def;
    300         // ...
    301         // sgpr2 = def;
    302         // ...
    303         // use sgpr2
    304         //
    305         // FIXME: This is OK if the branching decision is made based on an
    306         // SGPR value.
    307         bool SGPRBranch = false;
    308 
    309         // The one exception to this rule is when one of the operands
    310         // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
    311         // instruction.  In this case, there we know the program will
    312         // never enter the second block (the loop) without entering
    313         // the first block (where the condition is computed), so there
    314         // is no chance for values to be over-written.
    315 
    316         bool HasBreakDef = false;
    317         for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
    318           unsigned Reg = MI.getOperand(i).getReg();
    319           if (TRI->hasVGPRs(MRI.getRegClass(Reg))) {
    320             TII->moveToVALU(MI);
    321             break;
    322           }
    323           MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg);
    324           assert(DefInstr);
    325           switch(DefInstr->getOpcode()) {
    326 
    327           case AMDGPU::SI_BREAK:
    328           case AMDGPU::SI_IF_BREAK:
    329           case AMDGPU::SI_ELSE_BREAK:
    330           // If we see a PHI instruction that defines an SGPR, then that PHI
    331           // instruction has already been considered and should have
    332           // a *_BREAK as an operand.
    333           case AMDGPU::PHI:
    334             HasBreakDef = true;
    335             break;
    336           }
    337         }
    338 
    339         if (!SGPRBranch && !HasBreakDef)
    340           TII->moveToVALU(MI);
    341         break;
    342       }
    343       case AMDGPU::REG_SEQUENCE: {
    344         if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
    345             !hasVGPROperands(MI, TRI)) {
    346           foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI);
    347           continue;
    348         }
    349 
    350         DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);
    351 
    352         TII->moveToVALU(MI);
    353         break;
    354       }
    355       case AMDGPU::INSERT_SUBREG: {
    356         const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
    357         DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
    358         Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
    359         Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
    360         if (TRI->isSGPRClass(DstRC) &&
    361             (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
    362           DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
    363           TII->moveToVALU(MI);
    364         }
    365         break;
    366       }
    367       }
    368     }
    369   }
    370 
    371   return true;
    372 }
    373