//===-- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Copies from VGPR to SGPR registers are illegal and the register coalescer
/// will sometimes generate these illegal copies in situations like this:
///
///  Register Class <vsrc> is the union of <vgpr> and <sgpr>
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///   %vreg1 <vsrc> = COPY %vreg0 <sgpr>
///   ...
///   BRANCH %cond BB1, BB2
/// BB1:
///   %vreg2 <vgpr> = VECTOR_INST
///   %vreg3 <vsrc> = COPY %vreg2 <vgpr>
/// BB2:
///   %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vsrc>, <BB#1>
///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
///
/// The coalescer will begin at BB0 and eliminate its copy; the resulting
/// code will look like this:
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///   ...
///   BRANCH %cond BB1, BB2
/// BB1:
///   %vreg2 <vgpr> = VECTOR_INST
///   %vreg3 <vsrc> = COPY %vreg2 <vgpr>
/// BB2:
///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <vsrc>, <BB#1>
///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
///
/// Now that the result of the PHI instruction is an SGPR, the register
/// allocator is forced to constrain the register class of %vreg3 to
/// <sgpr>, so we end up with final code like this:
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///   ...
///   BRANCH %cond BB1, BB2
/// BB1:
///   %vreg2 <vgpr> = VECTOR_INST
///   %vreg3 <sgpr> = COPY %vreg2 <vgpr>
/// BB2:
///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1>
///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
///
/// Now this code contains an illegal copy from a VGPR to an SGPR.
///
/// In order to avoid this problem, this pass searches for PHI instructions
/// which define a <vsrc> register and constrains their definition class to
/// <vgpr> if any user of the PHI's definition register is a vector
/// instruction. If the PHI's definition class is constrained to <vgpr>, the
/// coalescer will be unable to perform the COPY removal from the above
/// example, which ultimately led to the creation of an illegal COPY.
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "sgpr-copies"

namespace {

class SIFixSGPRCopies : public MachineFunctionPass {
public:
  static char ID;

  SIFixSGPRCopies() : MachineFunctionPass(ID) { }

  bool runOnMachineFunction(MachineFunction &MF) override;

  const char *getPassName() const override {
    return "SI Fix SGPR copies";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace

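// Register the pass so the AMDGPU target can schedule it via
// SIFixSGPRCopiesID / createSIFixSGPRCopiesPass(); it is typically run
// shortly after instruction selection, while the code is still in SSA form
// and before register allocation.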
INITIALIZE_PASS(SIFixSGPRCopies, DEBUG_TYPE,
                "SI Fix SGPR copies", false, false)

char SIFixSGPRCopies::ID = 0;

char &llvm::SIFixSGPRCopiesID = SIFixSGPRCopies::ID;

FunctionPass *llvm::createSIFixSGPRCopiesPass() {
  return new SIFixSGPRCopies();
}

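// Return true if any virtual register operand of \p MI belongs to a register
// class that contains VGPRs; physical registers and non-register operands
// are ignored.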
static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
      continue;

    if (TRI->hasVGPRs(MRI.getRegClass(MO.getReg())))
      return true;
  }
  return false;
}

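// Return the register classes of \p Copy's source and destination operands
// as a (SrcRC, DstRC) pair, resolving physical registers through \p TRI.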
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getCopyRegClasses(const MachineInstr &Copy,
                  const SIRegisterInfo &TRI,
                  const MachineRegisterInfo &MRI) {
  unsigned DstReg = Copy.getOperand(0).getReg();
  unsigned SrcReg = Copy.getOperand(1).getReg();

  const TargetRegisterClass *SrcRC =
    TargetRegisterInfo::isVirtualRegister(SrcReg) ?
    MRI.getRegClass(SrcReg) :
    TRI.getPhysRegClass(SrcReg);

  // We don't really care about the subregister here.
  // SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg());

  const TargetRegisterClass *DstRC =
    TargetRegisterInfo::isVirtualRegister(DstReg) ?
    MRI.getRegClass(DstReg) :
    TRI.getPhysRegClass(DstReg);

  return std::make_pair(SrcRC, DstRC);
}

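// A VGPR -> SGPR copy moves a (potentially divergent) vector value into a
// scalar register, which is illegal on the hardware (see the file header).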
static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
                             const TargetRegisterClass *DstRC,
                             const SIRegisterInfo &TRI) {
  return TRI.isSGPRClass(DstRC) && TRI.hasVGPRs(SrcRC);
}

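// The opposite direction, SGPR -> VGPR, is legal: vector instructions can
// read scalar registers directly.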
static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
                             const TargetRegisterClass *DstRC,
                             const SIRegisterInfo &TRI) {
  return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC);
}

// Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE.
//
// SGPRx = ...
// SGPRy = REG_SEQUENCE SGPRx, sub0 ...
// VGPRz = COPY SGPRy
//
// ==>
//
// VGPRx = COPY SGPRx
// VGPRz = REG_SEQUENCE VGPRx, sub0
//
// This exposes immediate folding opportunities when materializing 64-bit
// immediates.
static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
                                        const SIRegisterInfo *TRI,
                                        const SIInstrInfo *TII,
                                        MachineRegisterInfo &MRI) {
  assert(MI.isRegSequence());

  unsigned DstReg = MI.getOperand(0).getReg();
  if (!TRI->isSGPRClass(MRI.getRegClass(DstReg)))
    return false;

  if (!MRI.hasOneUse(DstReg))
    return false;

  MachineInstr &CopyUse = *MRI.use_instr_begin(DstReg);
  if (!CopyUse.isCopy())
    return false;

  const TargetRegisterClass *SrcRC, *DstRC;
  std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI);

  if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI))
    return false;

  // TODO: Could have multiple extracts?
  unsigned SubReg = CopyUse.getOperand(1).getSubReg();
  if (SubReg != AMDGPU::NoSubRegister)
    return false;

  MRI.setRegClass(DstReg, DstRC);

  // SGPRx = ...
  // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
  // VGPRz = COPY SGPRy

  // =>
  // VGPRx = COPY SGPRx
  // VGPRz = REG_SEQUENCE VGPRx, sub0

  MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());

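  // REG_SEQUENCE sources are (register, subregister index) operand pairs
  // starting at operand 1; rewrite each SGPR input to go through a fresh
  // VGPR temporary so the result is a pure VGPR REG_SEQUENCE.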
  for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
    unsigned SrcReg = MI.getOperand(I).getReg();
    unsigned SrcSubReg = MI.getOperand(I).getSubReg();

    const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
    assert(TRI->isSGPRClass(SrcRC) &&
           "Expected SGPR REG_SEQUENCE to only have SGPR inputs");

    SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg);
    const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC);

    unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC);

    BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY),
            TmpReg)
      .addOperand(MI.getOperand(I));

    MI.getOperand(I).setReg(TmpReg);
  }

  CopyUse.eraseFromParent();
  return true;
}

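// Walk every instruction in the function and rewrite anything that would
// otherwise force an illegal VGPR -> SGPR copy: offending COPYs, PHIs,
// REG_SEQUENCEs and INSERT_SUBREGs are moved to the VALU with
// SIInstrInfo::moveToVALU().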
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIRegisterInfo *TRI =
      static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {
    MachineBasicBlock &MBB = *BI;
    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
         I != E; ++I) {
      MachineInstr &MI = *I;

      switch (MI.getOpcode()) {
      default:
        continue;
      case AMDGPU::COPY: {
        // If the destination register is a physical register there isn't
        // really much we can do to fix this.
        if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()))
          continue;

        const TargetRegisterClass *SrcRC, *DstRC;
        std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
        if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
          DEBUG(dbgs() << "Fixing VGPR -> SGPR copy: " << MI);
          TII->moveToVALU(MI);
        }

        break;
      }
      case AMDGPU::PHI: {
        DEBUG(dbgs() << "Fixing PHI: " << MI);
        unsigned Reg = MI.getOperand(0).getReg();
        if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
          break;

        // If a PHI node defines an SGPR and any of its operands are VGPRs,
        // then we need to move it to the VALU.
        //
        // Also, if a PHI node defines an SGPR and has all SGPR operands
        // we must move it to the VALU, because the SGPR operands will
        // all end up being assigned the same register, which means
        // there is a potential for a conflict if different threads take
        // different control flow paths.
        //
        // For Example:
        //
        // sgpr0 = def;
        // ...
        // sgpr1 = def;
        // ...
        // sgpr2 = PHI sgpr0, sgpr1
        // use sgpr2;
        //
        // Will Become:
        //
        // sgpr2 = def;
        // ...
        // sgpr2 = def;
        // ...
        // use sgpr2
        //
        // FIXME: This is OK if the branching decision is made based on an
        // SGPR value.
        bool SGPRBranch = false;

        // The one exception to this rule is when one of the operands
        // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
        // instruction.  In this case, we know the program will
        // never enter the second block (the loop) without entering
        // the first block (where the condition is computed), so there
        // is no chance for values to be overwritten.

        bool HasBreakDef = false;
        for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
          unsigned Reg = MI.getOperand(i).getReg();
          if (TRI->hasVGPRs(MRI.getRegClass(Reg))) {
            TII->moveToVALU(MI);
            break;
          }
          MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg);
          assert(DefInstr);
          switch (DefInstr->getOpcode()) {
          case AMDGPU::SI_BREAK:
          case AMDGPU::SI_IF_BREAK:
          case AMDGPU::SI_ELSE_BREAK:
          // If we see a PHI instruction that defines an SGPR, then that PHI
          // instruction has already been considered and should have
          // a *_BREAK as an operand.
          case AMDGPU::PHI:
            HasBreakDef = true;
            break;
          }
        }

        if (!SGPRBranch && !HasBreakDef)
          TII->moveToVALU(MI);
        break;
      }
      case AMDGPU::REG_SEQUENCE: {
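        // A REG_SEQUENCE that already defines VGPRs, or that has no VGPR
        // inputs at all, does not need to be moved to the VALU; instead, try
        // to fold a following SGPR -> VGPR copy of its result into a VGPR
        // REG_SEQUENCE.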
        if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
            !hasVGPROperands(MI, TRI)) {
          foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI);
          continue;
        }

        DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);

        TII->moveToVALU(MI);
        break;
      }
      case AMDGPU::INSERT_SUBREG: {
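        // An INSERT_SUBREG that defines an SGPR but inserts a value that
        // lives in VGPRs would require an illegal copy, so move the whole
        // operation to the VALU.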
        const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
        DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
        Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
        Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
        if (TRI->isSGPRClass(DstRC) &&
            (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
          DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
          TII->moveToVALU(MI);
        }
        break;
      }
      }
    }
  }

  return true;
}