//===-- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Copies from VGPR to SGPR registers are illegal and the register coalescer
/// will sometimes generate these illegal copies in situations like this:
///
///  Register Class <vsrc> is the union of <vgpr> and <sgpr>
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///   %vreg1 <vsrc> = COPY %vreg0 <sgpr>
///    ...
///    BRANCH %cond BB1, BB2
///  BB1:
///    %vreg2 <vgpr> = VECTOR_INST
///    %vreg3 <vsrc> = COPY %vreg2 <vgpr>
///  BB2:
///    %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vsrc>, <BB#1>
///    %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
///
///
/// The coalescer will begin at BB0 and eliminate its copy, then the resulting
/// code will look like this:
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///    ...
///    BRANCH %cond BB1, BB2
/// BB1:
///   %vreg2 <vgpr> = VECTOR_INST
///   %vreg3 <vsrc> = COPY %vreg2 <vgpr>
/// BB2:
///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <vsrc>, <BB#1>
///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
///
/// Now that the result of the PHI instruction is an SGPR, the register
/// allocator is now forced to constrain the register class of %vreg3 to
/// <sgpr> so we end up with final code like this:
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///    ...
///    BRANCH %cond BB1, BB2
/// BB1:
///   %vreg2 <vgpr> = VECTOR_INST
///   %vreg3 <sgpr> = COPY %vreg2 <vgpr>
/// BB2:
///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1>
///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
///
/// Now this code contains an illegal copy from a VGPR to an SGPR.
59 /// 60 /// In order to avoid this problem, this pass searches for PHI instructions 61 /// which define a <vsrc> register and constrains its definition class to 62 /// <vgpr> if the user of the PHI's definition register is a vector instruction. 63 /// If the PHI's definition class is constrained to <vgpr> then the coalescer 64 /// will be unable to perform the COPY removal from the above example which 65 /// ultimately led to the creation of an illegal COPY. 66 //===----------------------------------------------------------------------===// 67 68 #include "AMDGPU.h" 69 #include "SIInstrInfo.h" 70 #include "llvm/CodeGen/MachineFunctionPass.h" 71 #include "llvm/CodeGen/MachineRegisterInfo.h" 72 #include "llvm/Target/TargetMachine.h" 73 74 using namespace llvm; 75 76 namespace { 77 78 class SIFixSGPRCopies : public MachineFunctionPass { 79 80 private: 81 static char ID; 82 const TargetRegisterClass *inferRegClass(const TargetRegisterInfo *TRI, 83 const MachineRegisterInfo &MRI, 84 unsigned Reg) const; 85 86 public: 87 SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { } 88 89 virtual bool runOnMachineFunction(MachineFunction &MF); 90 91 const char *getPassName() const { 92 return "SI Fix SGPR copies"; 93 } 94 95 }; 96 97 } // End anonymous namespace 98 99 char SIFixSGPRCopies::ID = 0; 100 101 FunctionPass *llvm::createSIFixSGPRCopiesPass(TargetMachine &tm) { 102 return new SIFixSGPRCopies(tm); 103 } 104 105 /// This functions walks the use/def chains starting with the definition of 106 /// \p Reg until it finds an Instruction that isn't a COPY returns 107 /// the register class of that instruction. 108 const TargetRegisterClass *SIFixSGPRCopies::inferRegClass( 109 const TargetRegisterInfo *TRI, 110 const MachineRegisterInfo &MRI, 111 unsigned Reg) const { 112 // The Reg parameter to the function must always be defined by either a PHI 113 // or a COPY, therefore it cannot be a physical register. 
114 assert(TargetRegisterInfo::isVirtualRegister(Reg) && 115 "Reg cannot be a physical register"); 116 117 const TargetRegisterClass *RC = MRI.getRegClass(Reg); 118 for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg), 119 E = MRI.use_end(); I != E; ++I) { 120 switch (I->getOpcode()) { 121 case AMDGPU::COPY: 122 RC = TRI->getCommonSubClass(RC, inferRegClass(TRI, MRI, 123 I->getOperand(0).getReg())); 124 break; 125 } 126 } 127 128 return RC; 129 } 130 131 bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { 132 MachineRegisterInfo &MRI = MF.getRegInfo(); 133 const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); 134 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); 135 BI != BE; ++BI) { 136 137 MachineBasicBlock &MBB = *BI; 138 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 139 I != E; ++I) { 140 MachineInstr &MI = *I; 141 if (MI.getOpcode() != AMDGPU::PHI) { 142 continue; 143 } 144 unsigned Reg = MI.getOperand(0).getReg(); 145 const TargetRegisterClass *RC = inferRegClass(TRI, MRI, Reg); 146 if (RC == &AMDGPU::VSrc_32RegClass) { 147 MRI.constrainRegClass(Reg, &AMDGPU::VReg_32RegClass); 148 } 149 } 150 } 151 return false; 152 } 153