//===-- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Copies from VGPR to SGPR registers are illegal and the register coalescer
/// will sometimes generate these illegal copies in situations like this:
///
///  Register Class <vsrc> is the union of <vgpr> and <sgpr>
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///   %vreg1 <vsrc> = COPY %vreg0 <sgpr>
///    ...
///   BRANCH %cond BB1, BB2
/// BB1:
///   %vreg2 <vgpr> = VECTOR_INST
///   %vreg3 <vsrc> = COPY %vreg2 <vgpr>
/// BB2:
///   %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vsrc>, <BB#1>
///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
///
///
/// The coalescer will begin at BB0 and eliminate its copy, then the resulting
/// code will look like this:
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///    ...
///   BRANCH %cond BB1, BB2
/// BB1:
///   %vreg2 <vgpr> = VECTOR_INST
///   %vreg3 <vsrc> = COPY %vreg2 <vgpr>
/// BB2:
///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <vsrc>, <BB#1>
///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
///
/// Now that the result of the PHI instruction is an SGPR, the register
/// allocator is forced to constrain the register class of %vreg3 to
/// <sgpr>, so we end up with final code like this:
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///    ...
///   BRANCH %cond BB1, BB2
/// BB1:
///   %vreg2 <vgpr> = VECTOR_INST
///   %vreg3 <sgpr> = COPY %vreg2 <vgpr>
/// BB2:
///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1>
///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
///
/// Now this code contains an illegal copy from a VGPR to an SGPR.
///
/// In order to avoid this problem, this pass searches for PHI instructions
/// which define a <vsrc> register and constrains their definition class to
/// <vgpr> if a user of the PHI's definition register is a vector instruction.
/// If the PHI's definition class is constrained to <vgpr>, then the coalescer
/// will be unable to perform the COPY removal from the above example, which
/// ultimately led to the creation of an illegal COPY.
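///
/// In addition to constraining PHIs, this pass rewrites COPY, REG_SEQUENCE,
/// and INSERT_SUBREG instructions that would otherwise leave an SGPR defined
/// by VGPR inputs, by moving the offending instruction (and, transitively,
/// its users) to the VALU with SIInstrInfo::moveToVALU.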
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "si-fix-sgpr-copies"

namespace {

class SIFixSGPRCopies : public MachineFunctionPass {
public:
  static char ID;

  SIFixSGPRCopies() : MachineFunctionPass(ID) { }

  bool runOnMachineFunction(MachineFunction &MF) override;

  const char *getPassName() const override {
    return "SI Fix SGPR copies";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace

INITIALIZE_PASS(SIFixSGPRCopies, DEBUG_TYPE,
                "SI Fix SGPR copies", false, false)

char SIFixSGPRCopies::ID = 0;

char &llvm::SIFixSGPRCopiesID = SIFixSGPRCopies::ID;

FunctionPass *llvm::createSIFixSGPRCopiesPass() {
  return new SIFixSGPRCopies();
}

// Return true if any operand of MI is a virtual register with a VGPR class.
static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    if (!MI.getOperand(i).isReg() ||
        !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
      continue;

    if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg())))
      return true;
  }
  return false;
}

// Return the register classes of a COPY's source and destination, handling
// both virtual and physical registers.
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getCopyRegClasses(const MachineInstr &Copy,
                  const SIRegisterInfo &TRI,
                  const MachineRegisterInfo &MRI) {
  unsigned DstReg = Copy.getOperand(0).getReg();
  unsigned SrcReg = Copy.getOperand(1).getReg();

  const TargetRegisterClass *SrcRC =
    TargetRegisterInfo::isVirtualRegister(SrcReg) ?
    MRI.getRegClass(SrcReg) :
    TRI.getPhysRegClass(SrcReg);

  // We don't really care about the subregister here.
  // SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg());

  const TargetRegisterClass *DstRC =
    TargetRegisterInfo::isVirtualRegister(DstReg) ?
    MRI.getRegClass(DstReg) :
    TRI.getPhysRegClass(DstReg);

  return std::make_pair(SrcRC, DstRC);
}

static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
                             const TargetRegisterClass *DstRC,
                             const SIRegisterInfo &TRI) {
  return TRI.isSGPRClass(DstRC) && TRI.hasVGPRs(SrcRC);
}

static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
                             const TargetRegisterClass *DstRC,
                             const SIRegisterInfo &TRI) {
  return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC);
}

// Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE.
//
//  SGPRx = ...
//  SGPRy = REG_SEQUENCE SGPRx, sub0 ...
//  VGPRz = COPY SGPRy
//
// ==>
//
//  VGPRx = COPY SGPRx
//  VGPRz = REG_SEQUENCE VGPRx, sub0
//
// This exposes immediate folding opportunities when materializing 64-bit
// immediates.
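//
// For example (a sketch; the exact opcodes depend on instruction selection),
// a 64-bit constant materialized as
//
//  SGPRx = S_MOV_B32 0
//  SGPRy = S_MOV_B32 42
//  SGPRz = REG_SEQUENCE SGPRx, sub0, SGPRy, sub1
//  VGPRw = COPY SGPRz
//
// turns into a REG_SEQUENCE whose inputs are COPYs of S_MOV_B32 results,
// which later folding passes can rewrite into V_MOV_B32 instructions with
// inline immediate operands.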
static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
                                        const SIRegisterInfo *TRI,
                                        const SIInstrInfo *TII,
                                        MachineRegisterInfo &MRI) {
  assert(MI.isRegSequence());

  unsigned DstReg = MI.getOperand(0).getReg();
  if (!TRI->isSGPRClass(MRI.getRegClass(DstReg)))
    return false;

  if (!MRI.hasOneUse(DstReg))
    return false;

  MachineInstr &CopyUse = *MRI.use_instr_begin(DstReg);
  if (!CopyUse.isCopy())
    return false;

  const TargetRegisterClass *SrcRC, *DstRC;
  std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI);

  if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI))
    return false;

  // TODO: Could have multiple extracts?
  unsigned SubReg = CopyUse.getOperand(1).getSubReg();
  if (SubReg != AMDGPU::NoSubRegister)
    return false;

  MRI.setRegClass(DstReg, DstRC);

  // SGPRx = ...
  // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
  // VGPRz = COPY SGPRy

  // =>
  // VGPRx = COPY SGPRx
  // VGPRz = REG_SEQUENCE VGPRx, sub0

  MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());

  for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
    unsigned SrcReg = MI.getOperand(I).getReg();
    unsigned SrcSubReg = MI.getOperand(I).getSubReg();

    const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
    assert(TRI->isSGPRClass(SrcRC) &&
           "Expected SGPR REG_SEQUENCE to only have SGPR inputs");

    SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg);
    const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC);

    unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC);

    BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
            TII->get(AMDGPU::COPY), TmpReg)
      .addOperand(MI.getOperand(I));

    MI.getOperand(I).setReg(TmpReg);
  }

  CopyUse.eraseFromParent();
  return true;
}

bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
         I != E; ++I) {
      MachineInstr &MI = *I;

      switch (MI.getOpcode()) {
      default:
        continue;
      case AMDGPU::COPY: {
        // If the destination register is a physical register there isn't
        // really much we can do to fix this.
        if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()))
          continue;

        const TargetRegisterClass *SrcRC, *DstRC;
        std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
        if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
          DEBUG(dbgs() << "Fixing VGPR -> SGPR copy: " << MI);
          TII->moveToVALU(MI);
        }

        break;
      }
      case AMDGPU::PHI: {
        DEBUG(dbgs() << "Fixing PHI: " << MI);
        unsigned Reg = MI.getOperand(0).getReg();
        if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
          break;

        // If a PHI node defines an SGPR and any of its operands are VGPRs,
        // then we need to move it to the VALU.
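        // moveToVALU rewrites the PHI to define a VGPR and, if necessary,
        // also moves the users of its result to the VALU, so the whole
        // def-use chain is legalized in one call.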
        //
        // Also, if a PHI node defines an SGPR and has all SGPR operands
        // we must move it to the VALU, because the SGPR operands will
        // all end up being assigned the same register, which means
        // there is a potential for a conflict if different threads take
        // different control flow paths.
        //
        // For example:
        //
        // sgpr0 = def;
        // ...
        // sgpr1 = def;
        // ...
        // sgpr2 = PHI sgpr0, sgpr1
        // use sgpr2;
        //
        // Will become:
        //
        // sgpr2 = def;
        // ...
        // sgpr2 = def;
        // ...
        // use sgpr2
        //
        // FIXME: This is OK if the branching decision is made based on an
        // SGPR value.
        bool SGPRBranch = false;

        // The one exception to this rule is when one of the operands
        // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
        // instruction. In this case, we know the program will
        // never enter the second block (the loop) without entering
        // the first block (where the condition is computed), so there
        // is no chance for values to be overwritten.

        bool HasVGPROperand = false;
        bool HasBreakDef = false;
        for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
          unsigned Reg = MI.getOperand(i).getReg();
          if (TRI->hasVGPRs(MRI.getRegClass(Reg))) {
            HasVGPROperand = true;
            break;
          }
          MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg);
          assert(DefInstr);
          switch (DefInstr->getOpcode()) {
          case AMDGPU::SI_BREAK:
          case AMDGPU::SI_IF_BREAK:
          case AMDGPU::SI_ELSE_BREAK:
          // If we see a PHI instruction that defines an SGPR, then that PHI
          // instruction has already been considered and should have
          // a *_BREAK as an operand.
          case AMDGPU::PHI:
            HasBreakDef = true;
            break;
          }
        }

        // Move the PHI at most once, whether it has a VGPR operand or is an
        // all-SGPR PHI that is not fed by a *_BREAK.
        if (HasVGPROperand || (!SGPRBranch && !HasBreakDef))
          TII->moveToVALU(MI);
        break;
      }
      case AMDGPU::REG_SEQUENCE: {
        if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
            !hasVGPROperands(MI, TRI)) {
          foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI);
          continue;
        }

        DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);

        TII->moveToVALU(MI);
        break;
      }
      case AMDGPU::INSERT_SUBREG: {
        const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
        DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
        Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
        Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
        if (TRI->isSGPRClass(DstRC) &&
            (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
          DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
          TII->moveToVALU(MI);
        }
        break;
      }
      }
    }
  }

  return true;
}