1 //===- StrongPHIElimination.cpp - Eliminate PHI nodes by inserting copies -===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass eliminates PHI instructions by aggressively coalescing the copies 11 // that would be inserted by a naive algorithm and only inserting the copies 12 // that are necessary. The coalescing technique initially assumes that all 13 // registers appearing in a PHI instruction do not interfere. It then eliminates 14 // proven interferences, using dominators to only perform a linear number of 15 // interference tests instead of the quadratic number of interference tests 16 // that this would naively require. This is a technique derived from: 17 // 18 // Budimlic, et al. Fast copy coalescing and live-range identification. 19 // In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language 20 // Design and Implementation (Berlin, Germany, June 17 - 19, 2002). 21 // PLDI '02. ACM, New York, NY, 25-32. 22 // 23 // The original implementation constructs a data structure they call a dominance 24 // forest for this purpose. The dominance forest was shown to be unnecessary, 25 // as it is possible to emulate the creation and traversal of a dominance forest 26 // by directly using the dominator tree, rather than actually constructing the 27 // dominance forest. This technique is explained in: 28 // 29 // Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness, Code 30 // Quality and Efficiency, 31 // In Proceedings of the 7th annual IEEE/ACM International Symposium on Code 32 // Generation and Optimization (Seattle, Washington, March 22 - 25, 2009). 33 // CGO '09. IEEE, Washington, DC, 114-125. 34 // 35 // Careful implementation allows for all of the dominator forest interference 36 // checks to be performed at once in a single depth-first traversal of the 37 // dominator tree, which is what is implemented here. 38 // 39 //===----------------------------------------------------------------------===// 40 41 #define DEBUG_TYPE "strongphielim" 42 #include "PHIEliminationUtils.h" 43 #include "llvm/CodeGen/Passes.h" 44 #include "llvm/CodeGen/LiveIntervalAnalysis.h" 45 #include "llvm/CodeGen/MachineDominators.h" 46 #include "llvm/CodeGen/MachineFunctionPass.h" 47 #include "llvm/CodeGen/MachineInstrBuilder.h" 48 #include "llvm/CodeGen/MachineRegisterInfo.h" 49 #include "llvm/Target/TargetInstrInfo.h" 50 #include "llvm/ADT/Statistic.h" 51 #include "llvm/Support/Debug.h" 52 using namespace llvm; 53 54 namespace { 55 class StrongPHIElimination : public MachineFunctionPass { 56 public: 57 static char ID; // Pass identification, replacement for typeid 58 StrongPHIElimination() : MachineFunctionPass(ID) { 59 initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); 60 } 61 62 virtual void getAnalysisUsage(AnalysisUsage&) const; 63 bool runOnMachineFunction(MachineFunction&); 64 65 private: 66 /// This struct represents a single node in the union-find data structure 67 /// representing the variable congruence classes. There is one difference 68 /// from a normal union-find data structure. We steal two bits from the parent 69 /// pointer . One of these bits is used to represent whether the register 70 /// itself has been isolated, and the other is used to represent whether the 71 /// PHI with that register as its destination has been isolated. 72 /// 73 /// Note that this leads to the strange situation where the leader of a 74 /// congruence class may no longer logically be a member, due to being 75 /// isolated. 76 struct Node { 77 enum Flags { 78 kRegisterIsolatedFlag = 1, 79 kPHIIsolatedFlag = 2 80 }; 81 Node(unsigned v) : value(v), rank(0) { parent.setPointer(this); } 82 83 Node *getLeader(); 84 85 PointerIntPair<Node*, 2> parent; 86 unsigned value; 87 unsigned rank; 88 }; 89 90 /// Add a register in a new congruence class containing only itself. 91 void addReg(unsigned); 92 93 /// Join the congruence classes of two registers. This function is biased 94 /// towards the left argument, i.e. after 95 /// 96 /// addReg(r2); 97 /// unionRegs(r1, r2); 98 /// 99 /// the leader of the unioned congruence class is the same as the leader of 100 /// r1's congruence class prior to the union. This is actually relied upon 101 /// in the copy insertion code. 102 void unionRegs(unsigned, unsigned); 103 104 /// Get the color of a register. The color is 0 if the register has been 105 /// isolated. 106 unsigned getRegColor(unsigned); 107 108 // Isolate a register. 109 void isolateReg(unsigned); 110 111 /// Get the color of a PHI. The color of a PHI is 0 if the PHI has been 112 /// isolated. Otherwise, it is the original color of its destination and 113 /// all of its operands (before they were isolated, if they were). 114 unsigned getPHIColor(MachineInstr*); 115 116 /// Isolate a PHI. 117 void isolatePHI(MachineInstr*); 118 119 /// Traverses a basic block, splitting any interferences found between 120 /// registers in the same congruence class. It takes two DenseMaps as 121 /// arguments that it also updates: CurrentDominatingParent, which maps 122 /// a color to the register in that congruence class whose definition was 123 /// most recently seen, and ImmediateDominatingParent, which maps a register 124 /// to the register in the same congruence class that most immediately 125 /// dominates it. 126 /// 127 /// This function assumes that it is being called in a depth-first traversal 128 /// of the dominator tree. 129 void SplitInterferencesForBasicBlock( 130 MachineBasicBlock&, 131 DenseMap<unsigned, unsigned> &CurrentDominatingParent, 132 DenseMap<unsigned, unsigned> &ImmediateDominatingParent); 133 134 // Lowers a PHI instruction, inserting copies of the source and destination 135 // registers as necessary. 136 void InsertCopiesForPHI(MachineInstr*, MachineBasicBlock*); 137 138 // Merges the live interval of Reg into NewReg and renames Reg to NewReg 139 // everywhere that Reg appears. Requires Reg and NewReg to have non- 140 // overlapping lifetimes. 141 void MergeLIsAndRename(unsigned Reg, unsigned NewReg); 142 143 MachineRegisterInfo *MRI; 144 const TargetInstrInfo *TII; 145 MachineDominatorTree *DT; 146 LiveIntervals *LI; 147 148 BumpPtrAllocator Allocator; 149 150 DenseMap<unsigned, Node*> RegNodeMap; 151 152 // Maps a basic block to a list of its defs of registers that appear as PHI 153 // sources. 154 DenseMap<MachineBasicBlock*, std::vector<MachineInstr*> > PHISrcDefs; 155 156 // Maps a color to a pair of a MachineInstr* and a virtual register, which 157 // is the operand of that PHI corresponding to the current basic block. 158 DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > CurrentPHIForColor; 159 160 // FIXME: Can these two data structures be combined? Would a std::multimap 161 // be any better? 162 163 // Stores pairs of predecessor basic blocks and the source registers of 164 // inserted copy instructions. 165 typedef DenseSet<std::pair<MachineBasicBlock*, unsigned> > SrcCopySet; 166 SrcCopySet InsertedSrcCopySet; 167 168 // Maps pairs of predecessor basic blocks and colors to their defining copy 169 // instructions. 170 typedef DenseMap<std::pair<MachineBasicBlock*, unsigned>, MachineInstr*> 171 SrcCopyMap; 172 SrcCopyMap InsertedSrcCopyMap; 173 174 // Maps inserted destination copy registers to their defining copy 175 // instructions. 176 typedef DenseMap<unsigned, MachineInstr*> DestCopyMap; 177 DestCopyMap InsertedDestCopies; 178 }; 179 180 struct MIIndexCompare { 181 MIIndexCompare(LiveIntervals *LiveIntervals) : LI(LiveIntervals) { } 182 183 bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const { 184 return LI->getInstructionIndex(LHS) < LI->getInstructionIndex(RHS); 185 } 186 187 LiveIntervals *LI; 188 }; 189 } // namespace 190 191 STATISTIC(NumPHIsLowered, "Number of PHIs lowered"); 192 STATISTIC(NumDestCopiesInserted, "Number of destination copies inserted"); 193 STATISTIC(NumSrcCopiesInserted, "Number of source copies inserted"); 194 195 char StrongPHIElimination::ID = 0; 196 INITIALIZE_PASS_BEGIN(StrongPHIElimination, "strong-phi-node-elimination", 197 "Eliminate PHI nodes for register allocation, intelligently", false, false) 198 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 199 INITIALIZE_PASS_DEPENDENCY(SlotIndexes) 200 INITIALIZE_PASS_DEPENDENCY(LiveIntervals) 201 INITIALIZE_PASS_END(StrongPHIElimination, "strong-phi-node-elimination", 202 "Eliminate PHI nodes for register allocation, intelligently", false, false) 203 204 char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID; 205 206 void StrongPHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { 207 AU.setPreservesCFG(); 208 AU.addRequired<MachineDominatorTree>(); 209 AU.addRequired<SlotIndexes>(); 210 AU.addPreserved<SlotIndexes>(); 211 AU.addRequired<LiveIntervals>(); 212 AU.addPreserved<LiveIntervals>(); 213 MachineFunctionPass::getAnalysisUsage(AU); 214 } 215 216 static MachineOperand *findLastUse(MachineBasicBlock *MBB, unsigned Reg) { 217 // FIXME: This only needs to check from the first terminator, as only the 218 // first terminator can use a virtual register. 219 for (MachineBasicBlock::reverse_iterator RI = MBB->rbegin(); ; ++RI) { 220 assert (RI != MBB->rend()); 221 MachineInstr *MI = &*RI; 222 223 for (MachineInstr::mop_iterator OI = MI->operands_begin(), 224 OE = MI->operands_end(); OI != OE; ++OI) { 225 MachineOperand &MO = *OI; 226 if (MO.isReg() && MO.isUse() && MO.getReg() == Reg) 227 return &MO; 228 } 229 } 230 return NULL; 231 } 232 233 bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) { 234 MRI = &MF.getRegInfo(); 235 TII = MF.getTarget().getInstrInfo(); 236 DT = &getAnalysis<MachineDominatorTree>(); 237 LI = &getAnalysis<LiveIntervals>(); 238 239 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); 240 I != E; ++I) { 241 for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); 242 BBI != BBE && BBI->isPHI(); ++BBI) { 243 unsigned DestReg = BBI->getOperand(0).getReg(); 244 addReg(DestReg); 245 PHISrcDefs[I].push_back(BBI); 246 247 for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) { 248 MachineOperand &SrcMO = BBI->getOperand(i); 249 unsigned SrcReg = SrcMO.getReg(); 250 addReg(SrcReg); 251 unionRegs(DestReg, SrcReg); 252 253 MachineInstr *DefMI = MRI->getVRegDef(SrcReg); 254 if (DefMI) 255 PHISrcDefs[DefMI->getParent()].push_back(DefMI); 256 } 257 } 258 } 259 260 // Perform a depth-first traversal of the dominator tree, splitting 261 // interferences amongst PHI-congruence classes. 262 DenseMap<unsigned, unsigned> CurrentDominatingParent; 263 DenseMap<unsigned, unsigned> ImmediateDominatingParent; 264 for (df_iterator<MachineDomTreeNode*> DI = df_begin(DT->getRootNode()), 265 DE = df_end(DT->getRootNode()); DI != DE; ++DI) { 266 SplitInterferencesForBasicBlock(*DI->getBlock(), 267 CurrentDominatingParent, 268 ImmediateDominatingParent); 269 } 270 271 // Insert copies for all PHI source and destination registers. 272 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); 273 I != E; ++I) { 274 for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); 275 BBI != BBE && BBI->isPHI(); ++BBI) { 276 InsertCopiesForPHI(BBI, I); 277 } 278 } 279 280 // FIXME: Preserve the equivalence classes during copy insertion and use 281 // the preversed equivalence classes instead of recomputing them. 282 RegNodeMap.clear(); 283 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); 284 I != E; ++I) { 285 for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); 286 BBI != BBE && BBI->isPHI(); ++BBI) { 287 unsigned DestReg = BBI->getOperand(0).getReg(); 288 addReg(DestReg); 289 290 for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) { 291 unsigned SrcReg = BBI->getOperand(i).getReg(); 292 addReg(SrcReg); 293 unionRegs(DestReg, SrcReg); 294 } 295 } 296 } 297 298 DenseMap<unsigned, unsigned> RegRenamingMap; 299 bool Changed = false; 300 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); 301 I != E; ++I) { 302 MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); 303 while (BBI != BBE && BBI->isPHI()) { 304 MachineInstr *PHI = BBI; 305 306 assert(PHI->getNumOperands() > 0); 307 308 unsigned SrcReg = PHI->getOperand(1).getReg(); 309 unsigned SrcColor = getRegColor(SrcReg); 310 unsigned NewReg = RegRenamingMap[SrcColor]; 311 if (!NewReg) { 312 NewReg = SrcReg; 313 RegRenamingMap[SrcColor] = SrcReg; 314 } 315 MergeLIsAndRename(SrcReg, NewReg); 316 317 unsigned DestReg = PHI->getOperand(0).getReg(); 318 if (!InsertedDestCopies.count(DestReg)) 319 MergeLIsAndRename(DestReg, NewReg); 320 321 for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) { 322 unsigned SrcReg = PHI->getOperand(i).getReg(); 323 MergeLIsAndRename(SrcReg, NewReg); 324 } 325 326 ++BBI; 327 LI->RemoveMachineInstrFromMaps(PHI); 328 PHI->eraseFromParent(); 329 Changed = true; 330 } 331 } 332 333 // Due to the insertion of copies to split live ranges, the live intervals are 334 // guaranteed to not overlap, except in one case: an original PHI source and a 335 // PHI destination copy. In this case, they have the same value and thus don't 336 // truly intersect, so we merge them into the value live at that point. 337 // FIXME: Is there some better way we can handle this? 338 for (DestCopyMap::iterator I = InsertedDestCopies.begin(), 339 E = InsertedDestCopies.end(); I != E; ++I) { 340 unsigned DestReg = I->first; 341 unsigned DestColor = getRegColor(DestReg); 342 unsigned NewReg = RegRenamingMap[DestColor]; 343 344 LiveInterval &DestLI = LI->getInterval(DestReg); 345 LiveInterval &NewLI = LI->getInterval(NewReg); 346 347 assert(DestLI.ranges.size() == 1 348 && "PHI destination copy's live interval should be a single live " 349 "range from the beginning of the BB to the copy instruction."); 350 LiveRange *DestLR = DestLI.begin(); 351 VNInfo *NewVNI = NewLI.getVNInfoAt(DestLR->start); 352 if (!NewVNI) { 353 NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator()); 354 MachineInstr *CopyInstr = I->second; 355 CopyInstr->getOperand(1).setIsKill(true); 356 } 357 358 LiveRange NewLR(DestLR->start, DestLR->end, NewVNI); 359 NewLI.addRange(NewLR); 360 361 LI->removeInterval(DestReg); 362 MRI->replaceRegWith(DestReg, NewReg); 363 } 364 365 // Adjust the live intervals of all PHI source registers to handle the case 366 // where the PHIs in successor blocks were the only later uses of the source 367 // register. 368 for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(), 369 E = InsertedSrcCopySet.end(); I != E; ++I) { 370 MachineBasicBlock *MBB = I->first; 371 unsigned SrcReg = I->second; 372 if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)]) 373 SrcReg = RenamedRegister; 374 375 LiveInterval &SrcLI = LI->getInterval(SrcReg); 376 377 bool isLiveOut = false; 378 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 379 SE = MBB->succ_end(); SI != SE; ++SI) { 380 if (SrcLI.liveAt(LI->getMBBStartIdx(*SI))) { 381 isLiveOut = true; 382 break; 383 } 384 } 385 386 if (isLiveOut) 387 continue; 388 389 MachineOperand *LastUse = findLastUse(MBB, SrcReg); 390 assert(LastUse); 391 SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent()); 392 SrcLI.removeRange(LastUseIndex.getDefIndex(), LI->getMBBEndIdx(MBB)); 393 LastUse->setIsKill(true); 394 } 395 396 LI->renumber(); 397 398 Allocator.Reset(); 399 RegNodeMap.clear(); 400 PHISrcDefs.clear(); 401 InsertedSrcCopySet.clear(); 402 InsertedSrcCopyMap.clear(); 403 InsertedDestCopies.clear(); 404 405 return Changed; 406 } 407 408 void StrongPHIElimination::addReg(unsigned Reg) { 409 if (RegNodeMap.count(Reg)) 410 return; 411 RegNodeMap[Reg] = new (Allocator) Node(Reg); 412 } 413 414 StrongPHIElimination::Node* 415 StrongPHIElimination::Node::getLeader() { 416 Node *N = this; 417 Node *Parent = parent.getPointer(); 418 Node *Grandparent = Parent->parent.getPointer(); 419 420 while (Parent != Grandparent) { 421 N->parent.setPointer(Grandparent); 422 N = Grandparent; 423 Parent = Parent->parent.getPointer(); 424 Grandparent = Parent->parent.getPointer(); 425 } 426 427 return Parent; 428 } 429 430 unsigned StrongPHIElimination::getRegColor(unsigned Reg) { 431 DenseMap<unsigned, Node*>::iterator RI = RegNodeMap.find(Reg); 432 if (RI == RegNodeMap.end()) 433 return 0; 434 Node *Node = RI->second; 435 if (Node->parent.getInt() & Node::kRegisterIsolatedFlag) 436 return 0; 437 return Node->getLeader()->value; 438 } 439 440 void StrongPHIElimination::unionRegs(unsigned Reg1, unsigned Reg2) { 441 Node *Node1 = RegNodeMap[Reg1]->getLeader(); 442 Node *Node2 = RegNodeMap[Reg2]->getLeader(); 443 444 if (Node1->rank > Node2->rank) { 445 Node2->parent.setPointer(Node1->getLeader()); 446 } else if (Node1->rank < Node2->rank) { 447 Node1->parent.setPointer(Node2->getLeader()); 448 } else if (Node1 != Node2) { 449 Node2->parent.setPointer(Node1->getLeader()); 450 Node1->rank++; 451 } 452 } 453 454 void StrongPHIElimination::isolateReg(unsigned Reg) { 455 Node *Node = RegNodeMap[Reg]; 456 Node->parent.setInt(Node->parent.getInt() | Node::kRegisterIsolatedFlag); 457 } 458 459 unsigned StrongPHIElimination::getPHIColor(MachineInstr *PHI) { 460 assert(PHI->isPHI()); 461 462 unsigned DestReg = PHI->getOperand(0).getReg(); 463 Node *DestNode = RegNodeMap[DestReg]; 464 if (DestNode->parent.getInt() & Node::kPHIIsolatedFlag) 465 return 0; 466 467 for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) { 468 unsigned SrcColor = getRegColor(PHI->getOperand(i).getReg()); 469 if (SrcColor) 470 return SrcColor; 471 } 472 return 0; 473 } 474 475 void StrongPHIElimination::isolatePHI(MachineInstr *PHI) { 476 assert(PHI->isPHI()); 477 Node *Node = RegNodeMap[PHI->getOperand(0).getReg()]; 478 Node->parent.setInt(Node->parent.getInt() | Node::kPHIIsolatedFlag); 479 } 480 481 /// SplitInterferencesForBasicBlock - traverses a basic block, splitting any 482 /// interferences found between registers in the same congruence class. It 483 /// takes two DenseMaps as arguments that it also updates: 484 /// 485 /// 1) CurrentDominatingParent, which maps a color to the register in that 486 /// congruence class whose definition was most recently seen. 487 /// 488 /// 2) ImmediateDominatingParent, which maps a register to the register in the 489 /// same congruence class that most immediately dominates it. 490 /// 491 /// This function assumes that it is being called in a depth-first traversal 492 /// of the dominator tree. 493 /// 494 /// The algorithm used here is a generalization of the dominance-based SSA test 495 /// for two variables. If there are variables a_1, ..., a_n such that 496 /// 497 /// def(a_1) dom ... dom def(a_n), 498 /// 499 /// then we can test for an interference between any two a_i by only using O(n) 500 /// interference tests between pairs of variables. If i < j and a_i and a_j 501 /// interfere, then a_i is alive at def(a_j), so it is also alive at def(a_i+1). 502 /// Thus, in order to test for an interference involving a_i, we need only check 503 /// for a potential interference with a_i+1. 504 /// 505 /// This method can be generalized to arbitrary sets of variables by performing 506 /// a depth-first traversal of the dominator tree. As we traverse down a branch 507 /// of the dominator tree, we keep track of the current dominating variable and 508 /// only perform an interference test with that variable. However, when we go to 509 /// another branch of the dominator tree, the definition of the current dominating 510 /// variable may no longer dominate the current block. In order to correct this, 511 /// we need to use a stack of past choices of the current dominating variable 512 /// and pop from this stack until we find a variable whose definition actually 513 /// dominates the current block. 514 /// 515 /// There will be one push on this stack for each variable that has become the 516 /// current dominating variable, so instead of using an explicit stack we can 517 /// simply associate the previous choice for a current dominating variable with 518 /// the new choice. This works better in our implementation, where we test for 519 /// interference in multiple distinct sets at once. 520 void 521 StrongPHIElimination::SplitInterferencesForBasicBlock( 522 MachineBasicBlock &MBB, 523 DenseMap<unsigned, unsigned> &CurrentDominatingParent, 524 DenseMap<unsigned, unsigned> &ImmediateDominatingParent) { 525 // Sort defs by their order in the original basic block, as the code below 526 // assumes that it is processing definitions in dominance order. 527 std::vector<MachineInstr*> &DefInstrs = PHISrcDefs[&MBB]; 528 std::sort(DefInstrs.begin(), DefInstrs.end(), MIIndexCompare(LI)); 529 530 for (std::vector<MachineInstr*>::const_iterator BBI = DefInstrs.begin(), 531 BBE = DefInstrs.end(); BBI != BBE; ++BBI) { 532 for (MachineInstr::const_mop_iterator I = (*BBI)->operands_begin(), 533 E = (*BBI)->operands_end(); I != E; ++I) { 534 const MachineOperand &MO = *I; 535 536 // FIXME: This would be faster if it were possible to bail out of checking 537 // an instruction's operands after the explicit defs, but this is incorrect 538 // for variadic instructions, which may appear before register allocation 539 // in the future. 540 if (!MO.isReg() || !MO.isDef()) 541 continue; 542 543 unsigned DestReg = MO.getReg(); 544 if (!DestReg || !TargetRegisterInfo::isVirtualRegister(DestReg)) 545 continue; 546 547 // If the virtual register being defined is not used in any PHI or has 548 // already been isolated, then there are no more interferences to check. 549 unsigned DestColor = getRegColor(DestReg); 550 if (!DestColor) 551 continue; 552 553 // The input to this pass sometimes is not in SSA form in every basic 554 // block, as some virtual registers have redefinitions. We could eliminate 555 // this by fixing the passes that generate the non-SSA code, or we could 556 // handle it here by tracking defining machine instructions rather than 557 // virtual registers. For now, we just handle the situation conservatively 558 // in a way that will possibly lead to false interferences. 559 unsigned &CurrentParent = CurrentDominatingParent[DestColor]; 560 unsigned NewParent = CurrentParent; 561 if (NewParent == DestReg) 562 continue; 563 564 // Pop registers from the stack represented by ImmediateDominatingParent 565 // until we find a parent that dominates the current instruction. 566 while (NewParent && (!DT->dominates(MRI->getVRegDef(NewParent), *BBI) 567 || !getRegColor(NewParent))) 568 NewParent = ImmediateDominatingParent[NewParent]; 569 570 // If NewParent is nonzero, then its definition dominates the current 571 // instruction, so it is only necessary to check for the liveness of 572 // NewParent in order to check for an interference. 573 if (NewParent 574 && LI->getInterval(NewParent).liveAt(LI->getInstructionIndex(*BBI))) { 575 // If there is an interference, always isolate the new register. This 576 // could be improved by using a heuristic that decides which of the two 577 // registers to isolate. 578 isolateReg(DestReg); 579 CurrentParent = NewParent; 580 } else { 581 // If there is no interference, update ImmediateDominatingParent and set 582 // the CurrentDominatingParent for this color to the current register. 583 ImmediateDominatingParent[DestReg] = NewParent; 584 CurrentParent = DestReg; 585 } 586 } 587 } 588 589 // We now walk the PHIs in successor blocks and check for interferences. This 590 // is necessary because the use of a PHI's operands are logically contained in 591 // the predecessor block. The def of a PHI's destination register is processed 592 // along with the other defs in a basic block. 593 594 CurrentPHIForColor.clear(); 595 596 for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(), 597 SE = MBB.succ_end(); SI != SE; ++SI) { 598 for (MachineBasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end(); 599 BBI != BBE && BBI->isPHI(); ++BBI) { 600 MachineInstr *PHI = BBI; 601 602 // If a PHI is already isolated, either by being isolated directly or 603 // having all of its operands isolated, ignore it. 604 unsigned Color = getPHIColor(PHI); 605 if (!Color) 606 continue; 607 608 // Find the index of the PHI operand that corresponds to this basic block. 609 unsigned PredIndex; 610 for (PredIndex = 1; PredIndex < PHI->getNumOperands(); PredIndex += 2) { 611 if (PHI->getOperand(PredIndex + 1).getMBB() == &MBB) 612 break; 613 } 614 assert(PredIndex < PHI->getNumOperands()); 615 unsigned PredOperandReg = PHI->getOperand(PredIndex).getReg(); 616 617 // Pop registers from the stack represented by ImmediateDominatingParent 618 // until we find a parent that dominates the current instruction. 619 unsigned &CurrentParent = CurrentDominatingParent[Color]; 620 unsigned NewParent = CurrentParent; 621 while (NewParent 622 && (!DT->dominates(MRI->getVRegDef(NewParent)->getParent(), &MBB) 623 || !getRegColor(NewParent))) 624 NewParent = ImmediateDominatingParent[NewParent]; 625 CurrentParent = NewParent; 626 627 // If there is an interference with a register, always isolate the 628 // register rather than the PHI. It is also possible to isolate the 629 // PHI, but that introduces copies for all of the registers involved 630 // in that PHI. 631 if (NewParent && LI->isLiveOutOfMBB(LI->getInterval(NewParent), &MBB) 632 && NewParent != PredOperandReg) 633 isolateReg(NewParent); 634 635 std::pair<MachineInstr*, unsigned> 636 &CurrentPHI = CurrentPHIForColor[Color]; 637 638 // If two PHIs have the same operand from every shared predecessor, then 639 // they don't actually interfere. Otherwise, isolate the current PHI. This 640 // could possibly be improved, e.g. we could isolate the PHI with the 641 // fewest operands. 642 if (CurrentPHI.first && CurrentPHI.second != PredOperandReg) 643 isolatePHI(PHI); 644 else 645 CurrentPHI = std::make_pair(PHI, PredOperandReg); 646 } 647 } 648 } 649 650 void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, 651 MachineBasicBlock *MBB) { 652 assert(PHI->isPHI()); 653 ++NumPHIsLowered; 654 unsigned PHIColor = getPHIColor(PHI); 655 656 for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) { 657 MachineOperand &SrcMO = PHI->getOperand(i); 658 659 // If a source is defined by an implicit def, there is no need to insert a 660 // copy in the predecessor. 661 if (SrcMO.isUndef()) 662 continue; 663 664 unsigned SrcReg = SrcMO.getReg(); 665 assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && 666 "Machine PHI Operands must all be virtual registers!"); 667 668 MachineBasicBlock *PredBB = PHI->getOperand(i + 1).getMBB(); 669 unsigned SrcColor = getRegColor(SrcReg); 670 671 // If neither the PHI nor the operand were isolated, then we only need to 672 // set the phi-kill flag on the VNInfo at this PHI. 673 if (PHIColor && SrcColor == PHIColor) { 674 LiveInterval &SrcInterval = LI->getInterval(SrcReg); 675 SlotIndex PredIndex = LI->getMBBEndIdx(PredBB); 676 VNInfo *SrcVNI = SrcInterval.getVNInfoAt(PredIndex.getPrevIndex()); 677 assert(SrcVNI); 678 SrcVNI->setHasPHIKill(true); 679 continue; 680 } 681 682 unsigned CopyReg = 0; 683 if (PHIColor) { 684 SrcCopyMap::const_iterator I 685 = InsertedSrcCopyMap.find(std::make_pair(PredBB, PHIColor)); 686 CopyReg 687 = I != InsertedSrcCopyMap.end() ? I->second->getOperand(0).getReg() : 0; 688 } 689 690 if (!CopyReg) { 691 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); 692 CopyReg = MRI->createVirtualRegister(RC); 693 694 MachineBasicBlock::iterator 695 CopyInsertPoint = findPHICopyInsertPoint(PredBB, MBB, SrcReg); 696 unsigned SrcSubReg = SrcMO.getSubReg(); 697 MachineInstr *CopyInstr = BuildMI(*PredBB, 698 CopyInsertPoint, 699 PHI->getDebugLoc(), 700 TII->get(TargetOpcode::COPY), 701 CopyReg).addReg(SrcReg, 0, SrcSubReg); 702 LI->InsertMachineInstrInMaps(CopyInstr); 703 ++NumSrcCopiesInserted; 704 705 // addLiveRangeToEndOfBlock() also adds the phikill flag to the VNInfo for 706 // the newly added range. 707 LI->addLiveRangeToEndOfBlock(CopyReg, CopyInstr); 708 InsertedSrcCopySet.insert(std::make_pair(PredBB, SrcReg)); 709 710 addReg(CopyReg); 711 if (PHIColor) { 712 unionRegs(PHIColor, CopyReg); 713 assert(getRegColor(CopyReg) != CopyReg); 714 } else { 715 PHIColor = CopyReg; 716 assert(getRegColor(CopyReg) == CopyReg); 717 } 718 719 if (!InsertedSrcCopyMap.count(std::make_pair(PredBB, PHIColor))) 720 InsertedSrcCopyMap[std::make_pair(PredBB, PHIColor)] = CopyInstr; 721 } 722 723 SrcMO.setReg(CopyReg); 724 725 // If SrcReg is not live beyond the PHI, trim its interval so that it is no 726 // longer live-in to MBB. Note that SrcReg may appear in other PHIs that are 727 // processed later, but this is still correct to do at this point because we 728 // never rely on LiveIntervals being correct while inserting copies. 729 // FIXME: Should this just count uses at PHIs like the normal PHIElimination 730 // pass does? 731 LiveInterval &SrcLI = LI->getInterval(SrcReg); 732 SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); 733 SlotIndex PHIIndex = LI->getInstructionIndex(PHI); 734 SlotIndex NextInstrIndex = PHIIndex.getNextIndex(); 735 if (SrcLI.liveAt(MBBStartIndex) && SrcLI.expiredAt(NextInstrIndex)) 736 SrcLI.removeRange(MBBStartIndex, PHIIndex, true); 737 } 738 739 unsigned DestReg = PHI->getOperand(0).getReg(); 740 unsigned DestColor = getRegColor(DestReg); 741 742 if (PHIColor && DestColor == PHIColor) { 743 LiveInterval &DestLI = LI->getInterval(DestReg); 744 745 // Set the phi-def flag for the VN at this PHI. 746 SlotIndex PHIIndex = LI->getInstructionIndex(PHI); 747 VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getDefIndex()); 748 assert(DestVNI); 749 DestVNI->setIsPHIDef(true); 750 751 // Prior to PHI elimination, the live ranges of PHIs begin at their defining 752 // instruction. After PHI elimination, PHI instructions are replaced by VNs 753 // with the phi-def flag set, and the live ranges of these VNs start at the 754 // beginning of the basic block. 755 SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); 756 DestVNI->def = MBBStartIndex; 757 DestLI.addRange(LiveRange(MBBStartIndex, 758 PHIIndex.getDefIndex(), 759 DestVNI)); 760 return; 761 } 762 763 const TargetRegisterClass *RC = MRI->getRegClass(DestReg); 764 unsigned CopyReg = MRI->createVirtualRegister(RC); 765 766 MachineInstr *CopyInstr = BuildMI(*MBB, 767 MBB->SkipPHIsAndLabels(MBB->begin()), 768 PHI->getDebugLoc(), 769 TII->get(TargetOpcode::COPY), 770 DestReg).addReg(CopyReg); 771 LI->InsertMachineInstrInMaps(CopyInstr); 772 PHI->getOperand(0).setReg(CopyReg); 773 ++NumDestCopiesInserted; 774 775 // Add the region from the beginning of MBB to the copy instruction to 776 // CopyReg's live interval, and give the VNInfo the phidef flag. 777 LiveInterval &CopyLI = LI->getOrCreateInterval(CopyReg); 778 SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); 779 SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr); 780 VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex, 781 CopyInstr, 782 LI->getVNInfoAllocator()); 783 CopyVNI->setIsPHIDef(true); 784 CopyLI.addRange(LiveRange(MBBStartIndex, 785 DestCopyIndex.getDefIndex(), 786 CopyVNI)); 787 788 // Adjust DestReg's live interval to adjust for its new definition at 789 // CopyInstr. 790 LiveInterval &DestLI = LI->getOrCreateInterval(DestReg); 791 SlotIndex PHIIndex = LI->getInstructionIndex(PHI); 792 DestLI.removeRange(PHIIndex.getDefIndex(), DestCopyIndex.getDefIndex()); 793 794 VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getDefIndex()); 795 assert(DestVNI); 796 DestVNI->def = DestCopyIndex.getDefIndex(); 797 798 InsertedDestCopies[CopyReg] = CopyInstr; 799 } 800 801 void StrongPHIElimination::MergeLIsAndRename(unsigned Reg, unsigned NewReg) { 802 if (Reg == NewReg) 803 return; 804 805 LiveInterval &OldLI = LI->getInterval(Reg); 806 LiveInterval &NewLI = LI->getInterval(NewReg); 807 808 // Merge the live ranges of the two registers. 809 DenseMap<VNInfo*, VNInfo*> VNMap; 810 for (LiveInterval::iterator LRI = OldLI.begin(), LRE = OldLI.end(); 811 LRI != LRE; ++LRI) { 812 LiveRange OldLR = *LRI; 813 VNInfo *OldVN = OldLR.valno; 814 815 VNInfo *&NewVN = VNMap[OldVN]; 816 if (!NewVN) { 817 NewVN = NewLI.createValueCopy(OldVN, LI->getVNInfoAllocator()); 818 VNMap[OldVN] = NewVN; 819 } 820 821 LiveRange LR(OldLR.start, OldLR.end, NewVN); 822 NewLI.addRange(LR); 823 } 824 825 // Remove the LiveInterval for the register being renamed and replace all 826 // of its defs and uses with the new register. 827 LI->removeInterval(Reg); 828 MRI->replaceRegWith(Reg, NewReg); 829 } 830