1 //===- StrongPHIElimination.cpp - Eliminate PHI nodes by inserting copies -===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass eliminates PHI instructions by aggressively coalescing the copies 11 // that would be inserted by a naive algorithm and only inserting the copies 12 // that are necessary. The coalescing technique initially assumes that all 13 // registers appearing in a PHI instruction do not interfere. It then eliminates 14 // proven interferences, using dominators to only perform a linear number of 15 // interference tests instead of the quadratic number of interference tests 16 // that this would naively require. This is a technique derived from: 17 // 18 // Budimlic, et al. Fast copy coalescing and live-range identification. 19 // In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language 20 // Design and Implementation (Berlin, Germany, June 17 - 19, 2002). 21 // PLDI '02. ACM, New York, NY, 25-32. 22 // 23 // The original implementation constructs a data structure they call a dominance 24 // forest for this purpose. The dominance forest was shown to be unnecessary, 25 // as it is possible to emulate the creation and traversal of a dominance forest 26 // by directly using the dominator tree, rather than actually constructing the 27 // dominance forest. This technique is explained in: 28 // 29 // Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness, Code 30 // Quality and Efficiency, 31 // In Proceedings of the 7th annual IEEE/ACM International Symposium on Code 32 // Generation and Optimization (Seattle, Washington, March 22 - 25, 2009). 33 // CGO '09. IEEE, Washington, DC, 114-125. 34 // 35 // Careful implementation allows for all of the dominator forest interference 36 // checks to be performed at once in a single depth-first traversal of the 37 // dominator tree, which is what is implemented here. 38 // 39 //===----------------------------------------------------------------------===// 40 41 #define DEBUG_TYPE "strongphielim" 42 #include "PHIEliminationUtils.h" 43 #include "llvm/CodeGen/Passes.h" 44 #include "llvm/CodeGen/LiveIntervalAnalysis.h" 45 #include "llvm/CodeGen/MachineDominators.h" 46 #include "llvm/CodeGen/MachineFunctionPass.h" 47 #include "llvm/CodeGen/MachineInstrBuilder.h" 48 #include "llvm/CodeGen/MachineRegisterInfo.h" 49 #include "llvm/Target/TargetInstrInfo.h" 50 #include "llvm/ADT/DenseSet.h" 51 #include "llvm/ADT/Statistic.h" 52 #include "llvm/Support/Debug.h" 53 using namespace llvm; 54 55 namespace { 56 class StrongPHIElimination : public MachineFunctionPass { 57 public: 58 static char ID; // Pass identification, replacement for typeid 59 StrongPHIElimination() : MachineFunctionPass(ID) { 60 initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); 61 } 62 63 virtual void getAnalysisUsage(AnalysisUsage&) const; 64 bool runOnMachineFunction(MachineFunction&); 65 66 private: 67 /// This struct represents a single node in the union-find data structure 68 /// representing the variable congruence classes. There is one difference 69 /// from a normal union-find data structure. We steal two bits from the parent 70 /// pointer . One of these bits is used to represent whether the register 71 /// itself has been isolated, and the other is used to represent whether the 72 /// PHI with that register as its destination has been isolated. 73 /// 74 /// Note that this leads to the strange situation where the leader of a 75 /// congruence class may no longer logically be a member, due to being 76 /// isolated. 77 struct Node { 78 enum Flags { 79 kRegisterIsolatedFlag = 1, 80 kPHIIsolatedFlag = 2 81 }; 82 Node(unsigned v) : value(v), rank(0) { parent.setPointer(this); } 83 84 Node *getLeader(); 85 86 PointerIntPair<Node*, 2> parent; 87 unsigned value; 88 unsigned rank; 89 }; 90 91 /// Add a register in a new congruence class containing only itself. 92 void addReg(unsigned); 93 94 /// Join the congruence classes of two registers. This function is biased 95 /// towards the left argument, i.e. after 96 /// 97 /// addReg(r2); 98 /// unionRegs(r1, r2); 99 /// 100 /// the leader of the unioned congruence class is the same as the leader of 101 /// r1's congruence class prior to the union. This is actually relied upon 102 /// in the copy insertion code. 103 void unionRegs(unsigned, unsigned); 104 105 /// Get the color of a register. The color is 0 if the register has been 106 /// isolated. 107 unsigned getRegColor(unsigned); 108 109 // Isolate a register. 110 void isolateReg(unsigned); 111 112 /// Get the color of a PHI. The color of a PHI is 0 if the PHI has been 113 /// isolated. Otherwise, it is the original color of its destination and 114 /// all of its operands (before they were isolated, if they were). 115 unsigned getPHIColor(MachineInstr*); 116 117 /// Isolate a PHI. 118 void isolatePHI(MachineInstr*); 119 120 /// Traverses a basic block, splitting any interferences found between 121 /// registers in the same congruence class. It takes two DenseMaps as 122 /// arguments that it also updates: CurrentDominatingParent, which maps 123 /// a color to the register in that congruence class whose definition was 124 /// most recently seen, and ImmediateDominatingParent, which maps a register 125 /// to the register in the same congruence class that most immediately 126 /// dominates it. 127 /// 128 /// This function assumes that it is being called in a depth-first traversal 129 /// of the dominator tree. 130 void SplitInterferencesForBasicBlock( 131 MachineBasicBlock&, 132 DenseMap<unsigned, unsigned> &CurrentDominatingParent, 133 DenseMap<unsigned, unsigned> &ImmediateDominatingParent); 134 135 // Lowers a PHI instruction, inserting copies of the source and destination 136 // registers as necessary. 137 void InsertCopiesForPHI(MachineInstr*, MachineBasicBlock*); 138 139 // Merges the live interval of Reg into NewReg and renames Reg to NewReg 140 // everywhere that Reg appears. Requires Reg and NewReg to have non- 141 // overlapping lifetimes. 142 void MergeLIsAndRename(unsigned Reg, unsigned NewReg); 143 144 MachineRegisterInfo *MRI; 145 const TargetInstrInfo *TII; 146 MachineDominatorTree *DT; 147 LiveIntervals *LI; 148 149 BumpPtrAllocator Allocator; 150 151 DenseMap<unsigned, Node*> RegNodeMap; 152 153 // Maps a basic block to a list of its defs of registers that appear as PHI 154 // sources. 155 DenseMap<MachineBasicBlock*, std::vector<MachineInstr*> > PHISrcDefs; 156 157 // Maps a color to a pair of a MachineInstr* and a virtual register, which 158 // is the operand of that PHI corresponding to the current basic block. 159 DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > CurrentPHIForColor; 160 161 // FIXME: Can these two data structures be combined? Would a std::multimap 162 // be any better? 163 164 // Stores pairs of predecessor basic blocks and the source registers of 165 // inserted copy instructions. 166 typedef DenseSet<std::pair<MachineBasicBlock*, unsigned> > SrcCopySet; 167 SrcCopySet InsertedSrcCopySet; 168 169 // Maps pairs of predecessor basic blocks and colors to their defining copy 170 // instructions. 171 typedef DenseMap<std::pair<MachineBasicBlock*, unsigned>, MachineInstr*> 172 SrcCopyMap; 173 SrcCopyMap InsertedSrcCopyMap; 174 175 // Maps inserted destination copy registers to their defining copy 176 // instructions. 177 typedef DenseMap<unsigned, MachineInstr*> DestCopyMap; 178 DestCopyMap InsertedDestCopies; 179 }; 180 181 struct MIIndexCompare { 182 MIIndexCompare(LiveIntervals *LiveIntervals) : LI(LiveIntervals) { } 183 184 bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const { 185 return LI->getInstructionIndex(LHS) < LI->getInstructionIndex(RHS); 186 } 187 188 LiveIntervals *LI; 189 }; 190 } // namespace 191 192 STATISTIC(NumPHIsLowered, "Number of PHIs lowered"); 193 STATISTIC(NumDestCopiesInserted, "Number of destination copies inserted"); 194 STATISTIC(NumSrcCopiesInserted, "Number of source copies inserted"); 195 196 char StrongPHIElimination::ID = 0; 197 INITIALIZE_PASS_BEGIN(StrongPHIElimination, "strong-phi-node-elimination", 198 "Eliminate PHI nodes for register allocation, intelligently", false, false) 199 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 200 INITIALIZE_PASS_DEPENDENCY(SlotIndexes) 201 INITIALIZE_PASS_DEPENDENCY(LiveIntervals) 202 INITIALIZE_PASS_END(StrongPHIElimination, "strong-phi-node-elimination", 203 "Eliminate PHI nodes for register allocation, intelligently", false, false) 204 205 char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID; 206 207 void StrongPHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { 208 AU.setPreservesCFG(); 209 AU.addRequired<MachineDominatorTree>(); 210 AU.addRequired<SlotIndexes>(); 211 AU.addPreserved<SlotIndexes>(); 212 AU.addRequired<LiveIntervals>(); 213 AU.addPreserved<LiveIntervals>(); 214 MachineFunctionPass::getAnalysisUsage(AU); 215 } 216 217 static MachineOperand *findLastUse(MachineBasicBlock *MBB, unsigned Reg) { 218 // FIXME: This only needs to check from the first terminator, as only the 219 // first terminator can use a virtual register. 220 for (MachineBasicBlock::reverse_iterator RI = MBB->rbegin(); ; ++RI) { 221 assert (RI != MBB->rend()); 222 MachineInstr *MI = &*RI; 223 224 for (MachineInstr::mop_iterator OI = MI->operands_begin(), 225 OE = MI->operands_end(); OI != OE; ++OI) { 226 MachineOperand &MO = *OI; 227 if (MO.isReg() && MO.isUse() && MO.getReg() == Reg) 228 return &MO; 229 } 230 } 231 return NULL; 232 } 233 234 bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) { 235 MRI = &MF.getRegInfo(); 236 TII = MF.getTarget().getInstrInfo(); 237 DT = &getAnalysis<MachineDominatorTree>(); 238 LI = &getAnalysis<LiveIntervals>(); 239 240 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); 241 I != E; ++I) { 242 for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); 243 BBI != BBE && BBI->isPHI(); ++BBI) { 244 unsigned DestReg = BBI->getOperand(0).getReg(); 245 addReg(DestReg); 246 PHISrcDefs[I].push_back(BBI); 247 248 for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) { 249 MachineOperand &SrcMO = BBI->getOperand(i); 250 unsigned SrcReg = SrcMO.getReg(); 251 addReg(SrcReg); 252 unionRegs(DestReg, SrcReg); 253 254 MachineInstr *DefMI = MRI->getVRegDef(SrcReg); 255 if (DefMI) 256 PHISrcDefs[DefMI->getParent()].push_back(DefMI); 257 } 258 } 259 } 260 261 // Perform a depth-first traversal of the dominator tree, splitting 262 // interferences amongst PHI-congruence classes. 263 DenseMap<unsigned, unsigned> CurrentDominatingParent; 264 DenseMap<unsigned, unsigned> ImmediateDominatingParent; 265 for (df_iterator<MachineDomTreeNode*> DI = df_begin(DT->getRootNode()), 266 DE = df_end(DT->getRootNode()); DI != DE; ++DI) { 267 SplitInterferencesForBasicBlock(*DI->getBlock(), 268 CurrentDominatingParent, 269 ImmediateDominatingParent); 270 } 271 272 // Insert copies for all PHI source and destination registers. 273 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); 274 I != E; ++I) { 275 for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); 276 BBI != BBE && BBI->isPHI(); ++BBI) { 277 InsertCopiesForPHI(BBI, I); 278 } 279 } 280 281 // FIXME: Preserve the equivalence classes during copy insertion and use 282 // the preversed equivalence classes instead of recomputing them. 283 RegNodeMap.clear(); 284 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); 285 I != E; ++I) { 286 for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); 287 BBI != BBE && BBI->isPHI(); ++BBI) { 288 unsigned DestReg = BBI->getOperand(0).getReg(); 289 addReg(DestReg); 290 291 for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) { 292 unsigned SrcReg = BBI->getOperand(i).getReg(); 293 addReg(SrcReg); 294 unionRegs(DestReg, SrcReg); 295 } 296 } 297 } 298 299 DenseMap<unsigned, unsigned> RegRenamingMap; 300 bool Changed = false; 301 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); 302 I != E; ++I) { 303 MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); 304 while (BBI != BBE && BBI->isPHI()) { 305 MachineInstr *PHI = BBI; 306 307 assert(PHI->getNumOperands() > 0); 308 309 unsigned SrcReg = PHI->getOperand(1).getReg(); 310 unsigned SrcColor = getRegColor(SrcReg); 311 unsigned NewReg = RegRenamingMap[SrcColor]; 312 if (!NewReg) { 313 NewReg = SrcReg; 314 RegRenamingMap[SrcColor] = SrcReg; 315 } 316 MergeLIsAndRename(SrcReg, NewReg); 317 318 unsigned DestReg = PHI->getOperand(0).getReg(); 319 if (!InsertedDestCopies.count(DestReg)) 320 MergeLIsAndRename(DestReg, NewReg); 321 322 for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) { 323 unsigned SrcReg = PHI->getOperand(i).getReg(); 324 MergeLIsAndRename(SrcReg, NewReg); 325 } 326 327 ++BBI; 328 LI->RemoveMachineInstrFromMaps(PHI); 329 PHI->eraseFromParent(); 330 Changed = true; 331 } 332 } 333 334 // Due to the insertion of copies to split live ranges, the live intervals are 335 // guaranteed to not overlap, except in one case: an original PHI source and a 336 // PHI destination copy. In this case, they have the same value and thus don't 337 // truly intersect, so we merge them into the value live at that point. 338 // FIXME: Is there some better way we can handle this? 339 for (DestCopyMap::iterator I = InsertedDestCopies.begin(), 340 E = InsertedDestCopies.end(); I != E; ++I) { 341 unsigned DestReg = I->first; 342 unsigned DestColor = getRegColor(DestReg); 343 unsigned NewReg = RegRenamingMap[DestColor]; 344 345 LiveInterval &DestLI = LI->getInterval(DestReg); 346 LiveInterval &NewLI = LI->getInterval(NewReg); 347 348 assert(DestLI.ranges.size() == 1 349 && "PHI destination copy's live interval should be a single live " 350 "range from the beginning of the BB to the copy instruction."); 351 LiveRange *DestLR = DestLI.begin(); 352 VNInfo *NewVNI = NewLI.getVNInfoAt(DestLR->start); 353 if (!NewVNI) { 354 NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator()); 355 MachineInstr *CopyInstr = I->second; 356 CopyInstr->getOperand(1).setIsKill(true); 357 } 358 359 LiveRange NewLR(DestLR->start, DestLR->end, NewVNI); 360 NewLI.addRange(NewLR); 361 362 LI->removeInterval(DestReg); 363 MRI->replaceRegWith(DestReg, NewReg); 364 } 365 366 // Adjust the live intervals of all PHI source registers to handle the case 367 // where the PHIs in successor blocks were the only later uses of the source 368 // register. 369 for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(), 370 E = InsertedSrcCopySet.end(); I != E; ++I) { 371 MachineBasicBlock *MBB = I->first; 372 unsigned SrcReg = I->second; 373 if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)]) 374 SrcReg = RenamedRegister; 375 376 LiveInterval &SrcLI = LI->getInterval(SrcReg); 377 378 bool isLiveOut = false; 379 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 380 SE = MBB->succ_end(); SI != SE; ++SI) { 381 if (SrcLI.liveAt(LI->getMBBStartIdx(*SI))) { 382 isLiveOut = true; 383 break; 384 } 385 } 386 387 if (isLiveOut) 388 continue; 389 390 MachineOperand *LastUse = findLastUse(MBB, SrcReg); 391 assert(LastUse); 392 SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent()); 393 SrcLI.removeRange(LastUseIndex.getDefIndex(), LI->getMBBEndIdx(MBB)); 394 LastUse->setIsKill(true); 395 } 396 397 LI->renumber(); 398 399 Allocator.Reset(); 400 RegNodeMap.clear(); 401 PHISrcDefs.clear(); 402 InsertedSrcCopySet.clear(); 403 InsertedSrcCopyMap.clear(); 404 InsertedDestCopies.clear(); 405 406 return Changed; 407 } 408 409 void StrongPHIElimination::addReg(unsigned Reg) { 410 if (RegNodeMap.count(Reg)) 411 return; 412 RegNodeMap[Reg] = new (Allocator) Node(Reg); 413 } 414 415 StrongPHIElimination::Node* 416 StrongPHIElimination::Node::getLeader() { 417 Node *N = this; 418 Node *Parent = parent.getPointer(); 419 Node *Grandparent = Parent->parent.getPointer(); 420 421 while (Parent != Grandparent) { 422 N->parent.setPointer(Grandparent); 423 N = Grandparent; 424 Parent = Parent->parent.getPointer(); 425 Grandparent = Parent->parent.getPointer(); 426 } 427 428 return Parent; 429 } 430 431 unsigned StrongPHIElimination::getRegColor(unsigned Reg) { 432 DenseMap<unsigned, Node*>::iterator RI = RegNodeMap.find(Reg); 433 if (RI == RegNodeMap.end()) 434 return 0; 435 Node *Node = RI->second; 436 if (Node->parent.getInt() & Node::kRegisterIsolatedFlag) 437 return 0; 438 return Node->getLeader()->value; 439 } 440 441 void StrongPHIElimination::unionRegs(unsigned Reg1, unsigned Reg2) { 442 Node *Node1 = RegNodeMap[Reg1]->getLeader(); 443 Node *Node2 = RegNodeMap[Reg2]->getLeader(); 444 445 if (Node1->rank > Node2->rank) { 446 Node2->parent.setPointer(Node1->getLeader()); 447 } else if (Node1->rank < Node2->rank) { 448 Node1->parent.setPointer(Node2->getLeader()); 449 } else if (Node1 != Node2) { 450 Node2->parent.setPointer(Node1->getLeader()); 451 Node1->rank++; 452 } 453 } 454 455 void StrongPHIElimination::isolateReg(unsigned Reg) { 456 Node *Node = RegNodeMap[Reg]; 457 Node->parent.setInt(Node->parent.getInt() | Node::kRegisterIsolatedFlag); 458 } 459 460 unsigned StrongPHIElimination::getPHIColor(MachineInstr *PHI) { 461 assert(PHI->isPHI()); 462 463 unsigned DestReg = PHI->getOperand(0).getReg(); 464 Node *DestNode = RegNodeMap[DestReg]; 465 if (DestNode->parent.getInt() & Node::kPHIIsolatedFlag) 466 return 0; 467 468 for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) { 469 unsigned SrcColor = getRegColor(PHI->getOperand(i).getReg()); 470 if (SrcColor) 471 return SrcColor; 472 } 473 return 0; 474 } 475 476 void StrongPHIElimination::isolatePHI(MachineInstr *PHI) { 477 assert(PHI->isPHI()); 478 Node *Node = RegNodeMap[PHI->getOperand(0).getReg()]; 479 Node->parent.setInt(Node->parent.getInt() | Node::kPHIIsolatedFlag); 480 } 481 482 /// SplitInterferencesForBasicBlock - traverses a basic block, splitting any 483 /// interferences found between registers in the same congruence class. It 484 /// takes two DenseMaps as arguments that it also updates: 485 /// 486 /// 1) CurrentDominatingParent, which maps a color to the register in that 487 /// congruence class whose definition was most recently seen. 488 /// 489 /// 2) ImmediateDominatingParent, which maps a register to the register in the 490 /// same congruence class that most immediately dominates it. 491 /// 492 /// This function assumes that it is being called in a depth-first traversal 493 /// of the dominator tree. 494 /// 495 /// The algorithm used here is a generalization of the dominance-based SSA test 496 /// for two variables. If there are variables a_1, ..., a_n such that 497 /// 498 /// def(a_1) dom ... dom def(a_n), 499 /// 500 /// then we can test for an interference between any two a_i by only using O(n) 501 /// interference tests between pairs of variables. If i < j and a_i and a_j 502 /// interfere, then a_i is alive at def(a_j), so it is also alive at def(a_i+1). 503 /// Thus, in order to test for an interference involving a_i, we need only check 504 /// for a potential interference with a_i+1. 505 /// 506 /// This method can be generalized to arbitrary sets of variables by performing 507 /// a depth-first traversal of the dominator tree. As we traverse down a branch 508 /// of the dominator tree, we keep track of the current dominating variable and 509 /// only perform an interference test with that variable. However, when we go to 510 /// another branch of the dominator tree, the definition of the current dominating 511 /// variable may no longer dominate the current block. In order to correct this, 512 /// we need to use a stack of past choices of the current dominating variable 513 /// and pop from this stack until we find a variable whose definition actually 514 /// dominates the current block. 515 /// 516 /// There will be one push on this stack for each variable that has become the 517 /// current dominating variable, so instead of using an explicit stack we can 518 /// simply associate the previous choice for a current dominating variable with 519 /// the new choice. This works better in our implementation, where we test for 520 /// interference in multiple distinct sets at once. 521 void 522 StrongPHIElimination::SplitInterferencesForBasicBlock( 523 MachineBasicBlock &MBB, 524 DenseMap<unsigned, unsigned> &CurrentDominatingParent, 525 DenseMap<unsigned, unsigned> &ImmediateDominatingParent) { 526 // Sort defs by their order in the original basic block, as the code below 527 // assumes that it is processing definitions in dominance order. 528 std::vector<MachineInstr*> &DefInstrs = PHISrcDefs[&MBB]; 529 std::sort(DefInstrs.begin(), DefInstrs.end(), MIIndexCompare(LI)); 530 531 for (std::vector<MachineInstr*>::const_iterator BBI = DefInstrs.begin(), 532 BBE = DefInstrs.end(); BBI != BBE; ++BBI) { 533 for (MachineInstr::const_mop_iterator I = (*BBI)->operands_begin(), 534 E = (*BBI)->operands_end(); I != E; ++I) { 535 const MachineOperand &MO = *I; 536 537 // FIXME: This would be faster if it were possible to bail out of checking 538 // an instruction's operands after the explicit defs, but this is incorrect 539 // for variadic instructions, which may appear before register allocation 540 // in the future. 541 if (!MO.isReg() || !MO.isDef()) 542 continue; 543 544 unsigned DestReg = MO.getReg(); 545 if (!DestReg || !TargetRegisterInfo::isVirtualRegister(DestReg)) 546 continue; 547 548 // If the virtual register being defined is not used in any PHI or has 549 // already been isolated, then there are no more interferences to check. 550 unsigned DestColor = getRegColor(DestReg); 551 if (!DestColor) 552 continue; 553 554 // The input to this pass sometimes is not in SSA form in every basic 555 // block, as some virtual registers have redefinitions. We could eliminate 556 // this by fixing the passes that generate the non-SSA code, or we could 557 // handle it here by tracking defining machine instructions rather than 558 // virtual registers. For now, we just handle the situation conservatively 559 // in a way that will possibly lead to false interferences. 560 unsigned &CurrentParent = CurrentDominatingParent[DestColor]; 561 unsigned NewParent = CurrentParent; 562 if (NewParent == DestReg) 563 continue; 564 565 // Pop registers from the stack represented by ImmediateDominatingParent 566 // until we find a parent that dominates the current instruction. 567 while (NewParent && (!DT->dominates(MRI->getVRegDef(NewParent), *BBI) 568 || !getRegColor(NewParent))) 569 NewParent = ImmediateDominatingParent[NewParent]; 570 571 // If NewParent is nonzero, then its definition dominates the current 572 // instruction, so it is only necessary to check for the liveness of 573 // NewParent in order to check for an interference. 574 if (NewParent 575 && LI->getInterval(NewParent).liveAt(LI->getInstructionIndex(*BBI))) { 576 // If there is an interference, always isolate the new register. This 577 // could be improved by using a heuristic that decides which of the two 578 // registers to isolate. 579 isolateReg(DestReg); 580 CurrentParent = NewParent; 581 } else { 582 // If there is no interference, update ImmediateDominatingParent and set 583 // the CurrentDominatingParent for this color to the current register. 584 ImmediateDominatingParent[DestReg] = NewParent; 585 CurrentParent = DestReg; 586 } 587 } 588 } 589 590 // We now walk the PHIs in successor blocks and check for interferences. This 591 // is necessary because the use of a PHI's operands are logically contained in 592 // the predecessor block. The def of a PHI's destination register is processed 593 // along with the other defs in a basic block. 594 595 CurrentPHIForColor.clear(); 596 597 for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(), 598 SE = MBB.succ_end(); SI != SE; ++SI) { 599 for (MachineBasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end(); 600 BBI != BBE && BBI->isPHI(); ++BBI) { 601 MachineInstr *PHI = BBI; 602 603 // If a PHI is already isolated, either by being isolated directly or 604 // having all of its operands isolated, ignore it. 605 unsigned Color = getPHIColor(PHI); 606 if (!Color) 607 continue; 608 609 // Find the index of the PHI operand that corresponds to this basic block. 610 unsigned PredIndex; 611 for (PredIndex = 1; PredIndex < PHI->getNumOperands(); PredIndex += 2) { 612 if (PHI->getOperand(PredIndex + 1).getMBB() == &MBB) 613 break; 614 } 615 assert(PredIndex < PHI->getNumOperands()); 616 unsigned PredOperandReg = PHI->getOperand(PredIndex).getReg(); 617 618 // Pop registers from the stack represented by ImmediateDominatingParent 619 // until we find a parent that dominates the current instruction. 620 unsigned &CurrentParent = CurrentDominatingParent[Color]; 621 unsigned NewParent = CurrentParent; 622 while (NewParent 623 && (!DT->dominates(MRI->getVRegDef(NewParent)->getParent(), &MBB) 624 || !getRegColor(NewParent))) 625 NewParent = ImmediateDominatingParent[NewParent]; 626 CurrentParent = NewParent; 627 628 // If there is an interference with a register, always isolate the 629 // register rather than the PHI. It is also possible to isolate the 630 // PHI, but that introduces copies for all of the registers involved 631 // in that PHI. 632 if (NewParent && LI->isLiveOutOfMBB(LI->getInterval(NewParent), &MBB) 633 && NewParent != PredOperandReg) 634 isolateReg(NewParent); 635 636 std::pair<MachineInstr*, unsigned> 637 &CurrentPHI = CurrentPHIForColor[Color]; 638 639 // If two PHIs have the same operand from every shared predecessor, then 640 // they don't actually interfere. Otherwise, isolate the current PHI. This 641 // could possibly be improved, e.g. we could isolate the PHI with the 642 // fewest operands. 643 if (CurrentPHI.first && CurrentPHI.second != PredOperandReg) 644 isolatePHI(PHI); 645 else 646 CurrentPHI = std::make_pair(PHI, PredOperandReg); 647 } 648 } 649 } 650 651 void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, 652 MachineBasicBlock *MBB) { 653 assert(PHI->isPHI()); 654 ++NumPHIsLowered; 655 unsigned PHIColor = getPHIColor(PHI); 656 657 for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) { 658 MachineOperand &SrcMO = PHI->getOperand(i); 659 660 // If a source is defined by an implicit def, there is no need to insert a 661 // copy in the predecessor. 662 if (SrcMO.isUndef()) 663 continue; 664 665 unsigned SrcReg = SrcMO.getReg(); 666 assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && 667 "Machine PHI Operands must all be virtual registers!"); 668 669 MachineBasicBlock *PredBB = PHI->getOperand(i + 1).getMBB(); 670 unsigned SrcColor = getRegColor(SrcReg); 671 672 // If neither the PHI nor the operand were isolated, then we only need to 673 // set the phi-kill flag on the VNInfo at this PHI. 674 if (PHIColor && SrcColor == PHIColor) { 675 LiveInterval &SrcInterval = LI->getInterval(SrcReg); 676 SlotIndex PredIndex = LI->getMBBEndIdx(PredBB); 677 VNInfo *SrcVNI = SrcInterval.getVNInfoBefore(PredIndex); 678 assert(SrcVNI); 679 SrcVNI->setHasPHIKill(true); 680 continue; 681 } 682 683 unsigned CopyReg = 0; 684 if (PHIColor) { 685 SrcCopyMap::const_iterator I 686 = InsertedSrcCopyMap.find(std::make_pair(PredBB, PHIColor)); 687 CopyReg 688 = I != InsertedSrcCopyMap.end() ? I->second->getOperand(0).getReg() : 0; 689 } 690 691 if (!CopyReg) { 692 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); 693 CopyReg = MRI->createVirtualRegister(RC); 694 695 MachineBasicBlock::iterator 696 CopyInsertPoint = findPHICopyInsertPoint(PredBB, MBB, SrcReg); 697 unsigned SrcSubReg = SrcMO.getSubReg(); 698 MachineInstr *CopyInstr = BuildMI(*PredBB, 699 CopyInsertPoint, 700 PHI->getDebugLoc(), 701 TII->get(TargetOpcode::COPY), 702 CopyReg).addReg(SrcReg, 0, SrcSubReg); 703 LI->InsertMachineInstrInMaps(CopyInstr); 704 ++NumSrcCopiesInserted; 705 706 // addLiveRangeToEndOfBlock() also adds the phikill flag to the VNInfo for 707 // the newly added range. 708 LI->addLiveRangeToEndOfBlock(CopyReg, CopyInstr); 709 InsertedSrcCopySet.insert(std::make_pair(PredBB, SrcReg)); 710 711 addReg(CopyReg); 712 if (PHIColor) { 713 unionRegs(PHIColor, CopyReg); 714 assert(getRegColor(CopyReg) != CopyReg); 715 } else { 716 PHIColor = CopyReg; 717 assert(getRegColor(CopyReg) == CopyReg); 718 } 719 720 if (!InsertedSrcCopyMap.count(std::make_pair(PredBB, PHIColor))) 721 InsertedSrcCopyMap[std::make_pair(PredBB, PHIColor)] = CopyInstr; 722 } 723 724 SrcMO.setReg(CopyReg); 725 726 // If SrcReg is not live beyond the PHI, trim its interval so that it is no 727 // longer live-in to MBB. Note that SrcReg may appear in other PHIs that are 728 // processed later, but this is still correct to do at this point because we 729 // never rely on LiveIntervals being correct while inserting copies. 730 // FIXME: Should this just count uses at PHIs like the normal PHIElimination 731 // pass does? 732 LiveInterval &SrcLI = LI->getInterval(SrcReg); 733 SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); 734 SlotIndex PHIIndex = LI->getInstructionIndex(PHI); 735 SlotIndex NextInstrIndex = PHIIndex.getNextIndex(); 736 if (SrcLI.liveAt(MBBStartIndex) && SrcLI.expiredAt(NextInstrIndex)) 737 SrcLI.removeRange(MBBStartIndex, PHIIndex, true); 738 } 739 740 unsigned DestReg = PHI->getOperand(0).getReg(); 741 unsigned DestColor = getRegColor(DestReg); 742 743 if (PHIColor && DestColor == PHIColor) { 744 LiveInterval &DestLI = LI->getInterval(DestReg); 745 746 // Set the phi-def flag for the VN at this PHI. 747 SlotIndex PHIIndex = LI->getInstructionIndex(PHI); 748 VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getDefIndex()); 749 assert(DestVNI); 750 DestVNI->setIsPHIDef(true); 751 752 // Prior to PHI elimination, the live ranges of PHIs begin at their defining 753 // instruction. After PHI elimination, PHI instructions are replaced by VNs 754 // with the phi-def flag set, and the live ranges of these VNs start at the 755 // beginning of the basic block. 756 SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); 757 DestVNI->def = MBBStartIndex; 758 DestLI.addRange(LiveRange(MBBStartIndex, 759 PHIIndex.getDefIndex(), 760 DestVNI)); 761 return; 762 } 763 764 const TargetRegisterClass *RC = MRI->getRegClass(DestReg); 765 unsigned CopyReg = MRI->createVirtualRegister(RC); 766 767 MachineInstr *CopyInstr = BuildMI(*MBB, 768 MBB->SkipPHIsAndLabels(MBB->begin()), 769 PHI->getDebugLoc(), 770 TII->get(TargetOpcode::COPY), 771 DestReg).addReg(CopyReg); 772 LI->InsertMachineInstrInMaps(CopyInstr); 773 PHI->getOperand(0).setReg(CopyReg); 774 ++NumDestCopiesInserted; 775 776 // Add the region from the beginning of MBB to the copy instruction to 777 // CopyReg's live interval, and give the VNInfo the phidef flag. 778 LiveInterval &CopyLI = LI->getOrCreateInterval(CopyReg); 779 SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); 780 SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr); 781 VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex, 782 CopyInstr, 783 LI->getVNInfoAllocator()); 784 CopyVNI->setIsPHIDef(true); 785 CopyLI.addRange(LiveRange(MBBStartIndex, 786 DestCopyIndex.getDefIndex(), 787 CopyVNI)); 788 789 // Adjust DestReg's live interval to adjust for its new definition at 790 // CopyInstr. 791 LiveInterval &DestLI = LI->getOrCreateInterval(DestReg); 792 SlotIndex PHIIndex = LI->getInstructionIndex(PHI); 793 DestLI.removeRange(PHIIndex.getDefIndex(), DestCopyIndex.getDefIndex()); 794 795 VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getDefIndex()); 796 assert(DestVNI); 797 DestVNI->def = DestCopyIndex.getDefIndex(); 798 799 InsertedDestCopies[CopyReg] = CopyInstr; 800 } 801 802 void StrongPHIElimination::MergeLIsAndRename(unsigned Reg, unsigned NewReg) { 803 if (Reg == NewReg) 804 return; 805 806 LiveInterval &OldLI = LI->getInterval(Reg); 807 LiveInterval &NewLI = LI->getInterval(NewReg); 808 809 // Merge the live ranges of the two registers. 810 DenseMap<VNInfo*, VNInfo*> VNMap; 811 for (LiveInterval::iterator LRI = OldLI.begin(), LRE = OldLI.end(); 812 LRI != LRE; ++LRI) { 813 LiveRange OldLR = *LRI; 814 VNInfo *OldVN = OldLR.valno; 815 816 VNInfo *&NewVN = VNMap[OldVN]; 817 if (!NewVN) { 818 NewVN = NewLI.createValueCopy(OldVN, LI->getVNInfoAllocator()); 819 VNMap[OldVN] = NewVN; 820 } 821 822 LiveRange LR(OldLR.start, OldLR.end, NewVN); 823 NewLI.addRange(LR); 824 } 825 826 // Remove the LiveInterval for the register being renamed and replace all 827 // of its defs and uses with the new register. 828 LI->removeInterval(Reg); 829 MRI->replaceRegWith(Reg, NewReg); 830 } 831