Home | History | Annotate | Download | only in CodeGen
      1 //===- StrongPHIElimination.cpp - Eliminate PHI nodes by inserting copies -===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This pass eliminates PHI instructions by aggressively coalescing the copies
     11 // that would be inserted by a naive algorithm and only inserting the copies
     12 // that are necessary. The coalescing technique initially assumes that all
     13 // registers appearing in a PHI instruction do not interfere. It then eliminates
     14 // proven interferences, using dominators to only perform a linear number of
     15 // interference tests instead of the quadratic number of interference tests
     16 // that this would naively require. This is a technique derived from:
     17 //
     18 //    Budimlic, et al. Fast copy coalescing and live-range identification.
     19 //    In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language
     20 //    Design and Implementation (Berlin, Germany, June 17 - 19, 2002).
     21 //    PLDI '02. ACM, New York, NY, 25-32.
     22 //
     23 // The original implementation constructs a data structure they call a dominance
     24 // forest for this purpose. The dominance forest was shown to be unnecessary,
     25 // as it is possible to emulate the creation and traversal of a dominance forest
     26 // by directly using the dominator tree, rather than actually constructing the
     27 // dominance forest.  This technique is explained in:
     28 //
     29 //   Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness, Code
     30 //     Quality and Efficiency,
     31 //   In Proceedings of the 7th annual IEEE/ACM International Symposium on Code
     32 //   Generation and Optimization (Seattle, Washington, March 22 - 25, 2009).
     33 //   CGO '09. IEEE, Washington, DC, 114-125.
     34 //
     35 // Careful implementation allows for all of the dominator forest interference
     36 // checks to be performed at once in a single depth-first traversal of the
     37 // dominator tree, which is what is implemented here.
     38 //
     39 //===----------------------------------------------------------------------===//
     40 
     41 #define DEBUG_TYPE "strongphielim"
     42 #include "llvm/CodeGen/Passes.h"
     43 #include "PHIEliminationUtils.h"
     44 #include "llvm/ADT/DenseSet.h"
     45 #include "llvm/ADT/Statistic.h"
     46 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
     47 #include "llvm/CodeGen/MachineDominators.h"
     48 #include "llvm/CodeGen/MachineFunctionPass.h"
     49 #include "llvm/CodeGen/MachineInstrBuilder.h"
     50 #include "llvm/CodeGen/MachineRegisterInfo.h"
     51 #include "llvm/Support/Debug.h"
     52 #include "llvm/Target/TargetInstrInfo.h"
     53 using namespace llvm;
     54 
     55 namespace {
     56   class StrongPHIElimination : public MachineFunctionPass {
     57   public:
     58     static char ID; // Pass identification, replacement for typeid
     59     StrongPHIElimination() : MachineFunctionPass(ID) {
     60       initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
     61     }
     62 
     63     virtual void getAnalysisUsage(AnalysisUsage&) const;
     64     bool runOnMachineFunction(MachineFunction&);
     65 
     66   private:
     67     /// This struct represents a single node in the union-find data structure
     68     /// representing the variable congruence classes. There is one difference
     69     /// from a normal union-find data structure. We steal two bits from the parent
     70     /// pointer . One of these bits is used to represent whether the register
     71     /// itself has been isolated, and the other is used to represent whether the
     72     /// PHI with that register as its destination has been isolated.
     73     ///
     74     /// Note that this leads to the strange situation where the leader of a
     75     /// congruence class may no longer logically be a member, due to being
     76     /// isolated.
     77     struct Node {
     78       enum Flags {
     79         kRegisterIsolatedFlag = 1,
     80         kPHIIsolatedFlag = 2
     81       };
     82       Node(unsigned v) : value(v), rank(0) { parent.setPointer(this); }
     83 
     84       Node *getLeader();
     85 
     86       PointerIntPair<Node*, 2> parent;
     87       unsigned value;
     88       unsigned rank;
     89     };
     90 
     91     /// Add a register in a new congruence class containing only itself.
     92     void addReg(unsigned);
     93 
     94     /// Join the congruence classes of two registers. This function is biased
     95     /// towards the left argument, i.e. after
     96     ///
     97     /// addReg(r2);
     98     /// unionRegs(r1, r2);
     99     ///
    100     /// the leader of the unioned congruence class is the same as the leader of
    101     /// r1's congruence class prior to the union. This is actually relied upon
    102     /// in the copy insertion code.
    103     void unionRegs(unsigned, unsigned);
    104 
    105     /// Get the color of a register. The color is 0 if the register has been
    106     /// isolated.
    107     unsigned getRegColor(unsigned);
    108 
    109     // Isolate a register.
    110     void isolateReg(unsigned);
    111 
    112     /// Get the color of a PHI. The color of a PHI is 0 if the PHI has been
    113     /// isolated. Otherwise, it is the original color of its destination and
    114     /// all of its operands (before they were isolated, if they were).
    115     unsigned getPHIColor(MachineInstr*);
    116 
    117     /// Isolate a PHI.
    118     void isolatePHI(MachineInstr*);
    119 
    120     /// Traverses a basic block, splitting any interferences found between
    121     /// registers in the same congruence class. It takes two DenseMaps as
    122     /// arguments that it also updates: CurrentDominatingParent, which maps
    123     /// a color to the register in that congruence class whose definition was
    124     /// most recently seen, and ImmediateDominatingParent, which maps a register
    125     /// to the register in the same congruence class that most immediately
    126     /// dominates it.
    127     ///
    128     /// This function assumes that it is being called in a depth-first traversal
    129     /// of the dominator tree.
    130     void SplitInterferencesForBasicBlock(
    131       MachineBasicBlock&,
    132       DenseMap<unsigned, unsigned> &CurrentDominatingParent,
    133       DenseMap<unsigned, unsigned> &ImmediateDominatingParent);
    134 
    135     // Lowers a PHI instruction, inserting copies of the source and destination
    136     // registers as necessary.
    137     void InsertCopiesForPHI(MachineInstr*, MachineBasicBlock*);
    138 
    139     // Merges the live interval of Reg into NewReg and renames Reg to NewReg
    140     // everywhere that Reg appears. Requires Reg and NewReg to have non-
    141     // overlapping lifetimes.
    142     void MergeLIsAndRename(unsigned Reg, unsigned NewReg);
    143 
    144     MachineRegisterInfo *MRI;
    145     const TargetInstrInfo *TII;
    146     MachineDominatorTree *DT;
    147     LiveIntervals *LI;
    148 
    149     BumpPtrAllocator Allocator;
    150 
    151     DenseMap<unsigned, Node*> RegNodeMap;
    152 
    153     // Maps a basic block to a list of its defs of registers that appear as PHI
    154     // sources.
    155     DenseMap<MachineBasicBlock*, std::vector<MachineInstr*> > PHISrcDefs;
    156 
    157     // Maps a color to a pair of a MachineInstr* and a virtual register, which
    158     // is the operand of that PHI corresponding to the current basic block.
    159     DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > CurrentPHIForColor;
    160 
    161     // FIXME: Can these two data structures be combined? Would a std::multimap
    162     // be any better?
    163 
    164     // Stores pairs of predecessor basic blocks and the source registers of
    165     // inserted copy instructions.
    166     typedef DenseSet<std::pair<MachineBasicBlock*, unsigned> > SrcCopySet;
    167     SrcCopySet InsertedSrcCopySet;
    168 
    169     // Maps pairs of predecessor basic blocks and colors to their defining copy
    170     // instructions.
    171     typedef DenseMap<std::pair<MachineBasicBlock*, unsigned>, MachineInstr*>
    172       SrcCopyMap;
    173     SrcCopyMap InsertedSrcCopyMap;
    174 
    175     // Maps inserted destination copy registers to their defining copy
    176     // instructions.
    177     typedef DenseMap<unsigned, MachineInstr*> DestCopyMap;
    178     DestCopyMap InsertedDestCopies;
    179   };
    180 
    181   struct MIIndexCompare {
    182     MIIndexCompare(LiveIntervals *LiveIntervals) : LI(LiveIntervals) { }
    183 
    184     bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
    185       return LI->getInstructionIndex(LHS) < LI->getInstructionIndex(RHS);
    186     }
    187 
    188     LiveIntervals *LI;
    189   };
    190 } // namespace
    191 
    192 STATISTIC(NumPHIsLowered, "Number of PHIs lowered");
    193 STATISTIC(NumDestCopiesInserted, "Number of destination copies inserted");
    194 STATISTIC(NumSrcCopiesInserted, "Number of source copies inserted");
    195 
    196 char StrongPHIElimination::ID = 0;
    197 INITIALIZE_PASS_BEGIN(StrongPHIElimination, "strong-phi-node-elimination",
    198   "Eliminate PHI nodes for register allocation, intelligently", false, false)
    199 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
    200 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
    201 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
    202 INITIALIZE_PASS_END(StrongPHIElimination, "strong-phi-node-elimination",
    203   "Eliminate PHI nodes for register allocation, intelligently", false, false)
    204 
    205 char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID;
    206 
    207 void StrongPHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
    208   AU.setPreservesCFG();
    209   AU.addRequired<MachineDominatorTree>();
    210   AU.addRequired<SlotIndexes>();
    211   AU.addPreserved<SlotIndexes>();
    212   AU.addRequired<LiveIntervals>();
    213   AU.addPreserved<LiveIntervals>();
    214   MachineFunctionPass::getAnalysisUsage(AU);
    215 }
    216 
    217 static MachineOperand *findLastUse(MachineBasicBlock *MBB, unsigned Reg) {
    218   // FIXME: This only needs to check from the first terminator, as only the
    219   // first terminator can use a virtual register.
    220   for (MachineBasicBlock::reverse_iterator RI = MBB->rbegin(); ; ++RI) {
    221     assert (RI != MBB->rend());
    222     MachineInstr *MI = &*RI;
    223 
    224     for (MachineInstr::mop_iterator OI = MI->operands_begin(),
    225          OE = MI->operands_end(); OI != OE; ++OI) {
    226       MachineOperand &MO = *OI;
    227       if (MO.isReg() && MO.isUse() && MO.getReg() == Reg)
    228         return &MO;
    229     }
    230   }
    231 }
    232 
    233 bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) {
    234   MRI = &MF.getRegInfo();
    235   TII = MF.getTarget().getInstrInfo();
    236   DT = &getAnalysis<MachineDominatorTree>();
    237   LI = &getAnalysis<LiveIntervals>();
    238 
    239   for (MachineFunction::iterator I = MF.begin(), E = MF.end();
    240        I != E; ++I) {
    241     for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
    242          BBI != BBE && BBI->isPHI(); ++BBI) {
    243       unsigned DestReg = BBI->getOperand(0).getReg();
    244       addReg(DestReg);
    245       PHISrcDefs[I].push_back(BBI);
    246 
    247       for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
    248         MachineOperand &SrcMO = BBI->getOperand(i);
    249         unsigned SrcReg = SrcMO.getReg();
    250         addReg(SrcReg);
    251         unionRegs(DestReg, SrcReg);
    252 
    253         MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
    254         if (DefMI)
    255           PHISrcDefs[DefMI->getParent()].push_back(DefMI);
    256       }
    257     }
    258   }
    259 
    260   // Perform a depth-first traversal of the dominator tree, splitting
    261   // interferences amongst PHI-congruence classes.
    262   DenseMap<unsigned, unsigned> CurrentDominatingParent;
    263   DenseMap<unsigned, unsigned> ImmediateDominatingParent;
    264   for (df_iterator<MachineDomTreeNode*> DI = df_begin(DT->getRootNode()),
    265        DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
    266     SplitInterferencesForBasicBlock(*DI->getBlock(),
    267                                     CurrentDominatingParent,
    268                                     ImmediateDominatingParent);
    269   }
    270 
    271   // Insert copies for all PHI source and destination registers.
    272   for (MachineFunction::iterator I = MF.begin(), E = MF.end();
    273        I != E; ++I) {
    274     for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
    275          BBI != BBE && BBI->isPHI(); ++BBI) {
    276       InsertCopiesForPHI(BBI, I);
    277     }
    278   }
    279 
    280   // FIXME: Preserve the equivalence classes during copy insertion and use
    281   // the preversed equivalence classes instead of recomputing them.
    282   RegNodeMap.clear();
    283   for (MachineFunction::iterator I = MF.begin(), E = MF.end();
    284        I != E; ++I) {
    285     for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
    286          BBI != BBE && BBI->isPHI(); ++BBI) {
    287       unsigned DestReg = BBI->getOperand(0).getReg();
    288       addReg(DestReg);
    289 
    290       for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
    291         unsigned SrcReg = BBI->getOperand(i).getReg();
    292         addReg(SrcReg);
    293         unionRegs(DestReg, SrcReg);
    294       }
    295     }
    296   }
    297 
    298   DenseMap<unsigned, unsigned> RegRenamingMap;
    299   bool Changed = false;
    300   for (MachineFunction::iterator I = MF.begin(), E = MF.end();
    301        I != E; ++I) {
    302     MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
    303     while (BBI != BBE && BBI->isPHI()) {
    304       MachineInstr *PHI = BBI;
    305 
    306       assert(PHI->getNumOperands() > 0);
    307 
    308       unsigned SrcReg = PHI->getOperand(1).getReg();
    309       unsigned SrcColor = getRegColor(SrcReg);
    310       unsigned NewReg = RegRenamingMap[SrcColor];
    311       if (!NewReg) {
    312         NewReg = SrcReg;
    313         RegRenamingMap[SrcColor] = SrcReg;
    314       }
    315       MergeLIsAndRename(SrcReg, NewReg);
    316 
    317       unsigned DestReg = PHI->getOperand(0).getReg();
    318       if (!InsertedDestCopies.count(DestReg))
    319         MergeLIsAndRename(DestReg, NewReg);
    320 
    321       for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) {
    322         unsigned SrcReg = PHI->getOperand(i).getReg();
    323         MergeLIsAndRename(SrcReg, NewReg);
    324       }
    325 
    326       ++BBI;
    327       LI->RemoveMachineInstrFromMaps(PHI);
    328       PHI->eraseFromParent();
    329       Changed = true;
    330     }
    331   }
    332 
    333   // Due to the insertion of copies to split live ranges, the live intervals are
    334   // guaranteed to not overlap, except in one case: an original PHI source and a
    335   // PHI destination copy. In this case, they have the same value and thus don't
    336   // truly intersect, so we merge them into the value live at that point.
    337   // FIXME: Is there some better way we can handle this?
    338   for (DestCopyMap::iterator I = InsertedDestCopies.begin(),
    339        E = InsertedDestCopies.end(); I != E; ++I) {
    340     unsigned DestReg = I->first;
    341     unsigned DestColor = getRegColor(DestReg);
    342     unsigned NewReg = RegRenamingMap[DestColor];
    343 
    344     LiveInterval &DestLI = LI->getInterval(DestReg);
    345     LiveInterval &NewLI = LI->getInterval(NewReg);
    346 
    347     assert(DestLI.ranges.size() == 1
    348            && "PHI destination copy's live interval should be a single live "
    349                "range from the beginning of the BB to the copy instruction.");
    350     LiveRange *DestLR = DestLI.begin();
    351     VNInfo *NewVNI = NewLI.getVNInfoAt(DestLR->start);
    352     if (!NewVNI) {
    353       NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator());
    354       MachineInstr *CopyInstr = I->second;
    355       CopyInstr->getOperand(1).setIsKill(true);
    356     }
    357 
    358     LiveRange NewLR(DestLR->start, DestLR->end, NewVNI);
    359     NewLI.addRange(NewLR);
    360 
    361     LI->removeInterval(DestReg);
    362     MRI->replaceRegWith(DestReg, NewReg);
    363   }
    364 
    365   // Adjust the live intervals of all PHI source registers to handle the case
    366   // where the PHIs in successor blocks were the only later uses of the source
    367   // register.
    368   for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(),
    369        E = InsertedSrcCopySet.end(); I != E; ++I) {
    370     MachineBasicBlock *MBB = I->first;
    371     unsigned SrcReg = I->second;
    372     if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)])
    373       SrcReg = RenamedRegister;
    374 
    375     LiveInterval &SrcLI = LI->getInterval(SrcReg);
    376 
    377     bool isLiveOut = false;
    378     for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
    379          SE = MBB->succ_end(); SI != SE; ++SI) {
    380       if (SrcLI.liveAt(LI->getMBBStartIdx(*SI))) {
    381         isLiveOut = true;
    382         break;
    383       }
    384     }
    385 
    386     if (isLiveOut)
    387       continue;
    388 
    389     MachineOperand *LastUse = findLastUse(MBB, SrcReg);
    390     assert(LastUse);
    391     SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent());
    392     SrcLI.removeRange(LastUseIndex.getRegSlot(), LI->getMBBEndIdx(MBB));
    393     LastUse->setIsKill(true);
    394   }
    395 
    396   Allocator.Reset();
    397   RegNodeMap.clear();
    398   PHISrcDefs.clear();
    399   InsertedSrcCopySet.clear();
    400   InsertedSrcCopyMap.clear();
    401   InsertedDestCopies.clear();
    402 
    403   return Changed;
    404 }
    405 
    406 void StrongPHIElimination::addReg(unsigned Reg) {
    407   Node *&N = RegNodeMap[Reg];
    408   if (!N)
    409     N = new (Allocator) Node(Reg);
    410 }
    411 
    412 StrongPHIElimination::Node*
    413 StrongPHIElimination::Node::getLeader() {
    414   Node *N = this;
    415   Node *Parent = parent.getPointer();
    416   Node *Grandparent = Parent->parent.getPointer();
    417 
    418   while (Parent != Grandparent) {
    419     N->parent.setPointer(Grandparent);
    420     N = Grandparent;
    421     Parent = Parent->parent.getPointer();
    422     Grandparent = Parent->parent.getPointer();
    423   }
    424 
    425   return Parent;
    426 }
    427 
    428 unsigned StrongPHIElimination::getRegColor(unsigned Reg) {
    429   DenseMap<unsigned, Node*>::iterator RI = RegNodeMap.find(Reg);
    430   if (RI == RegNodeMap.end())
    431     return 0;
    432   Node *Node = RI->second;
    433   if (Node->parent.getInt() & Node::kRegisterIsolatedFlag)
    434     return 0;
    435   return Node->getLeader()->value;
    436 }
    437 
    438 void StrongPHIElimination::unionRegs(unsigned Reg1, unsigned Reg2) {
    439   Node *Node1 = RegNodeMap[Reg1]->getLeader();
    440   Node *Node2 = RegNodeMap[Reg2]->getLeader();
    441 
    442   if (Node1->rank > Node2->rank) {
    443     Node2->parent.setPointer(Node1->getLeader());
    444   } else if (Node1->rank < Node2->rank) {
    445     Node1->parent.setPointer(Node2->getLeader());
    446   } else if (Node1 != Node2) {
    447     Node2->parent.setPointer(Node1->getLeader());
    448     Node1->rank++;
    449   }
    450 }
    451 
    452 void StrongPHIElimination::isolateReg(unsigned Reg) {
    453   Node *Node = RegNodeMap[Reg];
    454   Node->parent.setInt(Node->parent.getInt() | Node::kRegisterIsolatedFlag);
    455 }
    456 
    457 unsigned StrongPHIElimination::getPHIColor(MachineInstr *PHI) {
    458   assert(PHI->isPHI());
    459 
    460   unsigned DestReg = PHI->getOperand(0).getReg();
    461   Node *DestNode = RegNodeMap[DestReg];
    462   if (DestNode->parent.getInt() & Node::kPHIIsolatedFlag)
    463     return 0;
    464 
    465   for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) {
    466     unsigned SrcColor = getRegColor(PHI->getOperand(i).getReg());
    467     if (SrcColor)
    468       return SrcColor;
    469   }
    470   return 0;
    471 }
    472 
    473 void StrongPHIElimination::isolatePHI(MachineInstr *PHI) {
    474   assert(PHI->isPHI());
    475   Node *Node = RegNodeMap[PHI->getOperand(0).getReg()];
    476   Node->parent.setInt(Node->parent.getInt() | Node::kPHIIsolatedFlag);
    477 }
    478 
    479 /// SplitInterferencesForBasicBlock - traverses a basic block, splitting any
    480 /// interferences found between registers in the same congruence class. It
    481 /// takes two DenseMaps as arguments that it also updates:
    482 ///
    483 /// 1) CurrentDominatingParent, which maps a color to the register in that
    484 ///    congruence class whose definition was most recently seen.
    485 ///
    486 /// 2) ImmediateDominatingParent, which maps a register to the register in the
    487 ///    same congruence class that most immediately dominates it.
    488 ///
    489 /// This function assumes that it is being called in a depth-first traversal
    490 /// of the dominator tree.
    491 ///
    492 /// The algorithm used here is a generalization of the dominance-based SSA test
    493 /// for two variables. If there are variables a_1, ..., a_n such that
    494 ///
    495 ///   def(a_1) dom ... dom def(a_n),
    496 ///
    497 /// then we can test for an interference between any two a_i by only using O(n)
    498 /// interference tests between pairs of variables. If i < j and a_i and a_j
    499 /// interfere, then a_i is alive at def(a_j), so it is also alive at def(a_i+1).
    500 /// Thus, in order to test for an interference involving a_i, we need only check
    501 /// for a potential interference with a_i+1.
    502 ///
    503 /// This method can be generalized to arbitrary sets of variables by performing
    504 /// a depth-first traversal of the dominator tree. As we traverse down a branch
    505 /// of the dominator tree, we keep track of the current dominating variable and
    506 /// only perform an interference test with that variable. However, when we go to
    507 /// another branch of the dominator tree, the definition of the current dominating
    508 /// variable may no longer dominate the current block. In order to correct this,
    509 /// we need to use a stack of past choices of the current dominating variable
    510 /// and pop from this stack until we find a variable whose definition actually
    511 /// dominates the current block.
    512 ///
    513 /// There will be one push on this stack for each variable that has become the
    514 /// current dominating variable, so instead of using an explicit stack we can
    515 /// simply associate the previous choice for a current dominating variable with
    516 /// the new choice. This works better in our implementation, where we test for
    517 /// interference in multiple distinct sets at once.
    518 void
    519 StrongPHIElimination::SplitInterferencesForBasicBlock(
    520     MachineBasicBlock &MBB,
    521     DenseMap<unsigned, unsigned> &CurrentDominatingParent,
    522     DenseMap<unsigned, unsigned> &ImmediateDominatingParent) {
    523   // Sort defs by their order in the original basic block, as the code below
    524   // assumes that it is processing definitions in dominance order.
    525   std::vector<MachineInstr*> &DefInstrs = PHISrcDefs[&MBB];
    526   std::sort(DefInstrs.begin(), DefInstrs.end(), MIIndexCompare(LI));
    527 
    528   for (std::vector<MachineInstr*>::const_iterator BBI = DefInstrs.begin(),
    529        BBE = DefInstrs.end(); BBI != BBE; ++BBI) {
    530     for (MachineInstr::const_mop_iterator I = (*BBI)->operands_begin(),
    531          E = (*BBI)->operands_end(); I != E; ++I) {
    532       const MachineOperand &MO = *I;
    533 
    534       // FIXME: This would be faster if it were possible to bail out of checking
    535       // an instruction's operands after the explicit defs, but this is incorrect
    536       // for variadic instructions, which may appear before register allocation
    537       // in the future.
    538       if (!MO.isReg() || !MO.isDef())
    539         continue;
    540 
    541       unsigned DestReg = MO.getReg();
    542       if (!DestReg || !TargetRegisterInfo::isVirtualRegister(DestReg))
    543         continue;
    544 
    545       // If the virtual register being defined is not used in any PHI or has
    546       // already been isolated, then there are no more interferences to check.
    547       unsigned DestColor = getRegColor(DestReg);
    548       if (!DestColor)
    549         continue;
    550 
    551       // The input to this pass sometimes is not in SSA form in every basic
    552       // block, as some virtual registers have redefinitions. We could eliminate
    553       // this by fixing the passes that generate the non-SSA code, or we could
    554       // handle it here by tracking defining machine instructions rather than
    555       // virtual registers. For now, we just handle the situation conservatively
    556       // in a way that will possibly lead to false interferences.
    557       unsigned &CurrentParent = CurrentDominatingParent[DestColor];
    558       unsigned NewParent = CurrentParent;
    559       if (NewParent == DestReg)
    560         continue;
    561 
    562       // Pop registers from the stack represented by ImmediateDominatingParent
    563       // until we find a parent that dominates the current instruction.
    564       while (NewParent && (!DT->dominates(MRI->getVRegDef(NewParent), *BBI)
    565                            || !getRegColor(NewParent)))
    566         NewParent = ImmediateDominatingParent[NewParent];
    567 
    568       // If NewParent is nonzero, then its definition dominates the current
    569       // instruction, so it is only necessary to check for the liveness of
    570       // NewParent in order to check for an interference.
    571       if (NewParent
    572           && LI->getInterval(NewParent).liveAt(LI->getInstructionIndex(*BBI))) {
    573         // If there is an interference, always isolate the new register. This
    574         // could be improved by using a heuristic that decides which of the two
    575         // registers to isolate.
    576         isolateReg(DestReg);
    577         CurrentParent = NewParent;
    578       } else {
    579         // If there is no interference, update ImmediateDominatingParent and set
    580         // the CurrentDominatingParent for this color to the current register.
    581         ImmediateDominatingParent[DestReg] = NewParent;
    582         CurrentParent = DestReg;
    583       }
    584     }
    585   }
    586 
    587   // We now walk the PHIs in successor blocks and check for interferences. This
    588   // is necessary because the use of a PHI's operands are logically contained in
    589   // the predecessor block. The def of a PHI's destination register is processed
    590   // along with the other defs in a basic block.
    591 
    592   CurrentPHIForColor.clear();
    593 
    594   for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
    595        SE = MBB.succ_end(); SI != SE; ++SI) {
    596     for (MachineBasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end();
    597          BBI != BBE && BBI->isPHI(); ++BBI) {
    598       MachineInstr *PHI = BBI;
    599 
    600       // If a PHI is already isolated, either by being isolated directly or
    601       // having all of its operands isolated, ignore it.
    602       unsigned Color = getPHIColor(PHI);
    603       if (!Color)
    604         continue;
    605 
    606       // Find the index of the PHI operand that corresponds to this basic block.
    607       unsigned PredIndex;
    608       for (PredIndex = 1; PredIndex < PHI->getNumOperands(); PredIndex += 2) {
    609         if (PHI->getOperand(PredIndex + 1).getMBB() == &MBB)
    610           break;
    611       }
    612       assert(PredIndex < PHI->getNumOperands());
    613       unsigned PredOperandReg = PHI->getOperand(PredIndex).getReg();
    614 
    615       // Pop registers from the stack represented by ImmediateDominatingParent
    616       // until we find a parent that dominates the current instruction.
    617       unsigned &CurrentParent = CurrentDominatingParent[Color];
    618       unsigned NewParent = CurrentParent;
    619       while (NewParent
    620              && (!DT->dominates(MRI->getVRegDef(NewParent)->getParent(), &MBB)
    621                  || !getRegColor(NewParent)))
    622         NewParent = ImmediateDominatingParent[NewParent];
    623       CurrentParent = NewParent;
    624 
    625       // If there is an interference with a register, always isolate the
    626       // register rather than the PHI. It is also possible to isolate the
    627       // PHI, but that introduces copies for all of the registers involved
    628       // in that PHI.
    629       if (NewParent && LI->isLiveOutOfMBB(LI->getInterval(NewParent), &MBB)
    630                     && NewParent != PredOperandReg)
    631         isolateReg(NewParent);
    632 
    633       std::pair<MachineInstr*, unsigned>
    634         &CurrentPHI = CurrentPHIForColor[Color];
    635 
    636       // If two PHIs have the same operand from every shared predecessor, then
    637       // they don't actually interfere. Otherwise, isolate the current PHI. This
    638       // could possibly be improved, e.g. we could isolate the PHI with the
    639       // fewest operands.
    640       if (CurrentPHI.first && CurrentPHI.second != PredOperandReg)
    641         isolatePHI(PHI);
    642       else
    643         CurrentPHI = std::make_pair(PHI, PredOperandReg);
    644     }
    645   }
    646 }
    647 
    648 void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
    649                                               MachineBasicBlock *MBB) {
    650   assert(PHI->isPHI());
    651   ++NumPHIsLowered;
    652   unsigned PHIColor = getPHIColor(PHI);
    653 
    654   for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) {
    655     MachineOperand &SrcMO = PHI->getOperand(i);
    656 
    657     // If a source is defined by an implicit def, there is no need to insert a
    658     // copy in the predecessor.
    659     if (SrcMO.isUndef())
    660       continue;
    661 
    662     unsigned SrcReg = SrcMO.getReg();
    663     assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
    664            "Machine PHI Operands must all be virtual registers!");
    665 
    666     MachineBasicBlock *PredBB = PHI->getOperand(i + 1).getMBB();
    667     unsigned SrcColor = getRegColor(SrcReg);
    668 
    669     // If neither the PHI nor the operand were isolated, then we only need to
    670     // set the phi-kill flag on the VNInfo at this PHI.
    671     if (PHIColor && SrcColor == PHIColor) {
    672       LiveInterval &SrcInterval = LI->getInterval(SrcReg);
    673       SlotIndex PredIndex = LI->getMBBEndIdx(PredBB);
    674       VNInfo *SrcVNI = SrcInterval.getVNInfoBefore(PredIndex);
    675       (void)SrcVNI;
    676       assert(SrcVNI);
    677       continue;
    678     }
    679 
    680     unsigned CopyReg = 0;
    681     if (PHIColor) {
    682       SrcCopyMap::const_iterator I
    683         = InsertedSrcCopyMap.find(std::make_pair(PredBB, PHIColor));
    684       CopyReg
    685         = I != InsertedSrcCopyMap.end() ? I->second->getOperand(0).getReg() : 0;
    686     }
    687 
    688     if (!CopyReg) {
    689       const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
    690       CopyReg = MRI->createVirtualRegister(RC);
    691 
    692       MachineBasicBlock::iterator
    693         CopyInsertPoint = findPHICopyInsertPoint(PredBB, MBB, SrcReg);
    694       unsigned SrcSubReg = SrcMO.getSubReg();
    695       MachineInstr *CopyInstr = BuildMI(*PredBB,
    696                                         CopyInsertPoint,
    697                                         PHI->getDebugLoc(),
    698                                         TII->get(TargetOpcode::COPY),
    699                                         CopyReg).addReg(SrcReg, 0, SrcSubReg);
    700       LI->InsertMachineInstrInMaps(CopyInstr);
    701       ++NumSrcCopiesInserted;
    702 
    703       // addLiveRangeToEndOfBlock() also adds the phikill flag to the VNInfo for
    704       // the newly added range.
    705       LI->addLiveRangeToEndOfBlock(CopyReg, CopyInstr);
    706       InsertedSrcCopySet.insert(std::make_pair(PredBB, SrcReg));
    707 
    708       addReg(CopyReg);
    709       if (PHIColor) {
    710         unionRegs(PHIColor, CopyReg);
    711         assert(getRegColor(CopyReg) != CopyReg);
    712       } else {
    713         PHIColor = CopyReg;
    714         assert(getRegColor(CopyReg) == CopyReg);
    715       }
    716 
    717       // Insert into map if not already there.
    718       InsertedSrcCopyMap.insert(std::make_pair(std::make_pair(PredBB, PHIColor),
    719                                                CopyInstr));
    720     }
    721 
    722     SrcMO.setReg(CopyReg);
    723 
    724     // If SrcReg is not live beyond the PHI, trim its interval so that it is no
    725     // longer live-in to MBB. Note that SrcReg may appear in other PHIs that are
    726     // processed later, but this is still correct to do at this point because we
    727     // never rely on LiveIntervals being correct while inserting copies.
    728     // FIXME: Should this just count uses at PHIs like the normal PHIElimination
    729     // pass does?
    730     LiveInterval &SrcLI = LI->getInterval(SrcReg);
    731     SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
    732     SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
    733     SlotIndex NextInstrIndex = PHIIndex.getNextIndex();
    734     if (SrcLI.liveAt(MBBStartIndex) && SrcLI.expiredAt(NextInstrIndex))
    735       SrcLI.removeRange(MBBStartIndex, PHIIndex, true);
    736   }
    737 
    738   unsigned DestReg = PHI->getOperand(0).getReg();
    739   unsigned DestColor = getRegColor(DestReg);
    740 
    741   if (PHIColor && DestColor == PHIColor) {
    742     LiveInterval &DestLI = LI->getInterval(DestReg);
    743 
    744     // Set the phi-def flag for the VN at this PHI.
    745     SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
    746     VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getRegSlot());
    747     assert(DestVNI);
    748 
    749     // Prior to PHI elimination, the live ranges of PHIs begin at their defining
    750     // instruction. After PHI elimination, PHI instructions are replaced by VNs
    751     // with the phi-def flag set, and the live ranges of these VNs start at the
    752     // beginning of the basic block.
    753     SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
    754     DestVNI->def = MBBStartIndex;
    755     DestLI.addRange(LiveRange(MBBStartIndex,
    756                               PHIIndex.getRegSlot(),
    757                               DestVNI));
    758     return;
    759   }
    760 
    761   const TargetRegisterClass *RC = MRI->getRegClass(DestReg);
    762   unsigned CopyReg = MRI->createVirtualRegister(RC);
    763 
    764   MachineInstr *CopyInstr = BuildMI(*MBB,
    765                                     MBB->SkipPHIsAndLabels(MBB->begin()),
    766                                     PHI->getDebugLoc(),
    767                                     TII->get(TargetOpcode::COPY),
    768                                     DestReg).addReg(CopyReg);
    769   LI->InsertMachineInstrInMaps(CopyInstr);
    770   PHI->getOperand(0).setReg(CopyReg);
    771   ++NumDestCopiesInserted;
    772 
    773   // Add the region from the beginning of MBB to the copy instruction to
    774   // CopyReg's live interval, and give the VNInfo the phidef flag.
    775   LiveInterval &CopyLI = LI->getOrCreateInterval(CopyReg);
    776   SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
    777   SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr);
    778   VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex,
    779                                         LI->getVNInfoAllocator());
    780   CopyLI.addRange(LiveRange(MBBStartIndex,
    781                             DestCopyIndex.getRegSlot(),
    782                             CopyVNI));
    783 
    784   // Adjust DestReg's live interval to adjust for its new definition at
    785   // CopyInstr.
    786   LiveInterval &DestLI = LI->getOrCreateInterval(DestReg);
    787   SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
    788   DestLI.removeRange(PHIIndex.getRegSlot(), DestCopyIndex.getRegSlot());
    789 
    790   VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot());
    791   assert(DestVNI);
    792   DestVNI->def = DestCopyIndex.getRegSlot();
    793 
    794   InsertedDestCopies[CopyReg] = CopyInstr;
    795 }
    796 
    797 void StrongPHIElimination::MergeLIsAndRename(unsigned Reg, unsigned NewReg) {
    798   if (Reg == NewReg)
    799     return;
    800 
    801   LiveInterval &OldLI = LI->getInterval(Reg);
    802   LiveInterval &NewLI = LI->getInterval(NewReg);
    803 
    804   // Merge the live ranges of the two registers.
    805   DenseMap<VNInfo*, VNInfo*> VNMap;
    806   for (LiveInterval::iterator LRI = OldLI.begin(), LRE = OldLI.end();
    807        LRI != LRE; ++LRI) {
    808     LiveRange OldLR = *LRI;
    809     VNInfo *OldVN = OldLR.valno;
    810 
    811     VNInfo *&NewVN = VNMap[OldVN];
    812     if (!NewVN) {
    813       NewVN = NewLI.createValueCopy(OldVN, LI->getVNInfoAllocator());
    814       VNMap[OldVN] = NewVN;
    815     }
    816 
    817     LiveRange LR(OldLR.start, OldLR.end, NewVN);
    818     NewLI.addRange(LR);
    819   }
    820 
    821   // Remove the LiveInterval for the register being renamed and replace all
    822   // of its defs and uses with the new register.
    823   LI->removeInterval(Reg);
    824   MRI->replaceRegWith(Reg, NewReg);
    825 }
    826