Home | History | Annotate | Download | only in CodeGen
      1 //===- StrongPHIElimination.cpp - Eliminate PHI nodes by inserting copies -===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This pass eliminates PHI instructions by aggressively coalescing the copies
     11 // that would be inserted by a naive algorithm and only inserting the copies
     12 // that are necessary. The coalescing technique initially assumes that all
     13 // registers appearing in a PHI instruction do not interfere. It then eliminates
     14 // proven interferences, using dominators to only perform a linear number of
     15 // interference tests instead of the quadratic number of interference tests
     16 // that this would naively require. This is a technique derived from:
     17 //
     18 //    Budimlic, et al. Fast copy coalescing and live-range identification.
     19 //    In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language
     20 //    Design and Implementation (Berlin, Germany, June 17 - 19, 2002).
     21 //    PLDI '02. ACM, New York, NY, 25-32.
     22 //
     23 // The original implementation constructs a data structure they call a dominance
     24 // forest for this purpose. The dominance forest was shown to be unnecessary,
     25 // as it is possible to emulate the creation and traversal of a dominance forest
     26 // by directly using the dominator tree, rather than actually constructing the
     27 // dominance forest.  This technique is explained in:
     28 //
     29 //   Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness, Code
     30 //     Quality and Efficiency,
     31 //   In Proceedings of the 7th annual IEEE/ACM International Symposium on Code
     32 //   Generation and Optimization (Seattle, Washington, March 22 - 25, 2009).
     33 //   CGO '09. IEEE, Washington, DC, 114-125.
     34 //
     35 // Careful implementation allows for all of the dominator forest interference
     36 // checks to be performed at once in a single depth-first traversal of the
     37 // dominator tree, which is what is implemented here.
     38 //
     39 //===----------------------------------------------------------------------===//
     40 
     41 #define DEBUG_TYPE "strongphielim"
     42 #include "PHIEliminationUtils.h"
     43 #include "llvm/CodeGen/Passes.h"
     44 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
     45 #include "llvm/CodeGen/MachineDominators.h"
     46 #include "llvm/CodeGen/MachineFunctionPass.h"
     47 #include "llvm/CodeGen/MachineInstrBuilder.h"
     48 #include "llvm/CodeGen/MachineRegisterInfo.h"
     49 #include "llvm/Target/TargetInstrInfo.h"
     50 #include "llvm/ADT/DenseSet.h"
     51 #include "llvm/ADT/Statistic.h"
     52 #include "llvm/Support/Debug.h"
     53 using namespace llvm;
     54 
     55 namespace {
     56   class StrongPHIElimination : public MachineFunctionPass {
     57   public:
     58     static char ID; // Pass identification, replacement for typeid
     59     StrongPHIElimination() : MachineFunctionPass(ID) {
     60       initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
     61     }
     62 
     63     virtual void getAnalysisUsage(AnalysisUsage&) const;
     64     bool runOnMachineFunction(MachineFunction&);
     65 
     66   private:
     67     /// This struct represents a single node in the union-find data structure
     68     /// representing the variable congruence classes. There is one difference
     69     /// from a normal union-find data structure. We steal two bits from the parent
     70     /// pointer . One of these bits is used to represent whether the register
     71     /// itself has been isolated, and the other is used to represent whether the
     72     /// PHI with that register as its destination has been isolated.
     73     ///
     74     /// Note that this leads to the strange situation where the leader of a
     75     /// congruence class may no longer logically be a member, due to being
     76     /// isolated.
     77     struct Node {
     78       enum Flags {
     79         kRegisterIsolatedFlag = 1,
     80         kPHIIsolatedFlag = 2
     81       };
     82       Node(unsigned v) : value(v), rank(0) { parent.setPointer(this); }
     83 
     84       Node *getLeader();
     85 
     86       PointerIntPair<Node*, 2> parent;
     87       unsigned value;
     88       unsigned rank;
     89     };
     90 
     91     /// Add a register in a new congruence class containing only itself.
     92     void addReg(unsigned);
     93 
     94     /// Join the congruence classes of two registers. This function is biased
     95     /// towards the left argument, i.e. after
     96     ///
     97     /// addReg(r2);
     98     /// unionRegs(r1, r2);
     99     ///
    100     /// the leader of the unioned congruence class is the same as the leader of
    101     /// r1's congruence class prior to the union. This is actually relied upon
    102     /// in the copy insertion code.
    103     void unionRegs(unsigned, unsigned);
    104 
    105     /// Get the color of a register. The color is 0 if the register has been
    106     /// isolated.
    107     unsigned getRegColor(unsigned);
    108 
    109     // Isolate a register.
    110     void isolateReg(unsigned);
    111 
    112     /// Get the color of a PHI. The color of a PHI is 0 if the PHI has been
    113     /// isolated. Otherwise, it is the original color of its destination and
    114     /// all of its operands (before they were isolated, if they were).
    115     unsigned getPHIColor(MachineInstr*);
    116 
    117     /// Isolate a PHI.
    118     void isolatePHI(MachineInstr*);
    119 
    120     /// Traverses a basic block, splitting any interferences found between
    121     /// registers in the same congruence class. It takes two DenseMaps as
    122     /// arguments that it also updates: CurrentDominatingParent, which maps
    123     /// a color to the register in that congruence class whose definition was
    124     /// most recently seen, and ImmediateDominatingParent, which maps a register
    125     /// to the register in the same congruence class that most immediately
    126     /// dominates it.
    127     ///
    128     /// This function assumes that it is being called in a depth-first traversal
    129     /// of the dominator tree.
    130     void SplitInterferencesForBasicBlock(
    131       MachineBasicBlock&,
    132       DenseMap<unsigned, unsigned> &CurrentDominatingParent,
    133       DenseMap<unsigned, unsigned> &ImmediateDominatingParent);
    134 
    135     // Lowers a PHI instruction, inserting copies of the source and destination
    136     // registers as necessary.
    137     void InsertCopiesForPHI(MachineInstr*, MachineBasicBlock*);
    138 
    139     // Merges the live interval of Reg into NewReg and renames Reg to NewReg
    140     // everywhere that Reg appears. Requires Reg and NewReg to have non-
    141     // overlapping lifetimes.
    142     void MergeLIsAndRename(unsigned Reg, unsigned NewReg);
    143 
    144     MachineRegisterInfo *MRI;
    145     const TargetInstrInfo *TII;
    146     MachineDominatorTree *DT;
    147     LiveIntervals *LI;
    148 
    149     BumpPtrAllocator Allocator;
    150 
    151     DenseMap<unsigned, Node*> RegNodeMap;
    152 
    153     // Maps a basic block to a list of its defs of registers that appear as PHI
    154     // sources.
    155     DenseMap<MachineBasicBlock*, std::vector<MachineInstr*> > PHISrcDefs;
    156 
    157     // Maps a color to a pair of a MachineInstr* and a virtual register, which
    158     // is the operand of that PHI corresponding to the current basic block.
    159     DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > CurrentPHIForColor;
    160 
    161     // FIXME: Can these two data structures be combined? Would a std::multimap
    162     // be any better?
    163 
    164     // Stores pairs of predecessor basic blocks and the source registers of
    165     // inserted copy instructions.
    166     typedef DenseSet<std::pair<MachineBasicBlock*, unsigned> > SrcCopySet;
    167     SrcCopySet InsertedSrcCopySet;
    168 
    169     // Maps pairs of predecessor basic blocks and colors to their defining copy
    170     // instructions.
    171     typedef DenseMap<std::pair<MachineBasicBlock*, unsigned>, MachineInstr*>
    172       SrcCopyMap;
    173     SrcCopyMap InsertedSrcCopyMap;
    174 
    175     // Maps inserted destination copy registers to their defining copy
    176     // instructions.
    177     typedef DenseMap<unsigned, MachineInstr*> DestCopyMap;
    178     DestCopyMap InsertedDestCopies;
    179   };
    180 
    181   struct MIIndexCompare {
    182     MIIndexCompare(LiveIntervals *LiveIntervals) : LI(LiveIntervals) { }
    183 
    184     bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
    185       return LI->getInstructionIndex(LHS) < LI->getInstructionIndex(RHS);
    186     }
    187 
    188     LiveIntervals *LI;
    189   };
    190 } // namespace
    191 
    192 STATISTIC(NumPHIsLowered, "Number of PHIs lowered");
    193 STATISTIC(NumDestCopiesInserted, "Number of destination copies inserted");
    194 STATISTIC(NumSrcCopiesInserted, "Number of source copies inserted");
    195 
    196 char StrongPHIElimination::ID = 0;
    197 INITIALIZE_PASS_BEGIN(StrongPHIElimination, "strong-phi-node-elimination",
    198   "Eliminate PHI nodes for register allocation, intelligently", false, false)
    199 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
    200 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
    201 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
    202 INITIALIZE_PASS_END(StrongPHIElimination, "strong-phi-node-elimination",
    203   "Eliminate PHI nodes for register allocation, intelligently", false, false)
    204 
    205 char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID;
    206 
    207 void StrongPHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
    208   AU.setPreservesCFG();
    209   AU.addRequired<MachineDominatorTree>();
    210   AU.addRequired<SlotIndexes>();
    211   AU.addPreserved<SlotIndexes>();
    212   AU.addRequired<LiveIntervals>();
    213   AU.addPreserved<LiveIntervals>();
    214   MachineFunctionPass::getAnalysisUsage(AU);
    215 }
    216 
    217 static MachineOperand *findLastUse(MachineBasicBlock *MBB, unsigned Reg) {
    218   // FIXME: This only needs to check from the first terminator, as only the
    219   // first terminator can use a virtual register.
    220   for (MachineBasicBlock::reverse_iterator RI = MBB->rbegin(); ; ++RI) {
    221     assert (RI != MBB->rend());
    222     MachineInstr *MI = &*RI;
    223 
    224     for (MachineInstr::mop_iterator OI = MI->operands_begin(),
    225          OE = MI->operands_end(); OI != OE; ++OI) {
    226       MachineOperand &MO = *OI;
    227       if (MO.isReg() && MO.isUse() && MO.getReg() == Reg)
    228         return &MO;
    229     }
    230   }
    231   return NULL;
    232 }
    233 
    234 bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) {
    235   MRI = &MF.getRegInfo();
    236   TII = MF.getTarget().getInstrInfo();
    237   DT = &getAnalysis<MachineDominatorTree>();
    238   LI = &getAnalysis<LiveIntervals>();
    239 
    240   for (MachineFunction::iterator I = MF.begin(), E = MF.end();
    241        I != E; ++I) {
    242     for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
    243          BBI != BBE && BBI->isPHI(); ++BBI) {
    244       unsigned DestReg = BBI->getOperand(0).getReg();
    245       addReg(DestReg);
    246       PHISrcDefs[I].push_back(BBI);
    247 
    248       for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
    249         MachineOperand &SrcMO = BBI->getOperand(i);
    250         unsigned SrcReg = SrcMO.getReg();
    251         addReg(SrcReg);
    252         unionRegs(DestReg, SrcReg);
    253 
    254         MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
    255         if (DefMI)
    256           PHISrcDefs[DefMI->getParent()].push_back(DefMI);
    257       }
    258     }
    259   }
    260 
    261   // Perform a depth-first traversal of the dominator tree, splitting
    262   // interferences amongst PHI-congruence classes.
    263   DenseMap<unsigned, unsigned> CurrentDominatingParent;
    264   DenseMap<unsigned, unsigned> ImmediateDominatingParent;
    265   for (df_iterator<MachineDomTreeNode*> DI = df_begin(DT->getRootNode()),
    266        DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
    267     SplitInterferencesForBasicBlock(*DI->getBlock(),
    268                                     CurrentDominatingParent,
    269                                     ImmediateDominatingParent);
    270   }
    271 
    272   // Insert copies for all PHI source and destination registers.
    273   for (MachineFunction::iterator I = MF.begin(), E = MF.end();
    274        I != E; ++I) {
    275     for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
    276          BBI != BBE && BBI->isPHI(); ++BBI) {
    277       InsertCopiesForPHI(BBI, I);
    278     }
    279   }
    280 
    281   // FIXME: Preserve the equivalence classes during copy insertion and use
    282   // the preversed equivalence classes instead of recomputing them.
    283   RegNodeMap.clear();
    284   for (MachineFunction::iterator I = MF.begin(), E = MF.end();
    285        I != E; ++I) {
    286     for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
    287          BBI != BBE && BBI->isPHI(); ++BBI) {
    288       unsigned DestReg = BBI->getOperand(0).getReg();
    289       addReg(DestReg);
    290 
    291       for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
    292         unsigned SrcReg = BBI->getOperand(i).getReg();
    293         addReg(SrcReg);
    294         unionRegs(DestReg, SrcReg);
    295       }
    296     }
    297   }
    298 
    299   DenseMap<unsigned, unsigned> RegRenamingMap;
    300   bool Changed = false;
    301   for (MachineFunction::iterator I = MF.begin(), E = MF.end();
    302        I != E; ++I) {
    303     MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
    304     while (BBI != BBE && BBI->isPHI()) {
    305       MachineInstr *PHI = BBI;
    306 
    307       assert(PHI->getNumOperands() > 0);
    308 
    309       unsigned SrcReg = PHI->getOperand(1).getReg();
    310       unsigned SrcColor = getRegColor(SrcReg);
    311       unsigned NewReg = RegRenamingMap[SrcColor];
    312       if (!NewReg) {
    313         NewReg = SrcReg;
    314         RegRenamingMap[SrcColor] = SrcReg;
    315       }
    316       MergeLIsAndRename(SrcReg, NewReg);
    317 
    318       unsigned DestReg = PHI->getOperand(0).getReg();
    319       if (!InsertedDestCopies.count(DestReg))
    320         MergeLIsAndRename(DestReg, NewReg);
    321 
    322       for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) {
    323         unsigned SrcReg = PHI->getOperand(i).getReg();
    324         MergeLIsAndRename(SrcReg, NewReg);
    325       }
    326 
    327       ++BBI;
    328       LI->RemoveMachineInstrFromMaps(PHI);
    329       PHI->eraseFromParent();
    330       Changed = true;
    331     }
    332   }
    333 
    334   // Due to the insertion of copies to split live ranges, the live intervals are
    335   // guaranteed to not overlap, except in one case: an original PHI source and a
    336   // PHI destination copy. In this case, they have the same value and thus don't
    337   // truly intersect, so we merge them into the value live at that point.
    338   // FIXME: Is there some better way we can handle this?
    339   for (DestCopyMap::iterator I = InsertedDestCopies.begin(),
    340        E = InsertedDestCopies.end(); I != E; ++I) {
    341     unsigned DestReg = I->first;
    342     unsigned DestColor = getRegColor(DestReg);
    343     unsigned NewReg = RegRenamingMap[DestColor];
    344 
    345     LiveInterval &DestLI = LI->getInterval(DestReg);
    346     LiveInterval &NewLI = LI->getInterval(NewReg);
    347 
    348     assert(DestLI.ranges.size() == 1
    349            && "PHI destination copy's live interval should be a single live "
    350                "range from the beginning of the BB to the copy instruction.");
    351     LiveRange *DestLR = DestLI.begin();
    352     VNInfo *NewVNI = NewLI.getVNInfoAt(DestLR->start);
    353     if (!NewVNI) {
    354       NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator());
    355       MachineInstr *CopyInstr = I->second;
    356       CopyInstr->getOperand(1).setIsKill(true);
    357     }
    358 
    359     LiveRange NewLR(DestLR->start, DestLR->end, NewVNI);
    360     NewLI.addRange(NewLR);
    361 
    362     LI->removeInterval(DestReg);
    363     MRI->replaceRegWith(DestReg, NewReg);
    364   }
    365 
    366   // Adjust the live intervals of all PHI source registers to handle the case
    367   // where the PHIs in successor blocks were the only later uses of the source
    368   // register.
    369   for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(),
    370        E = InsertedSrcCopySet.end(); I != E; ++I) {
    371     MachineBasicBlock *MBB = I->first;
    372     unsigned SrcReg = I->second;
    373     if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)])
    374       SrcReg = RenamedRegister;
    375 
    376     LiveInterval &SrcLI = LI->getInterval(SrcReg);
    377 
    378     bool isLiveOut = false;
    379     for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
    380          SE = MBB->succ_end(); SI != SE; ++SI) {
    381       if (SrcLI.liveAt(LI->getMBBStartIdx(*SI))) {
    382         isLiveOut = true;
    383         break;
    384       }
    385     }
    386 
    387     if (isLiveOut)
    388       continue;
    389 
    390     MachineOperand *LastUse = findLastUse(MBB, SrcReg);
    391     assert(LastUse);
    392     SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent());
    393     SrcLI.removeRange(LastUseIndex.getDefIndex(), LI->getMBBEndIdx(MBB));
    394     LastUse->setIsKill(true);
    395   }
    396 
    397   LI->renumber();
    398 
    399   Allocator.Reset();
    400   RegNodeMap.clear();
    401   PHISrcDefs.clear();
    402   InsertedSrcCopySet.clear();
    403   InsertedSrcCopyMap.clear();
    404   InsertedDestCopies.clear();
    405 
    406   return Changed;
    407 }
    408 
    409 void StrongPHIElimination::addReg(unsigned Reg) {
    410   if (RegNodeMap.count(Reg))
    411     return;
    412   RegNodeMap[Reg] = new (Allocator) Node(Reg);
    413 }
    414 
    415 StrongPHIElimination::Node*
    416 StrongPHIElimination::Node::getLeader() {
    417   Node *N = this;
    418   Node *Parent = parent.getPointer();
    419   Node *Grandparent = Parent->parent.getPointer();
    420 
    421   while (Parent != Grandparent) {
    422     N->parent.setPointer(Grandparent);
    423     N = Grandparent;
    424     Parent = Parent->parent.getPointer();
    425     Grandparent = Parent->parent.getPointer();
    426   }
    427 
    428   return Parent;
    429 }
    430 
    431 unsigned StrongPHIElimination::getRegColor(unsigned Reg) {
    432   DenseMap<unsigned, Node*>::iterator RI = RegNodeMap.find(Reg);
    433   if (RI == RegNodeMap.end())
    434     return 0;
    435   Node *Node = RI->second;
    436   if (Node->parent.getInt() & Node::kRegisterIsolatedFlag)
    437     return 0;
    438   return Node->getLeader()->value;
    439 }
    440 
    441 void StrongPHIElimination::unionRegs(unsigned Reg1, unsigned Reg2) {
    442   Node *Node1 = RegNodeMap[Reg1]->getLeader();
    443   Node *Node2 = RegNodeMap[Reg2]->getLeader();
    444 
    445   if (Node1->rank > Node2->rank) {
    446     Node2->parent.setPointer(Node1->getLeader());
    447   } else if (Node1->rank < Node2->rank) {
    448     Node1->parent.setPointer(Node2->getLeader());
    449   } else if (Node1 != Node2) {
    450     Node2->parent.setPointer(Node1->getLeader());
    451     Node1->rank++;
    452   }
    453 }
    454 
    455 void StrongPHIElimination::isolateReg(unsigned Reg) {
    456   Node *Node = RegNodeMap[Reg];
    457   Node->parent.setInt(Node->parent.getInt() | Node::kRegisterIsolatedFlag);
    458 }
    459 
    460 unsigned StrongPHIElimination::getPHIColor(MachineInstr *PHI) {
    461   assert(PHI->isPHI());
    462 
    463   unsigned DestReg = PHI->getOperand(0).getReg();
    464   Node *DestNode = RegNodeMap[DestReg];
    465   if (DestNode->parent.getInt() & Node::kPHIIsolatedFlag)
    466     return 0;
    467 
    468   for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) {
    469     unsigned SrcColor = getRegColor(PHI->getOperand(i).getReg());
    470     if (SrcColor)
    471       return SrcColor;
    472   }
    473   return 0;
    474 }
    475 
    476 void StrongPHIElimination::isolatePHI(MachineInstr *PHI) {
    477   assert(PHI->isPHI());
    478   Node *Node = RegNodeMap[PHI->getOperand(0).getReg()];
    479   Node->parent.setInt(Node->parent.getInt() | Node::kPHIIsolatedFlag);
    480 }
    481 
    482 /// SplitInterferencesForBasicBlock - traverses a basic block, splitting any
    483 /// interferences found between registers in the same congruence class. It
    484 /// takes two DenseMaps as arguments that it also updates:
    485 ///
    486 /// 1) CurrentDominatingParent, which maps a color to the register in that
    487 ///    congruence class whose definition was most recently seen.
    488 ///
    489 /// 2) ImmediateDominatingParent, which maps a register to the register in the
    490 ///    same congruence class that most immediately dominates it.
    491 ///
    492 /// This function assumes that it is being called in a depth-first traversal
    493 /// of the dominator tree.
    494 ///
    495 /// The algorithm used here is a generalization of the dominance-based SSA test
    496 /// for two variables. If there are variables a_1, ..., a_n such that
    497 ///
    498 ///   def(a_1) dom ... dom def(a_n),
    499 ///
    500 /// then we can test for an interference between any two a_i by only using O(n)
    501 /// interference tests between pairs of variables. If i < j and a_i and a_j
    502 /// interfere, then a_i is alive at def(a_j), so it is also alive at def(a_i+1).
    503 /// Thus, in order to test for an interference involving a_i, we need only check
    504 /// for a potential interference with a_i+1.
    505 ///
    506 /// This method can be generalized to arbitrary sets of variables by performing
    507 /// a depth-first traversal of the dominator tree. As we traverse down a branch
    508 /// of the dominator tree, we keep track of the current dominating variable and
    509 /// only perform an interference test with that variable. However, when we go to
    510 /// another branch of the dominator tree, the definition of the current dominating
    511 /// variable may no longer dominate the current block. In order to correct this,
    512 /// we need to use a stack of past choices of the current dominating variable
    513 /// and pop from this stack until we find a variable whose definition actually
    514 /// dominates the current block.
    515 ///
    516 /// There will be one push on this stack for each variable that has become the
    517 /// current dominating variable, so instead of using an explicit stack we can
    518 /// simply associate the previous choice for a current dominating variable with
    519 /// the new choice. This works better in our implementation, where we test for
    520 /// interference in multiple distinct sets at once.
    521 void
    522 StrongPHIElimination::SplitInterferencesForBasicBlock(
    523     MachineBasicBlock &MBB,
    524     DenseMap<unsigned, unsigned> &CurrentDominatingParent,
    525     DenseMap<unsigned, unsigned> &ImmediateDominatingParent) {
    526   // Sort defs by their order in the original basic block, as the code below
    527   // assumes that it is processing definitions in dominance order.
    528   std::vector<MachineInstr*> &DefInstrs = PHISrcDefs[&MBB];
    529   std::sort(DefInstrs.begin(), DefInstrs.end(), MIIndexCompare(LI));
    530 
    531   for (std::vector<MachineInstr*>::const_iterator BBI = DefInstrs.begin(),
    532        BBE = DefInstrs.end(); BBI != BBE; ++BBI) {
    533     for (MachineInstr::const_mop_iterator I = (*BBI)->operands_begin(),
    534          E = (*BBI)->operands_end(); I != E; ++I) {
    535       const MachineOperand &MO = *I;
    536 
    537       // FIXME: This would be faster if it were possible to bail out of checking
    538       // an instruction's operands after the explicit defs, but this is incorrect
    539       // for variadic instructions, which may appear before register allocation
    540       // in the future.
    541       if (!MO.isReg() || !MO.isDef())
    542         continue;
    543 
    544       unsigned DestReg = MO.getReg();
    545       if (!DestReg || !TargetRegisterInfo::isVirtualRegister(DestReg))
    546         continue;
    547 
    548       // If the virtual register being defined is not used in any PHI or has
    549       // already been isolated, then there are no more interferences to check.
    550       unsigned DestColor = getRegColor(DestReg);
    551       if (!DestColor)
    552         continue;
    553 
    554       // The input to this pass sometimes is not in SSA form in every basic
    555       // block, as some virtual registers have redefinitions. We could eliminate
    556       // this by fixing the passes that generate the non-SSA code, or we could
    557       // handle it here by tracking defining machine instructions rather than
    558       // virtual registers. For now, we just handle the situation conservatively
    559       // in a way that will possibly lead to false interferences.
    560       unsigned &CurrentParent = CurrentDominatingParent[DestColor];
    561       unsigned NewParent = CurrentParent;
    562       if (NewParent == DestReg)
    563         continue;
    564 
    565       // Pop registers from the stack represented by ImmediateDominatingParent
    566       // until we find a parent that dominates the current instruction.
    567       while (NewParent && (!DT->dominates(MRI->getVRegDef(NewParent), *BBI)
    568                            || !getRegColor(NewParent)))
    569         NewParent = ImmediateDominatingParent[NewParent];
    570 
    571       // If NewParent is nonzero, then its definition dominates the current
    572       // instruction, so it is only necessary to check for the liveness of
    573       // NewParent in order to check for an interference.
    574       if (NewParent
    575           && LI->getInterval(NewParent).liveAt(LI->getInstructionIndex(*BBI))) {
    576         // If there is an interference, always isolate the new register. This
    577         // could be improved by using a heuristic that decides which of the two
    578         // registers to isolate.
    579         isolateReg(DestReg);
    580         CurrentParent = NewParent;
    581       } else {
    582         // If there is no interference, update ImmediateDominatingParent and set
    583         // the CurrentDominatingParent for this color to the current register.
    584         ImmediateDominatingParent[DestReg] = NewParent;
    585         CurrentParent = DestReg;
    586       }
    587     }
    588   }
    589 
    590   // We now walk the PHIs in successor blocks and check for interferences. This
    591   // is necessary because the use of a PHI's operands are logically contained in
    592   // the predecessor block. The def of a PHI's destination register is processed
    593   // along with the other defs in a basic block.
    594 
    595   CurrentPHIForColor.clear();
    596 
    597   for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
    598        SE = MBB.succ_end(); SI != SE; ++SI) {
    599     for (MachineBasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end();
    600          BBI != BBE && BBI->isPHI(); ++BBI) {
    601       MachineInstr *PHI = BBI;
    602 
    603       // If a PHI is already isolated, either by being isolated directly or
    604       // having all of its operands isolated, ignore it.
    605       unsigned Color = getPHIColor(PHI);
    606       if (!Color)
    607         continue;
    608 
    609       // Find the index of the PHI operand that corresponds to this basic block.
    610       unsigned PredIndex;
    611       for (PredIndex = 1; PredIndex < PHI->getNumOperands(); PredIndex += 2) {
    612         if (PHI->getOperand(PredIndex + 1).getMBB() == &MBB)
    613           break;
    614       }
    615       assert(PredIndex < PHI->getNumOperands());
    616       unsigned PredOperandReg = PHI->getOperand(PredIndex).getReg();
    617 
    618       // Pop registers from the stack represented by ImmediateDominatingParent
    619       // until we find a parent that dominates the current instruction.
    620       unsigned &CurrentParent = CurrentDominatingParent[Color];
    621       unsigned NewParent = CurrentParent;
    622       while (NewParent
    623              && (!DT->dominates(MRI->getVRegDef(NewParent)->getParent(), &MBB)
    624                  || !getRegColor(NewParent)))
    625         NewParent = ImmediateDominatingParent[NewParent];
    626       CurrentParent = NewParent;
    627 
    628       // If there is an interference with a register, always isolate the
    629       // register rather than the PHI. It is also possible to isolate the
    630       // PHI, but that introduces copies for all of the registers involved
    631       // in that PHI.
    632       if (NewParent && LI->isLiveOutOfMBB(LI->getInterval(NewParent), &MBB)
    633                     && NewParent != PredOperandReg)
    634         isolateReg(NewParent);
    635 
    636       std::pair<MachineInstr*, unsigned>
    637         &CurrentPHI = CurrentPHIForColor[Color];
    638 
    639       // If two PHIs have the same operand from every shared predecessor, then
    640       // they don't actually interfere. Otherwise, isolate the current PHI. This
    641       // could possibly be improved, e.g. we could isolate the PHI with the
    642       // fewest operands.
    643       if (CurrentPHI.first && CurrentPHI.second != PredOperandReg)
    644         isolatePHI(PHI);
    645       else
    646         CurrentPHI = std::make_pair(PHI, PredOperandReg);
    647     }
    648   }
    649 }
    650 
    651 void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
    652                                               MachineBasicBlock *MBB) {
    653   assert(PHI->isPHI());
    654   ++NumPHIsLowered;
    655   unsigned PHIColor = getPHIColor(PHI);
    656 
    657   for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) {
    658     MachineOperand &SrcMO = PHI->getOperand(i);
    659 
    660     // If a source is defined by an implicit def, there is no need to insert a
    661     // copy in the predecessor.
    662     if (SrcMO.isUndef())
    663       continue;
    664 
    665     unsigned SrcReg = SrcMO.getReg();
    666     assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
    667            "Machine PHI Operands must all be virtual registers!");
    668 
    669     MachineBasicBlock *PredBB = PHI->getOperand(i + 1).getMBB();
    670     unsigned SrcColor = getRegColor(SrcReg);
    671 
    672     // If neither the PHI nor the operand were isolated, then we only need to
    673     // set the phi-kill flag on the VNInfo at this PHI.
    674     if (PHIColor && SrcColor == PHIColor) {
    675       LiveInterval &SrcInterval = LI->getInterval(SrcReg);
    676       SlotIndex PredIndex = LI->getMBBEndIdx(PredBB);
    677       VNInfo *SrcVNI = SrcInterval.getVNInfoBefore(PredIndex);
    678       assert(SrcVNI);
    679       SrcVNI->setHasPHIKill(true);
    680       continue;
    681     }
    682 
    683     unsigned CopyReg = 0;
    684     if (PHIColor) {
    685       SrcCopyMap::const_iterator I
    686         = InsertedSrcCopyMap.find(std::make_pair(PredBB, PHIColor));
    687       CopyReg
    688         = I != InsertedSrcCopyMap.end() ? I->second->getOperand(0).getReg() : 0;
    689     }
    690 
    691     if (!CopyReg) {
    692       const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
    693       CopyReg = MRI->createVirtualRegister(RC);
    694 
    695       MachineBasicBlock::iterator
    696         CopyInsertPoint = findPHICopyInsertPoint(PredBB, MBB, SrcReg);
    697       unsigned SrcSubReg = SrcMO.getSubReg();
    698       MachineInstr *CopyInstr = BuildMI(*PredBB,
    699                                         CopyInsertPoint,
    700                                         PHI->getDebugLoc(),
    701                                         TII->get(TargetOpcode::COPY),
    702                                         CopyReg).addReg(SrcReg, 0, SrcSubReg);
    703       LI->InsertMachineInstrInMaps(CopyInstr);
    704       ++NumSrcCopiesInserted;
    705 
    706       // addLiveRangeToEndOfBlock() also adds the phikill flag to the VNInfo for
    707       // the newly added range.
    708       LI->addLiveRangeToEndOfBlock(CopyReg, CopyInstr);
    709       InsertedSrcCopySet.insert(std::make_pair(PredBB, SrcReg));
    710 
    711       addReg(CopyReg);
    712       if (PHIColor) {
    713         unionRegs(PHIColor, CopyReg);
    714         assert(getRegColor(CopyReg) != CopyReg);
    715       } else {
    716         PHIColor = CopyReg;
    717         assert(getRegColor(CopyReg) == CopyReg);
    718       }
    719 
    720       if (!InsertedSrcCopyMap.count(std::make_pair(PredBB, PHIColor)))
    721         InsertedSrcCopyMap[std::make_pair(PredBB, PHIColor)] = CopyInstr;
    722     }
    723 
    724     SrcMO.setReg(CopyReg);
    725 
    726     // If SrcReg is not live beyond the PHI, trim its interval so that it is no
    727     // longer live-in to MBB. Note that SrcReg may appear in other PHIs that are
    728     // processed later, but this is still correct to do at this point because we
    729     // never rely on LiveIntervals being correct while inserting copies.
    730     // FIXME: Should this just count uses at PHIs like the normal PHIElimination
    731     // pass does?
    732     LiveInterval &SrcLI = LI->getInterval(SrcReg);
    733     SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
    734     SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
    735     SlotIndex NextInstrIndex = PHIIndex.getNextIndex();
    736     if (SrcLI.liveAt(MBBStartIndex) && SrcLI.expiredAt(NextInstrIndex))
    737       SrcLI.removeRange(MBBStartIndex, PHIIndex, true);
    738   }
    739 
    740   unsigned DestReg = PHI->getOperand(0).getReg();
    741   unsigned DestColor = getRegColor(DestReg);
    742 
    743   if (PHIColor && DestColor == PHIColor) {
    744     LiveInterval &DestLI = LI->getInterval(DestReg);
    745 
    746     // Set the phi-def flag for the VN at this PHI.
    747     SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
    748     VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getDefIndex());
    749     assert(DestVNI);
    750     DestVNI->setIsPHIDef(true);
    751 
    752     // Prior to PHI elimination, the live ranges of PHIs begin at their defining
    753     // instruction. After PHI elimination, PHI instructions are replaced by VNs
    754     // with the phi-def flag set, and the live ranges of these VNs start at the
    755     // beginning of the basic block.
    756     SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
    757     DestVNI->def = MBBStartIndex;
    758     DestLI.addRange(LiveRange(MBBStartIndex,
    759                               PHIIndex.getDefIndex(),
    760                               DestVNI));
    761     return;
    762   }
    763 
    764   const TargetRegisterClass *RC = MRI->getRegClass(DestReg);
    765   unsigned CopyReg = MRI->createVirtualRegister(RC);
    766 
    767   MachineInstr *CopyInstr = BuildMI(*MBB,
    768                                     MBB->SkipPHIsAndLabels(MBB->begin()),
    769                                     PHI->getDebugLoc(),
    770                                     TII->get(TargetOpcode::COPY),
    771                                     DestReg).addReg(CopyReg);
    772   LI->InsertMachineInstrInMaps(CopyInstr);
    773   PHI->getOperand(0).setReg(CopyReg);
    774   ++NumDestCopiesInserted;
    775 
    776   // Add the region from the beginning of MBB to the copy instruction to
    777   // CopyReg's live interval, and give the VNInfo the phidef flag.
    778   LiveInterval &CopyLI = LI->getOrCreateInterval(CopyReg);
    779   SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
    780   SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr);
    781   VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex,
    782                                         CopyInstr,
    783                                         LI->getVNInfoAllocator());
    784   CopyVNI->setIsPHIDef(true);
    785   CopyLI.addRange(LiveRange(MBBStartIndex,
    786                             DestCopyIndex.getDefIndex(),
    787                             CopyVNI));
    788 
    789   // Adjust DestReg's live interval to adjust for its new definition at
    790   // CopyInstr.
    791   LiveInterval &DestLI = LI->getOrCreateInterval(DestReg);
    792   SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
    793   DestLI.removeRange(PHIIndex.getDefIndex(), DestCopyIndex.getDefIndex());
    794 
    795   VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getDefIndex());
    796   assert(DestVNI);
    797   DestVNI->def = DestCopyIndex.getDefIndex();
    798 
    799   InsertedDestCopies[CopyReg] = CopyInstr;
    800 }
    801 
    802 void StrongPHIElimination::MergeLIsAndRename(unsigned Reg, unsigned NewReg) {
    803   if (Reg == NewReg)
    804     return;
    805 
    806   LiveInterval &OldLI = LI->getInterval(Reg);
    807   LiveInterval &NewLI = LI->getInterval(NewReg);
    808 
    809   // Merge the live ranges of the two registers.
    810   DenseMap<VNInfo*, VNInfo*> VNMap;
    811   for (LiveInterval::iterator LRI = OldLI.begin(), LRE = OldLI.end();
    812        LRI != LRE; ++LRI) {
    813     LiveRange OldLR = *LRI;
    814     VNInfo *OldVN = OldLR.valno;
    815 
    816     VNInfo *&NewVN = VNMap[OldVN];
    817     if (!NewVN) {
    818       NewVN = NewLI.createValueCopy(OldVN, LI->getVNInfoAllocator());
    819       VNMap[OldVN] = NewVN;
    820     }
    821 
    822     LiveRange LR(OldLR.start, OldLR.end, NewVN);
    823     NewLI.addRange(LR);
    824   }
    825 
    826   // Remove the LiveInterval for the register being renamed and replace all
    827   // of its defs and uses with the new register.
    828   LI->removeInterval(Reg);
    829   MRI->replaceRegWith(Reg, NewReg);
    830 }
    831