Home | History | Annotate | Download | only in CodeGen
      1 //===-- TailDuplication.cpp - Duplicate blocks into predecessors' tails ---===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This pass duplicates basic blocks ending in unconditional branches into
     11 // the tails of their predecessors.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #define DEBUG_TYPE "tailduplication"
     16 #include "llvm/Function.h"
     17 #include "llvm/CodeGen/Passes.h"
     18 #include "llvm/CodeGen/MachineModuleInfo.h"
     19 #include "llvm/CodeGen/MachineFunctionPass.h"
     20 #include "llvm/CodeGen/MachineInstrBuilder.h"
     21 #include "llvm/CodeGen/MachineRegisterInfo.h"
     22 #include "llvm/CodeGen/MachineSSAUpdater.h"
     23 #include "llvm/Target/TargetInstrInfo.h"
     24 #include "llvm/Support/CommandLine.h"
     25 #include "llvm/Support/Debug.h"
     26 #include "llvm/Support/ErrorHandling.h"
     27 #include "llvm/Support/raw_ostream.h"
     28 #include "llvm/ADT/SmallSet.h"
     29 #include "llvm/ADT/SetVector.h"
     30 #include "llvm/ADT/Statistic.h"
     31 using namespace llvm;
     32 
     33 STATISTIC(NumTails     , "Number of tails duplicated");
     34 STATISTIC(NumTailDups  , "Number of tail duplicated blocks");
     35 STATISTIC(NumInstrDups , "Additional instructions due to tail duplication");
     36 STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
     37 STATISTIC(NumAddedPHIs , "Number of phis added");
     38 
     39 // Heuristic for tail duplication.
     40 static cl::opt<unsigned>
     41 TailDuplicateSize("tail-dup-size",
     42                   cl::desc("Maximum instructions to consider tail duplicating"),
     43                   cl::init(2), cl::Hidden);
     44 
     45 static cl::opt<bool>
     46 TailDupVerify("tail-dup-verify",
     47               cl::desc("Verify sanity of PHI instructions during taildup"),
     48               cl::init(false), cl::Hidden);
     49 
     50 static cl::opt<unsigned>
     51 TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden);
     52 
     53 typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy;
     54 
     55 namespace {
     56   /// TailDuplicatePass - Perform tail duplication.
     57   class TailDuplicatePass : public MachineFunctionPass {
     58     bool PreRegAlloc;
     59     const TargetInstrInfo *TII;
     60     MachineModuleInfo *MMI;
     61     MachineRegisterInfo *MRI;
     62 
     63     // SSAUpdateVRs - A list of virtual registers for which to update SSA form.
     64     SmallVector<unsigned, 16> SSAUpdateVRs;
     65 
     66     // SSAUpdateVals - For each virtual register in SSAUpdateVals keep a list of
     67     // source virtual registers.
     68     DenseMap<unsigned, AvailableValsTy> SSAUpdateVals;
     69 
     70   public:
     71     static char ID;
     72     explicit TailDuplicatePass(bool PreRA) :
     73       MachineFunctionPass(ID), PreRegAlloc(PreRA) {}
     74 
     75     virtual bool runOnMachineFunction(MachineFunction &MF);
     76     virtual const char *getPassName() const { return "Tail Duplication"; }
     77 
     78   private:
     79     void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
     80                            MachineBasicBlock *BB);
     81     void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB,
     82                     MachineBasicBlock *PredBB,
     83                     DenseMap<unsigned, unsigned> &LocalVRMap,
     84                     SmallVector<std::pair<unsigned,unsigned>, 4> &Copies,
     85                     const DenseSet<unsigned> &UsedByPhi,
     86                     bool Remove);
     87     void DuplicateInstruction(MachineInstr *MI,
     88                               MachineBasicBlock *TailBB,
     89                               MachineBasicBlock *PredBB,
     90                               MachineFunction &MF,
     91                               DenseMap<unsigned, unsigned> &LocalVRMap,
     92                               const DenseSet<unsigned> &UsedByPhi);
     93     void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
     94                               SmallVector<MachineBasicBlock*, 8> &TDBBs,
     95                               SmallSetVector<MachineBasicBlock*, 8> &Succs);
     96     bool TailDuplicateBlocks(MachineFunction &MF);
     97     bool shouldTailDuplicate(const MachineFunction &MF,
     98                              bool IsSimple, MachineBasicBlock &TailBB);
     99     bool isSimpleBB(MachineBasicBlock *TailBB);
    100     bool canCompletelyDuplicateBB(MachineBasicBlock &BB);
    101     bool duplicateSimpleBB(MachineBasicBlock *TailBB,
    102                            SmallVector<MachineBasicBlock*, 8> &TDBBs,
    103                            const DenseSet<unsigned> &RegsUsedByPhi,
    104                            SmallVector<MachineInstr*, 16> &Copies);
    105     bool TailDuplicate(MachineBasicBlock *TailBB,
    106                        bool IsSimple,
    107                        MachineFunction &MF,
    108                        SmallVector<MachineBasicBlock*, 8> &TDBBs,
    109                        SmallVector<MachineInstr*, 16> &Copies);
    110     bool TailDuplicateAndUpdate(MachineBasicBlock *MBB,
    111                                 bool IsSimple,
    112                                 MachineFunction &MF);
    113 
    114     void RemoveDeadBlock(MachineBasicBlock *MBB);
    115   };
    116 
    117   char TailDuplicatePass::ID = 0;
    118 }
    119 
    120 FunctionPass *llvm::createTailDuplicatePass(bool PreRegAlloc) {
    121   return new TailDuplicatePass(PreRegAlloc);
    122 }
    123 
    124 bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
    125   TII = MF.getTarget().getInstrInfo();
    126   MRI = &MF.getRegInfo();
    127   MMI = getAnalysisIfAvailable<MachineModuleInfo>();
    128 
    129   bool MadeChange = false;
    130   while (TailDuplicateBlocks(MF))
    131     MadeChange = true;
    132 
    133   return MadeChange;
    134 }
    135 
    136 static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
    137   for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) {
    138     MachineBasicBlock *MBB = I;
    139     SmallSetVector<MachineBasicBlock*, 8> Preds(MBB->pred_begin(),
    140                                                 MBB->pred_end());
    141     MachineBasicBlock::iterator MI = MBB->begin();
    142     while (MI != MBB->end()) {
    143       if (!MI->isPHI())
    144         break;
    145       for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
    146              PE = Preds.end(); PI != PE; ++PI) {
    147         MachineBasicBlock *PredBB = *PI;
    148         bool Found = false;
    149         for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
    150           MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB();
    151           if (PHIBB == PredBB) {
    152             Found = true;
    153             break;
    154           }
    155         }
    156         if (!Found) {
    157           dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
    158           dbgs() << "  missing input from predecessor BB#"
    159                  << PredBB->getNumber() << '\n';
    160           llvm_unreachable(0);
    161         }
    162       }
    163 
    164       for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
    165         MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB();
    166         if (CheckExtra && !Preds.count(PHIBB)) {
    167           dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber()
    168                  << ": " << *MI;
    169           dbgs() << "  extra input from predecessor BB#"
    170                  << PHIBB->getNumber() << '\n';
    171           llvm_unreachable(0);
    172         }
    173         if (PHIBB->getNumber() < 0) {
    174           dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
    175           dbgs() << "  non-existing BB#" << PHIBB->getNumber() << '\n';
    176           llvm_unreachable(0);
    177         }
    178       }
    179       ++MI;
    180     }
    181   }
    182 }
    183 
    184 /// TailDuplicateAndUpdate - Tail duplicate the block and cleanup.
    185 bool
    186 TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB,
    187                                           bool IsSimple,
    188                                           MachineFunction &MF) {
    189   // Save the successors list.
    190   SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(),
    191                                               MBB->succ_end());
    192 
    193   SmallVector<MachineBasicBlock*, 8> TDBBs;
    194   SmallVector<MachineInstr*, 16> Copies;
    195   if (!TailDuplicate(MBB, IsSimple, MF, TDBBs, Copies))
    196     return false;
    197 
    198   ++NumTails;
    199 
    200   SmallVector<MachineInstr*, 8> NewPHIs;
    201   MachineSSAUpdater SSAUpdate(MF, &NewPHIs);
    202 
    203   // TailBB's immediate successors are now successors of those predecessors
    204   // which duplicated TailBB. Add the predecessors as sources to the PHI
    205   // instructions.
    206   bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken();
    207   if (PreRegAlloc)
    208     UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
    209 
    210   // If it is dead, remove it.
    211   if (isDead) {
    212     NumInstrDups -= MBB->size();
    213     RemoveDeadBlock(MBB);
    214     ++NumDeadBlocks;
    215   }
    216 
    217   // Update SSA form.
    218   if (!SSAUpdateVRs.empty()) {
    219     for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
    220       unsigned VReg = SSAUpdateVRs[i];
    221       SSAUpdate.Initialize(VReg);
    222 
    223       // If the original definition is still around, add it as an available
    224       // value.
    225       MachineInstr *DefMI = MRI->getVRegDef(VReg);
    226       MachineBasicBlock *DefBB = 0;
    227       if (DefMI) {
    228         DefBB = DefMI->getParent();
    229         SSAUpdate.AddAvailableValue(DefBB, VReg);
    230       }
    231 
    232       // Add the new vregs as available values.
    233       DenseMap<unsigned, AvailableValsTy>::iterator LI =
    234         SSAUpdateVals.find(VReg);
    235       for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
    236         MachineBasicBlock *SrcBB = LI->second[j].first;
    237         unsigned SrcReg = LI->second[j].second;
    238         SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
    239       }
    240 
    241       // Rewrite uses that are outside of the original def's block.
    242       MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
    243       while (UI != MRI->use_end()) {
    244         MachineOperand &UseMO = UI.getOperand();
    245         MachineInstr *UseMI = &*UI;
    246         ++UI;
    247         if (UseMI->isDebugValue()) {
    248           // SSAUpdate can replace the use with an undef. That creates
    249           // a debug instruction that is a kill.
    250           // FIXME: Should it SSAUpdate job to delete debug instructions
    251           // instead of replacing the use with undef?
    252           UseMI->eraseFromParent();
    253           continue;
    254         }
    255         if (UseMI->getParent() == DefBB && !UseMI->isPHI())
    256           continue;
    257         SSAUpdate.RewriteUse(UseMO);
    258       }
    259     }
    260 
    261     SSAUpdateVRs.clear();
    262     SSAUpdateVals.clear();
    263   }
    264 
    265   // Eliminate some of the copies inserted by tail duplication to maintain
    266   // SSA form.
    267   for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
    268     MachineInstr *Copy = Copies[i];
    269     if (!Copy->isCopy())
    270       continue;
    271     unsigned Dst = Copy->getOperand(0).getReg();
    272     unsigned Src = Copy->getOperand(1).getReg();
    273     MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src);
    274     if (++UI == MRI->use_end()) {
    275       // Copy is the only use. Do trivial copy propagation here.
    276       MRI->replaceRegWith(Dst, Src);
    277       Copy->eraseFromParent();
    278     }
    279   }
    280 
    281   if (NewPHIs.size())
    282     NumAddedPHIs += NewPHIs.size();
    283 
    284   return true;
    285 }
    286 
    287 /// TailDuplicateBlocks - Look for small blocks that are unconditionally
    288 /// branched to and do not fall through. Tail-duplicate their instructions
    289 /// into their predecessors to eliminate (dynamic) branches.
    290 bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
    291   bool MadeChange = false;
    292 
    293   if (PreRegAlloc && TailDupVerify) {
    294     DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
    295     VerifyPHIs(MF, true);
    296   }
    297 
    298   for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
    299     MachineBasicBlock *MBB = I++;
    300 
    301     if (NumTails == TailDupLimit)
    302       break;
    303 
    304     bool IsSimple = isSimpleBB(MBB);
    305 
    306     if (!shouldTailDuplicate(MF, IsSimple, *MBB))
    307       continue;
    308 
    309     MadeChange |= TailDuplicateAndUpdate(MBB, IsSimple, MF);
    310   }
    311 
    312   if (PreRegAlloc && TailDupVerify)
    313     VerifyPHIs(MF, false);
    314 
    315   return MadeChange;
    316 }
    317 
    318 static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB,
    319                          const MachineRegisterInfo *MRI) {
    320   for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
    321          UE = MRI->use_end(); UI != UE; ++UI) {
    322     MachineInstr *UseMI = &*UI;
    323     if (UseMI->isDebugValue())
    324       continue;
    325     if (UseMI->getParent() != BB)
    326       return true;
    327   }
    328   return false;
    329 }
    330 
    331 static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) {
    332   for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2)
    333     if (MI->getOperand(i+1).getMBB() == SrcBB)
    334       return i;
    335   return 0;
    336 }
    337 
    338 
    339 // Remember which registers are used by phis in this block. This is
    340 // used to determine which registers are liveout while modifying the
    341 // block (which is why we need to copy the information).
    342 static void getRegsUsedByPHIs(const MachineBasicBlock &BB,
    343                               DenseSet<unsigned> *UsedByPhi) {
    344   for(MachineBasicBlock::const_iterator I = BB.begin(), E = BB.end();
    345       I != E; ++I) {
    346     const MachineInstr &MI = *I;
    347     if (!MI.isPHI())
    348       break;
    349     for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
    350       unsigned SrcReg = MI.getOperand(i).getReg();
    351       UsedByPhi->insert(SrcReg);
    352     }
    353   }
    354 }
    355 
    356 /// AddSSAUpdateEntry - Add a definition and source virtual registers pair for
    357 /// SSA update.
    358 void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
    359                                           MachineBasicBlock *BB) {
    360   DenseMap<unsigned, AvailableValsTy>::iterator LI= SSAUpdateVals.find(OrigReg);
    361   if (LI != SSAUpdateVals.end())
    362     LI->second.push_back(std::make_pair(BB, NewReg));
    363   else {
    364     AvailableValsTy Vals;
    365     Vals.push_back(std::make_pair(BB, NewReg));
    366     SSAUpdateVals.insert(std::make_pair(OrigReg, Vals));
    367     SSAUpdateVRs.push_back(OrigReg);
    368   }
    369 }
    370 
    371 /// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB.
    372 /// Remember the source register that's contributed by PredBB and update SSA
    373 /// update map.
    374 void TailDuplicatePass::ProcessPHI(MachineInstr *MI,
    375                                    MachineBasicBlock *TailBB,
    376                                    MachineBasicBlock *PredBB,
    377                                    DenseMap<unsigned, unsigned> &LocalVRMap,
    378                            SmallVector<std::pair<unsigned,unsigned>, 4> &Copies,
    379                                    const DenseSet<unsigned> &RegsUsedByPhi,
    380                                    bool Remove) {
    381   unsigned DefReg = MI->getOperand(0).getReg();
    382   unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB);
    383   assert(SrcOpIdx && "Unable to find matching PHI source?");
    384   unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg();
    385   const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
    386   LocalVRMap.insert(std::make_pair(DefReg, SrcReg));
    387 
    388   // Insert a copy from source to the end of the block. The def register is the
    389   // available value liveout of the block.
    390   unsigned NewDef = MRI->createVirtualRegister(RC);
    391   Copies.push_back(std::make_pair(NewDef, SrcReg));
    392   if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg))
    393     AddSSAUpdateEntry(DefReg, NewDef, PredBB);
    394 
    395   if (!Remove)
    396     return;
    397 
    398   // Remove PredBB from the PHI node.
    399   MI->RemoveOperand(SrcOpIdx+1);
    400   MI->RemoveOperand(SrcOpIdx);
    401   if (MI->getNumOperands() == 1)
    402     MI->eraseFromParent();
    403 }
    404 
    405 /// DuplicateInstruction - Duplicate a TailBB instruction to PredBB and update
    406 /// the source operands due to earlier PHI translation.
    407 void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
    408                                      MachineBasicBlock *TailBB,
    409                                      MachineBasicBlock *PredBB,
    410                                      MachineFunction &MF,
    411                                      DenseMap<unsigned, unsigned> &LocalVRMap,
    412                                      const DenseSet<unsigned> &UsedByPhi) {
    413   MachineInstr *NewMI = TII->duplicate(MI, MF);
    414   for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
    415     MachineOperand &MO = NewMI->getOperand(i);
    416     if (!MO.isReg())
    417       continue;
    418     unsigned Reg = MO.getReg();
    419     if (!TargetRegisterInfo::isVirtualRegister(Reg))
    420       continue;
    421     if (MO.isDef()) {
    422       const TargetRegisterClass *RC = MRI->getRegClass(Reg);
    423       unsigned NewReg = MRI->createVirtualRegister(RC);
    424       MO.setReg(NewReg);
    425       LocalVRMap.insert(std::make_pair(Reg, NewReg));
    426       if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg))
    427         AddSSAUpdateEntry(Reg, NewReg, PredBB);
    428     } else {
    429       DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg);
    430       if (VI != LocalVRMap.end())
    431         MO.setReg(VI->second);
    432     }
    433   }
    434   PredBB->insert(PredBB->end(), NewMI);
    435 }
    436 
    437 /// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor
    438 /// blocks, the successors have gained new predecessors. Update the PHI
    439 /// instructions in them accordingly.
    440 void
    441 TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
    442                                   SmallVector<MachineBasicBlock*, 8> &TDBBs,
    443                                   SmallSetVector<MachineBasicBlock*,8> &Succs) {
    444   for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(),
    445          SE = Succs.end(); SI != SE; ++SI) {
    446     MachineBasicBlock *SuccBB = *SI;
    447     for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end();
    448          II != EE; ++II) {
    449       if (!II->isPHI())
    450         break;
    451       unsigned Idx = 0;
    452       for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) {
    453         MachineOperand &MO = II->getOperand(i+1);
    454         if (MO.getMBB() == FromBB) {
    455           Idx = i;
    456           break;
    457         }
    458       }
    459 
    460       assert(Idx != 0);
    461       MachineOperand &MO0 = II->getOperand(Idx);
    462       unsigned Reg = MO0.getReg();
    463       if (isDead) {
    464         // Folded into the previous BB.
    465         // There could be duplicate phi source entries. FIXME: Should sdisel
    466         // or earlier pass fixed this?
    467         for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) {
    468           MachineOperand &MO = II->getOperand(i+1);
    469           if (MO.getMBB() == FromBB) {
    470             II->RemoveOperand(i+1);
    471             II->RemoveOperand(i);
    472           }
    473         }
    474       } else
    475         Idx = 0;
    476 
    477       // If Idx is set, the operands at Idx and Idx+1 must be removed.
    478       // We reuse the location to avoid expensive RemoveOperand calls.
    479 
    480       DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg);
    481       if (LI != SSAUpdateVals.end()) {
    482         // This register is defined in the tail block.
    483         for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
    484           MachineBasicBlock *SrcBB = LI->second[j].first;
    485           // If we didn't duplicate a bb into a particular predecessor, we
    486           // might still have added an entry to SSAUpdateVals to correcly
    487           // recompute SSA. If that case, avoid adding a dummy extra argument
    488           // this PHI.
    489           if (!SrcBB->isSuccessor(SuccBB))
    490             continue;
    491 
    492           unsigned SrcReg = LI->second[j].second;
    493           if (Idx != 0) {
    494             II->getOperand(Idx).setReg(SrcReg);
    495             II->getOperand(Idx+1).setMBB(SrcBB);
    496             Idx = 0;
    497           } else {
    498             II->addOperand(MachineOperand::CreateReg(SrcReg, false));
    499             II->addOperand(MachineOperand::CreateMBB(SrcBB));
    500           }
    501         }
    502       } else {
    503         // Live in tail block, must also be live in predecessors.
    504         for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
    505           MachineBasicBlock *SrcBB = TDBBs[j];
    506           if (Idx != 0) {
    507             II->getOperand(Idx).setReg(Reg);
    508             II->getOperand(Idx+1).setMBB(SrcBB);
    509             Idx = 0;
    510           } else {
    511             II->addOperand(MachineOperand::CreateReg(Reg, false));
    512             II->addOperand(MachineOperand::CreateMBB(SrcBB));
    513           }
    514         }
    515       }
    516       if (Idx != 0) {
    517         II->RemoveOperand(Idx+1);
    518         II->RemoveOperand(Idx);
    519       }
    520     }
    521   }
    522 }
    523 
    524 /// shouldTailDuplicate - Determine if it is profitable to duplicate this block.
    525 bool
    526 TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
    527                                        bool IsSimple,
    528                                        MachineBasicBlock &TailBB) {
    529   // Only duplicate blocks that end with unconditional branches.
    530   if (TailBB.canFallThrough())
    531     return false;
    532 
    533   // Don't try to tail-duplicate single-block loops.
    534   if (TailBB.isSuccessor(&TailBB))
    535     return false;
    536 
    537   // Set the limit on the cost to duplicate. When optimizing for size,
    538   // duplicate only one, because one branch instruction can be eliminated to
    539   // compensate for the duplication.
    540   unsigned MaxDuplicateCount;
    541   if (TailDuplicateSize.getNumOccurrences() == 0 &&
    542       MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
    543     MaxDuplicateCount = 1;
    544   else
    545     MaxDuplicateCount = TailDuplicateSize;
    546 
    547   // If the target has hardware branch prediction that can handle indirect
    548   // branches, duplicating them can often make them predictable when there
    549   // are common paths through the code.  The limit needs to be high enough
    550   // to allow undoing the effects of tail merging and other optimizations
    551   // that rearrange the predecessors of the indirect branch.
    552 
    553   bool HasIndirectbr = false;
    554   if (!TailBB.empty())
    555     HasIndirectbr = TailBB.back().getDesc().isIndirectBranch();
    556 
    557   if (HasIndirectbr && PreRegAlloc)
    558     MaxDuplicateCount = 20;
    559 
    560   // Check the instructions in the block to determine whether tail-duplication
    561   // is invalid or unlikely to be profitable.
    562   unsigned InstrCount = 0;
    563   for (MachineBasicBlock::const_iterator I = TailBB.begin(); I != TailBB.end();
    564        ++I) {
    565     // Non-duplicable things shouldn't be tail-duplicated.
    566     if (I->getDesc().isNotDuplicable())
    567       return false;
    568 
    569     // Do not duplicate 'return' instructions if this is a pre-regalloc run.
    570     // A return may expand into a lot more instructions (e.g. reload of callee
    571     // saved registers) after PEI.
    572     if (PreRegAlloc && I->getDesc().isReturn())
    573       return false;
    574 
    575     // Avoid duplicating calls before register allocation. Calls presents a
    576     // barrier to register allocation so duplicating them may end up increasing
    577     // spills.
    578     if (PreRegAlloc && I->getDesc().isCall())
    579       return false;
    580 
    581     if (!I->isPHI() && !I->isDebugValue())
    582       InstrCount += 1;
    583 
    584     if (InstrCount > MaxDuplicateCount)
    585       return false;
    586   }
    587 
    588   if (HasIndirectbr && PreRegAlloc)
    589     return true;
    590 
    591   if (IsSimple)
    592     return true;
    593 
    594   if (!PreRegAlloc)
    595     return true;
    596 
    597   return canCompletelyDuplicateBB(TailBB);
    598 }
    599 
    600 /// isSimpleBB - True if this BB has only one unconditional jump.
    601 bool
    602 TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) {
    603   if (TailBB->succ_size() != 1)
    604     return false;
    605   if (TailBB->pred_empty())
    606     return false;
    607   MachineBasicBlock::iterator I = TailBB->begin();
    608   MachineBasicBlock::iterator E = TailBB->end();
    609   while (I != E && I->isDebugValue())
    610     ++I;
    611   if (I == E)
    612     return true;
    613   return I->getDesc().isUnconditionalBranch();
    614 }
    615 
    616 static bool
    617 bothUsedInPHI(const MachineBasicBlock &A,
    618               SmallPtrSet<MachineBasicBlock*, 8> SuccsB) {
    619   for (MachineBasicBlock::const_succ_iterator SI = A.succ_begin(),
    620          SE = A.succ_end(); SI != SE; ++SI) {
    621     MachineBasicBlock *BB = *SI;
    622     if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI())
    623       return true;
    624   }
    625 
    626   return false;
    627 }
    628 
    629 bool
    630 TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
    631   SmallPtrSet<MachineBasicBlock*, 8> Succs(BB.succ_begin(), BB.succ_end());
    632 
    633   for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(),
    634        PE = BB.pred_end(); PI != PE; ++PI) {
    635     MachineBasicBlock *PredBB = *PI;
    636 
    637     if (PredBB->succ_size() > 1)
    638       return false;
    639 
    640     MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
    641     SmallVector<MachineOperand, 4> PredCond;
    642     if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
    643       return false;
    644 
    645     if (!PredCond.empty())
    646       return false;
    647   }
    648   return true;
    649 }
    650 
    651 bool
    652 TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB,
    653                                      SmallVector<MachineBasicBlock*, 8> &TDBBs,
    654                                      const DenseSet<unsigned> &UsedByPhi,
    655                                      SmallVector<MachineInstr*, 16> &Copies) {
    656   SmallPtrSet<MachineBasicBlock*, 8> Succs(TailBB->succ_begin(),
    657                                            TailBB->succ_end());
    658   SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
    659                                            TailBB->pred_end());
    660   bool Changed = false;
    661   for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
    662        PE = Preds.end(); PI != PE; ++PI) {
    663     MachineBasicBlock *PredBB = *PI;
    664 
    665     if (PredBB->getLandingPadSuccessor())
    666       continue;
    667 
    668     if (bothUsedInPHI(*PredBB, Succs))
    669       continue;
    670 
    671     MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
    672     SmallVector<MachineOperand, 4> PredCond;
    673     if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
    674       continue;
    675 
    676     Changed = true;
    677     DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
    678                  << "From simple Succ: " << *TailBB);
    679 
    680     MachineBasicBlock *NewTarget = *TailBB->succ_begin();
    681     MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(PredBB));
    682 
    683     // Make PredFBB explicit.
    684     if (PredCond.empty())
    685       PredFBB = PredTBB;
    686 
    687     // Make fall through explicit.
    688     if (!PredTBB)
    689       PredTBB = NextBB;
    690     if (!PredFBB)
    691       PredFBB = NextBB;
    692 
    693     // Redirect
    694     if (PredFBB == TailBB)
    695       PredFBB = NewTarget;
    696     if (PredTBB == TailBB)
    697       PredTBB = NewTarget;
    698 
    699     // Make the branch unconditional if possible
    700     if (PredTBB == PredFBB) {
    701       PredCond.clear();
    702       PredFBB = NULL;
    703     }
    704 
    705     // Avoid adding fall through branches.
    706     if (PredFBB == NextBB)
    707       PredFBB = NULL;
    708     if (PredTBB == NextBB && PredFBB == NULL)
    709       PredTBB = NULL;
    710 
    711     TII->RemoveBranch(*PredBB);
    712 
    713     if (PredTBB)
    714       TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
    715 
    716     PredBB->removeSuccessor(TailBB);
    717     unsigned NumSuccessors = PredBB->succ_size();
    718     assert(NumSuccessors <= 1);
    719     if (NumSuccessors == 0 || *PredBB->succ_begin() != NewTarget)
    720       PredBB->addSuccessor(NewTarget);
    721 
    722     TDBBs.push_back(PredBB);
    723   }
    724   return Changed;
    725 }
    726 
    727 /// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
    728 /// of its predecessors.
    729 bool
    730 TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
    731                                  bool IsSimple,
    732                                  MachineFunction &MF,
    733                                  SmallVector<MachineBasicBlock*, 8> &TDBBs,
    734                                  SmallVector<MachineInstr*, 16> &Copies) {
    735   DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
    736 
    737   DenseSet<unsigned> UsedByPhi;
    738   getRegsUsedByPHIs(*TailBB, &UsedByPhi);
    739 
    740   if (IsSimple)
    741     return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies);
    742 
    743   // Iterate through all the unique predecessors and tail-duplicate this
    744   // block into them, if possible. Copying the list ahead of time also
    745   // avoids trouble with the predecessor list reallocating.
    746   bool Changed = false;
    747   SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
    748                                               TailBB->pred_end());
    749   for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
    750        PE = Preds.end(); PI != PE; ++PI) {
    751     MachineBasicBlock *PredBB = *PI;
    752 
    753     assert(TailBB != PredBB &&
    754            "Single-block loop should have been rejected earlier!");
    755     // EH edges are ignored by AnalyzeBranch.
    756     if (PredBB->succ_size() > 1)
    757       continue;
    758 
    759     MachineBasicBlock *PredTBB, *PredFBB;
    760     SmallVector<MachineOperand, 4> PredCond;
    761     if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
    762       continue;
    763     if (!PredCond.empty())
    764       continue;
    765     // Don't duplicate into a fall-through predecessor (at least for now).
    766     if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
    767       continue;
    768 
    769     DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
    770                  << "From Succ: " << *TailBB);
    771 
    772     TDBBs.push_back(PredBB);
    773 
    774     // Remove PredBB's unconditional branch.
    775     TII->RemoveBranch(*PredBB);
    776 
    777     // Clone the contents of TailBB into PredBB.
    778     DenseMap<unsigned, unsigned> LocalVRMap;
    779     SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
    780     MachineBasicBlock::iterator I = TailBB->begin();
    781     while (I != TailBB->end()) {
    782       MachineInstr *MI = &*I;
    783       ++I;
    784       if (MI->isPHI()) {
    785         // Replace the uses of the def of the PHI with the register coming
    786         // from PredBB.
    787         ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true);
    788       } else {
    789         // Replace def of virtual registers with new registers, and update
    790         // uses with PHI source register or the new registers.
    791         DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi);
    792       }
    793     }
    794     MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
    795     for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
    796       Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
    797                                TII->get(TargetOpcode::COPY),
    798                                CopyInfos[i].first).addReg(CopyInfos[i].second));
    799     }
    800 
    801     // Simplify
    802     TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true);
    803 
    804     NumInstrDups += TailBB->size() - 1; // subtract one for removed branch
    805 
    806     // Update the CFG.
    807     PredBB->removeSuccessor(PredBB->succ_begin());
    808     assert(PredBB->succ_empty() &&
    809            "TailDuplicate called on block with multiple successors!");
    810     for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
    811            E = TailBB->succ_end(); I != E; ++I)
    812       PredBB->addSuccessor(*I);
    813 
    814     Changed = true;
    815     ++NumTailDups;
    816   }
    817 
    818   // If TailBB was duplicated into all its predecessors except for the prior
    819   // block, which falls through unconditionally, move the contents of this
    820   // block into the prior block.
    821   MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB));
    822   MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
    823   SmallVector<MachineOperand, 4> PriorCond;
    824   // This has to check PrevBB->succ_size() because EH edges are ignored by
    825   // AnalyzeBranch.
    826   if (PrevBB->succ_size() == 1 &&
    827       !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) &&
    828       PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 &&
    829       !TailBB->hasAddressTaken()) {
    830     DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
    831           << "From MBB: " << *TailBB);
    832     if (PreRegAlloc) {
    833       DenseMap<unsigned, unsigned> LocalVRMap;
    834       SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
    835       MachineBasicBlock::iterator I = TailBB->begin();
    836       // Process PHI instructions first.
    837       while (I != TailBB->end() && I->isPHI()) {
    838         // Replace the uses of the def of the PHI with the register coming
    839         // from PredBB.
    840         MachineInstr *MI = &*I++;
    841         ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true);
    842         if (MI->getParent())
    843           MI->eraseFromParent();
    844       }
    845 
    846       // Now copy the non-PHI instructions.
    847       while (I != TailBB->end()) {
    848         // Replace def of virtual registers with new registers, and update
    849         // uses with PHI source register or the new registers.
    850         MachineInstr *MI = &*I++;
    851         DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi);
    852         MI->eraseFromParent();
    853       }
    854       MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator();
    855       for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
    856         Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(),
    857                                  TII->get(TargetOpcode::COPY),
    858                                  CopyInfos[i].first)
    859                            .addReg(CopyInfos[i].second));
    860       }
    861     } else {
    862       // No PHIs to worry about, just splice the instructions over.
    863       PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
    864     }
    865     PrevBB->removeSuccessor(PrevBB->succ_begin());
    866     assert(PrevBB->succ_empty());
    867     PrevBB->transferSuccessors(TailBB);
    868     TDBBs.push_back(PrevBB);
    869     Changed = true;
    870   }
    871 
    872   // If this is after register allocation, there are no phis to fix.
    873   if (!PreRegAlloc)
    874     return Changed;
    875 
    876   // If we made no changes so far, we are safe.
    877   if (!Changed)
    878     return Changed;
    879 
    880 
    881   // Handle the nasty case in that we duplicated a block that is part of a loop
    882   // into some but not all of its predecessors. For example:
    883   //    1 -> 2 <-> 3                 |
    884   //          \                      |
    885   //           \---> rest            |
    886   // if we duplicate 2 into 1 but not into 3, we end up with
    887   // 12 -> 3 <-> 2 -> rest           |
    888   //   \             /               |
    889   //    \----->-----/                |
    890   // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced
    891   // with a phi in 3 (which now dominates 2).
    892   // What we do here is introduce a copy in 3 of the register defined by the
    893   // phi, just like when we are duplicating 2 into 3, but we don't copy any
    894   // real instructions or remove the 3 -> 2 edge from the phi in 2.
    895   for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
    896        PE = Preds.end(); PI != PE; ++PI) {
    897     MachineBasicBlock *PredBB = *PI;
    898     if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end())
    899       continue;
    900 
    901     // EH edges
    902     if (PredBB->succ_size() != 1)
    903       continue;
    904 
    905     DenseMap<unsigned, unsigned> LocalVRMap;
    906     SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
    907     MachineBasicBlock::iterator I = TailBB->begin();
    908     // Process PHI instructions first.
    909     while (I != TailBB->end() && I->isPHI()) {
    910       // Replace the uses of the def of the PHI with the register coming
    911       // from PredBB.
    912       MachineInstr *MI = &*I++;
    913       ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false);
    914     }
    915     MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
    916     for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
    917       Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
    918                                TII->get(TargetOpcode::COPY),
    919                                CopyInfos[i].first).addReg(CopyInfos[i].second));
    920     }
    921   }
    922 
    923   return Changed;
    924 }
    925 
    926 /// RemoveDeadBlock - Remove the specified dead machine basic block from the
    927 /// function, updating the CFG.
    928 void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) {
    929   assert(MBB->pred_empty() && "MBB must be dead!");
    930   DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
    931 
    932   // Remove all successors.
    933   while (!MBB->succ_empty())
    934     MBB->removeSuccessor(MBB->succ_end()-1);
    935 
    936   // Remove the block.
    937   MBB->eraseFromParent();
    938 }
    939