Home | History | Annotate | Download | only in CodeGen
      1 //===-- MachineBlockPlacement.cpp - Basic Block Code Layout optimization --===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements basic block placement transformations using the CFG
     11 // structure and branch probability estimates.
     12 //
     13 // The pass strives to preserve the structure of the CFG (that is, retain
     14 // a topological ordering of basic blocks) in the absense of a *strong* signal
     15 // to the contrary from probabilities. However, within the CFG structure, it
     16 // attempts to choose an ordering which favors placing more likely sequences of
     17 // blocks adjacent to each other.
     18 //
     19 // The algorithm works from the inner-most loop within a function outward, and
     20 // at each stage walks through the basic blocks, trying to coalesce them into
     21 // sequential chains where allowed by the CFG (or demanded by heavy
     22 // probabilities). Finally, it walks the blocks in topological order, and the
     23 // first time it reaches a chain of basic blocks, it schedules them in the
     24 // function in-order.
     25 //
     26 //===----------------------------------------------------------------------===//
     27 
     28 #define DEBUG_TYPE "block-placement2"
     29 #include "llvm/CodeGen/MachineBasicBlock.h"
     30 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
     31 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
     32 #include "llvm/CodeGen/MachineFunction.h"
     33 #include "llvm/CodeGen/MachineFunctionPass.h"
     34 #include "llvm/CodeGen/MachineLoopInfo.h"
     35 #include "llvm/CodeGen/MachineModuleInfo.h"
     36 #include "llvm/CodeGen/Passes.h"
     37 #include "llvm/Support/Allocator.h"
     38 #include "llvm/Support/Debug.h"
     39 #include "llvm/ADT/DenseMap.h"
     40 #include "llvm/ADT/SmallPtrSet.h"
     41 #include "llvm/ADT/SmallVector.h"
     42 #include "llvm/ADT/Statistic.h"
     43 #include "llvm/Target/TargetInstrInfo.h"
     44 #include "llvm/Target/TargetLowering.h"
     45 #include <algorithm>
     46 using namespace llvm;
     47 
     48 STATISTIC(NumCondBranches, "Number of conditional branches");
     49 STATISTIC(NumUncondBranches, "Number of uncondittional branches");
     50 STATISTIC(CondBranchTakenFreq,
     51           "Potential frequency of taking conditional branches");
     52 STATISTIC(UncondBranchTakenFreq,
     53           "Potential frequency of taking unconditional branches");
     54 
     55 namespace {
     56 class BlockChain;
     57 /// \brief Type for our function-wide basic block -> block chain mapping.
     58 typedef DenseMap<MachineBasicBlock *, BlockChain *> BlockToChainMapType;
     59 }
     60 
     61 namespace {
     62 /// \brief A chain of blocks which will be laid out contiguously.
     63 ///
     64 /// This is the datastructure representing a chain of consecutive blocks that
     65 /// are profitable to layout together in order to maximize fallthrough
     66 /// probabilities. We also can use a block chain to represent a sequence of
     67 /// basic blocks which have some external (correctness) requirement for
     68 /// sequential layout.
     69 ///
     70 /// Eventually, the block chains will form a directed graph over the function.
     71 /// We provide an SCC-supporting-iterator in order to quicky build and walk the
     72 /// SCCs of block chains within a function.
     73 ///
     74 /// The block chains also have support for calculating and caching probability
     75 /// information related to the chain itself versus other chains. This is used
     76 /// for ranking during the final layout of block chains.
     77 class BlockChain {
     78   /// \brief The sequence of blocks belonging to this chain.
     79   ///
     80   /// This is the sequence of blocks for a particular chain. These will be laid
     81   /// out in-order within the function.
     82   SmallVector<MachineBasicBlock *, 4> Blocks;
     83 
     84   /// \brief A handle to the function-wide basic block to block chain mapping.
     85   ///
     86   /// This is retained in each block chain to simplify the computation of child
     87   /// block chains for SCC-formation and iteration. We store the edges to child
     88   /// basic blocks, and map them back to their associated chains using this
     89   /// structure.
     90   BlockToChainMapType &BlockToChain;
     91 
     92 public:
     93   /// \brief Construct a new BlockChain.
     94   ///
     95   /// This builds a new block chain representing a single basic block in the
     96   /// function. It also registers itself as the chain that block participates
     97   /// in with the BlockToChain mapping.
     98   BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)
     99     : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) {
    100     assert(BB && "Cannot create a chain with a null basic block");
    101     BlockToChain[BB] = this;
    102   }
    103 
    104   /// \brief Iterator over blocks within the chain.
    105   typedef SmallVectorImpl<MachineBasicBlock *>::iterator iterator;
    106 
    107   /// \brief Beginning of blocks within the chain.
    108   iterator begin() { return Blocks.begin(); }
    109 
    110   /// \brief End of blocks within the chain.
    111   iterator end() { return Blocks.end(); }
    112 
    113   /// \brief Merge a block chain into this one.
    114   ///
    115   /// This routine merges a block chain into this one. It takes care of forming
    116   /// a contiguous sequence of basic blocks, updating the edge list, and
    117   /// updating the block -> chain mapping. It does not free or tear down the
    118   /// old chain, but the old chain's block list is no longer valid.
    119   void merge(MachineBasicBlock *BB, BlockChain *Chain) {
    120     assert(BB);
    121     assert(!Blocks.empty());
    122 
    123     // Fast path in case we don't have a chain already.
    124     if (!Chain) {
    125       assert(!BlockToChain[BB]);
    126       Blocks.push_back(BB);
    127       BlockToChain[BB] = this;
    128       return;
    129     }
    130 
    131     assert(BB == *Chain->begin());
    132     assert(Chain->begin() != Chain->end());
    133 
    134     // Update the incoming blocks to point to this chain, and add them to the
    135     // chain structure.
    136     for (BlockChain::iterator BI = Chain->begin(), BE = Chain->end();
    137          BI != BE; ++BI) {
    138       Blocks.push_back(*BI);
    139       assert(BlockToChain[*BI] == Chain && "Incoming blocks not in chain");
    140       BlockToChain[*BI] = this;
    141     }
    142   }
    143 
    144 #ifndef NDEBUG
    145   /// \brief Dump the blocks in this chain.
    146   void dump() LLVM_ATTRIBUTE_USED {
    147     for (iterator I = begin(), E = end(); I != E; ++I)
    148       (*I)->dump();
    149   }
    150 #endif // NDEBUG
    151 
    152   /// \brief Count of predecessors within the loop currently being processed.
    153   ///
    154   /// This count is updated at each loop we process to represent the number of
    155   /// in-loop predecessors of this chain.
    156   unsigned LoopPredecessors;
    157 };
    158 }
    159 
    160 namespace {
    161 class MachineBlockPlacement : public MachineFunctionPass {
    162   /// \brief A typedef for a block filter set.
    163   typedef SmallPtrSet<MachineBasicBlock *, 16> BlockFilterSet;
    164 
    165   /// \brief A handle to the branch probability pass.
    166   const MachineBranchProbabilityInfo *MBPI;
    167 
    168   /// \brief A handle to the function-wide block frequency pass.
    169   const MachineBlockFrequencyInfo *MBFI;
    170 
    171   /// \brief A handle to the loop info.
    172   const MachineLoopInfo *MLI;
    173 
    174   /// \brief A handle to the target's instruction info.
    175   const TargetInstrInfo *TII;
    176 
    177   /// \brief A handle to the target's lowering info.
    178   const TargetLowering *TLI;
    179 
    180   /// \brief Allocator and owner of BlockChain structures.
    181   ///
    182   /// We build BlockChains lazily by merging together high probability BB
    183   /// sequences acording to the "Algo2" in the paper mentioned at the top of
    184   /// the file. To reduce malloc traffic, we allocate them using this slab-like
    185   /// allocator, and destroy them after the pass completes.
    186   SpecificBumpPtrAllocator<BlockChain> ChainAllocator;
    187 
    188   /// \brief Function wide BasicBlock to BlockChain mapping.
    189   ///
    190   /// This mapping allows efficiently moving from any given basic block to the
    191   /// BlockChain it participates in, if any. We use it to, among other things,
    192   /// allow implicitly defining edges between chains as the existing edges
    193   /// between basic blocks.
    194   DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;
    195 
    196   void markChainSuccessors(BlockChain &Chain,
    197                            MachineBasicBlock *LoopHeaderBB,
    198                            SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
    199                            const BlockFilterSet *BlockFilter = 0);
    200   MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
    201                                          BlockChain &Chain,
    202                                          const BlockFilterSet *BlockFilter);
    203   MachineBasicBlock *selectBestCandidateBlock(
    204       BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
    205       const BlockFilterSet *BlockFilter);
    206   MachineBasicBlock *getFirstUnplacedBlock(
    207       MachineFunction &F,
    208       const BlockChain &PlacedChain,
    209       MachineFunction::iterator &PrevUnplacedBlockIt,
    210       const BlockFilterSet *BlockFilter);
    211   void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
    212                   SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
    213                   const BlockFilterSet *BlockFilter = 0);
    214   MachineBasicBlock *findBestLoopTop(MachineLoop &L,
    215                                      const BlockFilterSet &LoopBlockSet);
    216   MachineBasicBlock *findBestLoopExit(MachineFunction &F,
    217                                       MachineLoop &L,
    218                                       const BlockFilterSet &LoopBlockSet);
    219   void buildLoopChains(MachineFunction &F, MachineLoop &L);
    220   void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB,
    221                   const BlockFilterSet &LoopBlockSet);
    222   void buildCFGChains(MachineFunction &F);
    223 
    224 public:
    225   static char ID; // Pass identification, replacement for typeid
    226   MachineBlockPlacement() : MachineFunctionPass(ID) {
    227     initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry());
    228   }
    229 
    230   bool runOnMachineFunction(MachineFunction &F);
    231 
    232   void getAnalysisUsage(AnalysisUsage &AU) const {
    233     AU.addRequired<MachineBranchProbabilityInfo>();
    234     AU.addRequired<MachineBlockFrequencyInfo>();
    235     AU.addRequired<MachineLoopInfo>();
    236     MachineFunctionPass::getAnalysisUsage(AU);
    237   }
    238 };
    239 }
    240 
    241 char MachineBlockPlacement::ID = 0;
    242 char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
    243 INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement2",
    244                       "Branch Probability Basic Block Placement", false, false)
    245 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
    246 INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
    247 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
    248 INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2",
    249                     "Branch Probability Basic Block Placement", false, false)
    250 
    251 #ifndef NDEBUG
    252 /// \brief Helper to print the name of a MBB.
    253 ///
    254 /// Only used by debug logging.
    255 static std::string getBlockName(MachineBasicBlock *BB) {
    256   std::string Result;
    257   raw_string_ostream OS(Result);
    258   OS << "BB#" << BB->getNumber()
    259      << " (derived from LLVM BB '" << BB->getName() << "')";
    260   OS.flush();
    261   return Result;
    262 }
    263 
    264 /// \brief Helper to print the number of a MBB.
    265 ///
    266 /// Only used by debug logging.
    267 static std::string getBlockNum(MachineBasicBlock *BB) {
    268   std::string Result;
    269   raw_string_ostream OS(Result);
    270   OS << "BB#" << BB->getNumber();
    271   OS.flush();
    272   return Result;
    273 }
    274 #endif
    275 
    276 /// \brief Mark a chain's successors as having one fewer preds.
    277 ///
    278 /// When a chain is being merged into the "placed" chain, this routine will
    279 /// quickly walk the successors of each block in the chain and mark them as
    280 /// having one fewer active predecessor. It also adds any successors of this
    281 /// chain which reach the zero-predecessor state to the worklist passed in.
    282 void MachineBlockPlacement::markChainSuccessors(
    283     BlockChain &Chain,
    284     MachineBasicBlock *LoopHeaderBB,
    285     SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
    286     const BlockFilterSet *BlockFilter) {
    287   // Walk all the blocks in this chain, marking their successors as having
    288   // a predecessor placed.
    289   for (BlockChain::iterator CBI = Chain.begin(), CBE = Chain.end();
    290        CBI != CBE; ++CBI) {
    291     // Add any successors for which this is the only un-placed in-loop
    292     // predecessor to the worklist as a viable candidate for CFG-neutral
    293     // placement. No subsequent placement of this block will violate the CFG
    294     // shape, so we get to use heuristics to choose a favorable placement.
    295     for (MachineBasicBlock::succ_iterator SI = (*CBI)->succ_begin(),
    296                                           SE = (*CBI)->succ_end();
    297          SI != SE; ++SI) {
    298       if (BlockFilter && !BlockFilter->count(*SI))
    299         continue;
    300       BlockChain &SuccChain = *BlockToChain[*SI];
    301       // Disregard edges within a fixed chain, or edges to the loop header.
    302       if (&Chain == &SuccChain || *SI == LoopHeaderBB)
    303         continue;
    304 
    305       // This is a cross-chain edge that is within the loop, so decrement the
    306       // loop predecessor count of the destination chain.
    307       if (SuccChain.LoopPredecessors > 0 && --SuccChain.LoopPredecessors == 0)
    308         BlockWorkList.push_back(*SuccChain.begin());
    309     }
    310   }
    311 }
    312 
    313 /// \brief Select the best successor for a block.
    314 ///
    315 /// This looks across all successors of a particular block and attempts to
    316 /// select the "best" one to be the layout successor. It only considers direct
    317 /// successors which also pass the block filter. It will attempt to avoid
    318 /// breaking CFG structure, but cave and break such structures in the case of
    319 /// very hot successor edges.
    320 ///
    321 /// \returns The best successor block found, or null if none are viable.
    322 MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
    323     MachineBasicBlock *BB, BlockChain &Chain,
    324     const BlockFilterSet *BlockFilter) {
    325   const BranchProbability HotProb(4, 5); // 80%
    326 
    327   MachineBasicBlock *BestSucc = 0;
    328   // FIXME: Due to the performance of the probability and weight routines in
    329   // the MBPI analysis, we manually compute probabilities using the edge
    330   // weights. This is suboptimal as it means that the somewhat subtle
    331   // definition of edge weight semantics is encoded here as well. We should
    332   // improve the MBPI interface to effeciently support query patterns such as
    333   // this.
    334   uint32_t BestWeight = 0;
    335   uint32_t WeightScale = 0;
    336   uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale);
    337   DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
    338   for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
    339                                         SE = BB->succ_end();
    340        SI != SE; ++SI) {
    341     if (BlockFilter && !BlockFilter->count(*SI))
    342       continue;
    343     BlockChain &SuccChain = *BlockToChain[*SI];
    344     if (&SuccChain == &Chain) {
    345       DEBUG(dbgs() << "    " << getBlockName(*SI) << " -> Already merged!\n");
    346       continue;
    347     }
    348     if (*SI != *SuccChain.begin()) {
    349       DEBUG(dbgs() << "    " << getBlockName(*SI) << " -> Mid chain!\n");
    350       continue;
    351     }
    352 
    353     uint32_t SuccWeight = MBPI->getEdgeWeight(BB, *SI);
    354     BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
    355 
    356     // Only consider successors which are either "hot", or wouldn't violate
    357     // any CFG constraints.
    358     if (SuccChain.LoopPredecessors != 0) {
    359       if (SuccProb < HotProb) {
    360         DEBUG(dbgs() << "    " << getBlockName(*SI) << " -> CFG conflict\n");
    361         continue;
    362       }
    363 
    364       // Make sure that a hot successor doesn't have a globally more important
    365       // predecessor.
    366       BlockFrequency CandidateEdgeFreq
    367         = MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl();
    368       bool BadCFGConflict = false;
    369       for (MachineBasicBlock::pred_iterator PI = (*SI)->pred_begin(),
    370                                             PE = (*SI)->pred_end();
    371            PI != PE; ++PI) {
    372         if (*PI == *SI || (BlockFilter && !BlockFilter->count(*PI)) ||
    373             BlockToChain[*PI] == &Chain)
    374           continue;
    375         BlockFrequency PredEdgeFreq
    376           = MBFI->getBlockFreq(*PI) * MBPI->getEdgeProbability(*PI, *SI);
    377         if (PredEdgeFreq >= CandidateEdgeFreq) {
    378           BadCFGConflict = true;
    379           break;
    380         }
    381       }
    382       if (BadCFGConflict) {
    383         DEBUG(dbgs() << "    " << getBlockName(*SI)
    384                                << " -> non-cold CFG conflict\n");
    385         continue;
    386       }
    387     }
    388 
    389     DEBUG(dbgs() << "    " << getBlockName(*SI) << " -> " << SuccProb
    390                  << " (prob)"
    391                  << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "")
    392                  << "\n");
    393     if (BestSucc && BestWeight >= SuccWeight)
    394       continue;
    395     BestSucc = *SI;
    396     BestWeight = SuccWeight;
    397   }
    398   return BestSucc;
    399 }
    400 
    401 namespace {
    402 /// \brief Predicate struct to detect blocks already placed.
    403 class IsBlockPlaced {
    404   const BlockChain &PlacedChain;
    405   const BlockToChainMapType &BlockToChain;
    406 
    407 public:
    408   IsBlockPlaced(const BlockChain &PlacedChain,
    409                 const BlockToChainMapType &BlockToChain)
    410       : PlacedChain(PlacedChain), BlockToChain(BlockToChain) {}
    411 
    412   bool operator()(MachineBasicBlock *BB) const {
    413     return BlockToChain.lookup(BB) == &PlacedChain;
    414   }
    415 };
    416 }
    417 
    418 /// \brief Select the best block from a worklist.
    419 ///
    420 /// This looks through the provided worklist as a list of candidate basic
    421 /// blocks and select the most profitable one to place. The definition of
    422 /// profitable only really makes sense in the context of a loop. This returns
    423 /// the most frequently visited block in the worklist, which in the case of
    424 /// a loop, is the one most desirable to be physically close to the rest of the
    425 /// loop body in order to improve icache behavior.
    426 ///
    427 /// \returns The best block found, or null if none are viable.
    428 MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
    429     BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
    430     const BlockFilterSet *BlockFilter) {
    431   // Once we need to walk the worklist looking for a candidate, cleanup the
    432   // worklist of already placed entries.
    433   // FIXME: If this shows up on profiles, it could be folded (at the cost of
    434   // some code complexity) into the loop below.
    435   WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
    436                                 IsBlockPlaced(Chain, BlockToChain)),
    437                  WorkList.end());
    438 
    439   MachineBasicBlock *BestBlock = 0;
    440   BlockFrequency BestFreq;
    441   for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(),
    442                                                       WBE = WorkList.end();
    443        WBI != WBE; ++WBI) {
    444     BlockChain &SuccChain = *BlockToChain[*WBI];
    445     if (&SuccChain == &Chain) {
    446       DEBUG(dbgs() << "    " << getBlockName(*WBI)
    447                    << " -> Already merged!\n");
    448       continue;
    449     }
    450     assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block");
    451 
    452     BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI);
    453     DEBUG(dbgs() << "    " << getBlockName(*WBI) << " -> " << CandidateFreq
    454                  << " (freq)\n");
    455     if (BestBlock && BestFreq >= CandidateFreq)
    456       continue;
    457     BestBlock = *WBI;
    458     BestFreq = CandidateFreq;
    459   }
    460   return BestBlock;
    461 }
    462 
    463 /// \brief Retrieve the first unplaced basic block.
    464 ///
    465 /// This routine is called when we are unable to use the CFG to walk through
    466 /// all of the basic blocks and form a chain due to unnatural loops in the CFG.
    467 /// We walk through the function's blocks in order, starting from the
    468 /// LastUnplacedBlockIt. We update this iterator on each call to avoid
    469 /// re-scanning the entire sequence on repeated calls to this routine.
    470 MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
    471     MachineFunction &F, const BlockChain &PlacedChain,
    472     MachineFunction::iterator &PrevUnplacedBlockIt,
    473     const BlockFilterSet *BlockFilter) {
    474   for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E;
    475        ++I) {
    476     if (BlockFilter && !BlockFilter->count(I))
    477       continue;
    478     if (BlockToChain[I] != &PlacedChain) {
    479       PrevUnplacedBlockIt = I;
    480       // Now select the head of the chain to which the unplaced block belongs
    481       // as the block to place. This will force the entire chain to be placed,
    482       // and satisfies the requirements of merging chains.
    483       return *BlockToChain[I]->begin();
    484     }
    485   }
    486   return 0;
    487 }
    488 
    489 void MachineBlockPlacement::buildChain(
    490     MachineBasicBlock *BB,
    491     BlockChain &Chain,
    492     SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
    493     const BlockFilterSet *BlockFilter) {
    494   assert(BB);
    495   assert(BlockToChain[BB] == &Chain);
    496   MachineFunction &F = *BB->getParent();
    497   MachineFunction::iterator PrevUnplacedBlockIt = F.begin();
    498 
    499   MachineBasicBlock *LoopHeaderBB = BB;
    500   markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter);
    501   BB = *llvm::prior(Chain.end());
    502   for (;;) {
    503     assert(BB);
    504     assert(BlockToChain[BB] == &Chain);
    505     assert(*llvm::prior(Chain.end()) == BB);
    506     MachineBasicBlock *BestSucc = 0;
    507 
    508     // Look for the best viable successor if there is one to place immediately
    509     // after this block.
    510     BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
    511 
    512     // If an immediate successor isn't available, look for the best viable
    513     // block among those we've identified as not violating the loop's CFG at
    514     // this point. This won't be a fallthrough, but it will increase locality.
    515     if (!BestSucc)
    516       BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter);
    517 
    518     if (!BestSucc) {
    519       BestSucc = getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt,
    520                                        BlockFilter);
    521       if (!BestSucc)
    522         break;
    523 
    524       DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the "
    525                       "layout successor until the CFG reduces\n");
    526     }
    527 
    528     // Place this block, updating the datastructures to reflect its placement.
    529     BlockChain &SuccChain = *BlockToChain[BestSucc];
    530     // Zero out LoopPredecessors for the successor we're about to merge in case
    531     // we selected a successor that didn't fit naturally into the CFG.
    532     SuccChain.LoopPredecessors = 0;
    533     DEBUG(dbgs() << "Merging from " << getBlockNum(BB)
    534                  << " to " << getBlockNum(BestSucc) << "\n");
    535     markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter);
    536     Chain.merge(BestSucc, &SuccChain);
    537     BB = *llvm::prior(Chain.end());
    538   }
    539 
    540   DEBUG(dbgs() << "Finished forming chain for header block "
    541                << getBlockNum(*Chain.begin()) << "\n");
    542 }
    543 
    544 /// \brief Find the best loop top block for layout.
    545 ///
    546 /// Look for a block which is strictly better than the loop header for laying
    547 /// out at the top of the loop. This looks for one and only one pattern:
    548 /// a latch block with no conditional exit. This block will cause a conditional
    549 /// jump around it or will be the bottom of the loop if we lay it out in place,
    550 /// but if it it doesn't end up at the bottom of the loop for any reason,
    551 /// rotation alone won't fix it. Because such a block will always result in an
    552 /// unconditional jump (for the backedge) rotating it in front of the loop
    553 /// header is always profitable.
    554 MachineBasicBlock *
    555 MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
    556                                        const BlockFilterSet &LoopBlockSet) {
    557   // Check that the header hasn't been fused with a preheader block due to
    558   // crazy branches. If it has, we need to start with the header at the top to
    559   // prevent pulling the preheader into the loop body.
    560   BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
    561   if (!LoopBlockSet.count(*HeaderChain.begin()))
    562     return L.getHeader();
    563 
    564   DEBUG(dbgs() << "Finding best loop top for: "
    565                << getBlockName(L.getHeader()) << "\n");
    566 
    567   BlockFrequency BestPredFreq;
    568   MachineBasicBlock *BestPred = 0;
    569   for (MachineBasicBlock::pred_iterator PI = L.getHeader()->pred_begin(),
    570                                         PE = L.getHeader()->pred_end();
    571        PI != PE; ++PI) {
    572     MachineBasicBlock *Pred = *PI;
    573     if (!LoopBlockSet.count(Pred))
    574       continue;
    575     DEBUG(dbgs() << "    header pred: " << getBlockName(Pred) << ", "
    576                  << Pred->succ_size() << " successors, "
    577                  << MBFI->getBlockFreq(Pred) << " freq\n");
    578     if (Pred->succ_size() > 1)
    579       continue;
    580 
    581     BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
    582     if (!BestPred || PredFreq > BestPredFreq ||
    583         (!(PredFreq < BestPredFreq) &&
    584          Pred->isLayoutSuccessor(L.getHeader()))) {
    585       BestPred = Pred;
    586       BestPredFreq = PredFreq;
    587     }
    588   }
    589 
    590   // If no direct predecessor is fine, just use the loop header.
    591   if (!BestPred)
    592     return L.getHeader();
    593 
    594   // Walk backwards through any straight line of predecessors.
    595   while (BestPred->pred_size() == 1 &&
    596          (*BestPred->pred_begin())->succ_size() == 1 &&
    597          *BestPred->pred_begin() != L.getHeader())
    598     BestPred = *BestPred->pred_begin();
    599 
    600   DEBUG(dbgs() << "    final top: " << getBlockName(BestPred) << "\n");
    601   return BestPred;
    602 }
    603 
    604 
    605 /// \brief Find the best loop exiting block for layout.
    606 ///
    607 /// This routine implements the logic to analyze the loop looking for the best
    608 /// block to layout at the top of the loop. Typically this is done to maximize
    609 /// fallthrough opportunities.
    610 MachineBasicBlock *
    611 MachineBlockPlacement::findBestLoopExit(MachineFunction &F,
    612                                         MachineLoop &L,
    613                                         const BlockFilterSet &LoopBlockSet) {
    614   // We don't want to layout the loop linearly in all cases. If the loop header
    615   // is just a normal basic block in the loop, we want to look for what block
    616   // within the loop is the best one to layout at the top. However, if the loop
    617   // header has be pre-merged into a chain due to predecessors not having
    618   // analyzable branches, *and* the predecessor it is merged with is *not* part
    619   // of the loop, rotating the header into the middle of the loop will create
    620   // a non-contiguous range of blocks which is Very Bad. So start with the
    621   // header and only rotate if safe.
    622   BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
    623   if (!LoopBlockSet.count(*HeaderChain.begin()))
    624     return 0;
    625 
    626   BlockFrequency BestExitEdgeFreq;
    627   unsigned BestExitLoopDepth = 0;
    628   MachineBasicBlock *ExitingBB = 0;
    629   // If there are exits to outer loops, loop rotation can severely limit
    630   // fallthrough opportunites unless it selects such an exit. Keep a set of
    631   // blocks where rotating to exit with that block will reach an outer loop.
    632   SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
    633 
    634   DEBUG(dbgs() << "Finding best loop exit for: "
    635                << getBlockName(L.getHeader()) << "\n");
    636   for (MachineLoop::block_iterator I = L.block_begin(),
    637                                    E = L.block_end();
    638        I != E; ++I) {
    639     BlockChain &Chain = *BlockToChain[*I];
    640     // Ensure that this block is at the end of a chain; otherwise it could be
    641     // mid-way through an inner loop or a successor of an analyzable branch.
    642     if (*I != *llvm::prior(Chain.end()))
    643       continue;
    644 
    645     // Now walk the successors. We need to establish whether this has a viable
    646     // exiting successor and whether it has a viable non-exiting successor.
    647     // We store the old exiting state and restore it if a viable looping
    648     // successor isn't found.
    649     MachineBasicBlock *OldExitingBB = ExitingBB;
    650     BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;
    651     bool HasLoopingSucc = false;
    652     // FIXME: Due to the performance of the probability and weight routines in
    653     // the MBPI analysis, we use the internal weights and manually compute the
    654     // probabilities to avoid quadratic behavior.
    655     uint32_t WeightScale = 0;
    656     uint32_t SumWeight = MBPI->getSumForBlock(*I, WeightScale);
    657     for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(),
    658                                           SE = (*I)->succ_end();
    659          SI != SE; ++SI) {
    660       if ((*SI)->isLandingPad())
    661         continue;
    662       if (*SI == *I)
    663         continue;
    664       BlockChain &SuccChain = *BlockToChain[*SI];
    665       // Don't split chains, either this chain or the successor's chain.
    666       if (&Chain == &SuccChain) {
    667         DEBUG(dbgs() << "    exiting: " << getBlockName(*I) << " -> "
    668                      << getBlockName(*SI) << " (chain conflict)\n");
    669         continue;
    670       }
    671 
    672       uint32_t SuccWeight = MBPI->getEdgeWeight(*I, *SI);
    673       if (LoopBlockSet.count(*SI)) {
    674         DEBUG(dbgs() << "    looping: " << getBlockName(*I) << " -> "
    675                      << getBlockName(*SI) << " (" << SuccWeight << ")\n");
    676         HasLoopingSucc = true;
    677         continue;
    678       }
    679 
    680       unsigned SuccLoopDepth = 0;
    681       if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI)) {
    682         SuccLoopDepth = ExitLoop->getLoopDepth();
    683         if (ExitLoop->contains(&L))
    684           BlocksExitingToOuterLoop.insert(*I);
    685       }
    686 
    687       BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
    688       BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb;
    689       DEBUG(dbgs() << "    exiting: " << getBlockName(*I) << " -> "
    690                    << getBlockName(*SI) << " [L:" << SuccLoopDepth
    691                    << "] (" << ExitEdgeFreq << ")\n");
    692       // Note that we slightly bias this toward an existing layout successor to
    693       // retain incoming order in the absence of better information.
    694       // FIXME: Should we bias this more strongly? It's pretty weak.
    695       if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth ||
    696           ExitEdgeFreq > BestExitEdgeFreq ||
    697           ((*I)->isLayoutSuccessor(*SI) &&
    698            !(ExitEdgeFreq < BestExitEdgeFreq))) {
    699         BestExitEdgeFreq = ExitEdgeFreq;
    700         ExitingBB = *I;
    701       }
    702     }
    703 
    704     // Restore the old exiting state, no viable looping successor was found.
    705     if (!HasLoopingSucc) {
    706       ExitingBB = OldExitingBB;
    707       BestExitEdgeFreq = OldBestExitEdgeFreq;
    708       continue;
    709     }
    710   }
    711   // Without a candidate exiting block or with only a single block in the
    712   // loop, just use the loop header to layout the loop.
    713   if (!ExitingBB || L.getNumBlocks() == 1)
    714     return 0;
    715 
    716   // Also, if we have exit blocks which lead to outer loops but didn't select
    717   // one of them as the exiting block we are rotating toward, disable loop
    718   // rotation altogether.
    719   if (!BlocksExitingToOuterLoop.empty() &&
    720       !BlocksExitingToOuterLoop.count(ExitingBB))
    721     return 0;
    722 
    723   DEBUG(dbgs() << "  Best exiting block: " << getBlockName(ExitingBB) << "\n");
    724   return ExitingBB;
    725 }
    726 
    727 /// \brief Attempt to rotate an exiting block to the bottom of the loop.
    728 ///
    729 /// Once we have built a chain, try to rotate it to line up the hot exit block
    730 /// with fallthrough out of the loop if doing so doesn't introduce unnecessary
    731 /// branches. For example, if the loop has fallthrough into its header and out
    732 /// of its bottom already, don't rotate it.
    733 void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
    734                                        MachineBasicBlock *ExitingBB,
    735                                        const BlockFilterSet &LoopBlockSet) {
    736   if (!ExitingBB)
    737     return;
    738 
    739   MachineBasicBlock *Top = *LoopChain.begin();
    740   bool ViableTopFallthrough = false;
    741   for (MachineBasicBlock::pred_iterator PI = Top->pred_begin(),
    742                                         PE = Top->pred_end();
    743        PI != PE; ++PI) {
    744     BlockChain *PredChain = BlockToChain[*PI];
    745     if (!LoopBlockSet.count(*PI) &&
    746         (!PredChain || *PI == *llvm::prior(PredChain->end()))) {
    747       ViableTopFallthrough = true;
    748       break;
    749     }
    750   }
    751 
    752   // If the header has viable fallthrough, check whether the current loop
    753   // bottom is a viable exiting block. If so, bail out as rotating will
    754   // introduce an unnecessary branch.
    755   if (ViableTopFallthrough) {
    756     MachineBasicBlock *Bottom = *llvm::prior(LoopChain.end());
    757     for (MachineBasicBlock::succ_iterator SI = Bottom->succ_begin(),
    758                                           SE = Bottom->succ_end();
    759          SI != SE; ++SI) {
    760       BlockChain *SuccChain = BlockToChain[*SI];
    761       if (!LoopBlockSet.count(*SI) &&
    762           (!SuccChain || *SI == *SuccChain->begin()))
    763         return;
    764     }
    765   }
    766 
    767   BlockChain::iterator ExitIt = std::find(LoopChain.begin(), LoopChain.end(),
    768                                           ExitingBB);
    769   if (ExitIt == LoopChain.end())
    770     return;
    771 
    772   std::rotate(LoopChain.begin(), llvm::next(ExitIt), LoopChain.end());
    773 }
    774 
    775 /// \brief Forms basic block chains from the natural loop structures.
    776 ///
    777 /// These chains are designed to preserve the existing *structure* of the code
    778 /// as much as possible. We can then stitch the chains together in a way which
    779 /// both preserves the topological structure and minimizes taken conditional
    780 /// branches.
    781 void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
    782                                             MachineLoop &L) {
    783   // First recurse through any nested loops, building chains for those inner
    784   // loops.
    785   for (MachineLoop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI)
    786     buildLoopChains(F, **LI);
    787 
    788   SmallVector<MachineBasicBlock *, 16> BlockWorkList;
    789   BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end());
    790 
    791   // First check to see if there is an obviously preferable top block for the
    792   // loop. This will default to the header, but may end up as one of the
    793   // predecessors to the header if there is one which will result in strictly
    794   // fewer branches in the loop body.
    795   MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet);
    796 
    797   // If we selected just the header for the loop top, look for a potentially
    798   // profitable exit block in the event that rotating the loop can eliminate
    799   // branches by placing an exit edge at the bottom.
    800   MachineBasicBlock *ExitingBB = 0;
    801   if (LoopTop == L.getHeader())
    802     ExitingBB = findBestLoopExit(F, L, LoopBlockSet);
    803 
    804   BlockChain &LoopChain = *BlockToChain[LoopTop];
    805 
    806   // FIXME: This is a really lame way of walking the chains in the loop: we
    807   // walk the blocks, and use a set to prevent visiting a particular chain
    808   // twice.
    809   SmallPtrSet<BlockChain *, 4> UpdatedPreds;
    810   assert(LoopChain.LoopPredecessors == 0);
    811   UpdatedPreds.insert(&LoopChain);
    812   for (MachineLoop::block_iterator BI = L.block_begin(),
    813                                    BE = L.block_end();
    814        BI != BE; ++BI) {
    815     BlockChain &Chain = *BlockToChain[*BI];
    816     if (!UpdatedPreds.insert(&Chain))
    817       continue;
    818 
    819     assert(Chain.LoopPredecessors == 0);
    820     for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end();
    821          BCI != BCE; ++BCI) {
    822       assert(BlockToChain[*BCI] == &Chain);
    823       for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(),
    824                                             PE = (*BCI)->pred_end();
    825            PI != PE; ++PI) {
    826         if (BlockToChain[*PI] == &Chain || !LoopBlockSet.count(*PI))
    827           continue;
    828         ++Chain.LoopPredecessors;
    829       }
    830     }
    831 
    832     if (Chain.LoopPredecessors == 0)
    833       BlockWorkList.push_back(*Chain.begin());
    834   }
    835 
    836   buildChain(LoopTop, LoopChain, BlockWorkList, &LoopBlockSet);
    837   rotateLoop(LoopChain, ExitingBB, LoopBlockSet);
    838 
    839   DEBUG({
    840     // Crash at the end so we get all of the debugging output first.
    841     bool BadLoop = false;
    842     if (LoopChain.LoopPredecessors) {
    843       BadLoop = true;
    844       dbgs() << "Loop chain contains a block without its preds placed!\n"
    845              << "  Loop header:  " << getBlockName(*L.block_begin()) << "\n"
    846              << "  Chain header: " << getBlockName(*LoopChain.begin()) << "\n";
    847     }
    848     for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end();
    849          BCI != BCE; ++BCI) {
    850       dbgs() << "          ... " << getBlockName(*BCI) << "\n";
    851       if (!LoopBlockSet.erase(*BCI)) {
    852         // We don't mark the loop as bad here because there are real situations
    853         // where this can occur. For example, with an unanalyzable fallthrough
    854         // from a loop block to a non-loop block or vice versa.
    855         dbgs() << "Loop chain contains a block not contained by the loop!\n"
    856                << "  Loop header:  " << getBlockName(*L.block_begin()) << "\n"
    857                << "  Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
    858                << "  Bad block:    " << getBlockName(*BCI) << "\n";
    859       }
    860     }
    861 
    862     if (!LoopBlockSet.empty()) {
    863       BadLoop = true;
    864       for (BlockFilterSet::iterator LBI = LoopBlockSet.begin(),
    865                                     LBE = LoopBlockSet.end();
    866            LBI != LBE; ++LBI)
    867         dbgs() << "Loop contains blocks never placed into a chain!\n"
    868                << "  Loop header:  " << getBlockName(*L.block_begin()) << "\n"
    869                << "  Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
    870                << "  Bad block:    " << getBlockName(*LBI) << "\n";
    871     }
    872     assert(!BadLoop && "Detected problems with the placement of this loop.");
    873   });
    874 }
    875 
    876 void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
    877   // Ensure that every BB in the function has an associated chain to simplify
    878   // the assumptions of the remaining algorithm.
    879   SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
    880   for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
    881     MachineBasicBlock *BB = FI;
    882     BlockChain *Chain
    883       = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
    884     // Also, merge any blocks which we cannot reason about and must preserve
    885     // the exact fallthrough behavior for.
    886     for (;;) {
    887       Cond.clear();
    888       MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
    889       if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
    890         break;
    891 
    892       MachineFunction::iterator NextFI(llvm::next(FI));
    893       MachineBasicBlock *NextBB = NextFI;
    894       // Ensure that the layout successor is a viable block, as we know that
    895       // fallthrough is a possibility.
    896       assert(NextFI != FE && "Can't fallthrough past the last block.");
    897       DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: "
    898                    << getBlockName(BB) << " -> " << getBlockName(NextBB)
    899                    << "\n");
    900       Chain->merge(NextBB, 0);
    901       FI = NextFI;
    902       BB = NextBB;
    903     }
    904   }
    905 
    906   // Build any loop-based chains.
    907   for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE;
    908        ++LI)
    909     buildLoopChains(F, **LI);
    910 
    911   SmallVector<MachineBasicBlock *, 16> BlockWorkList;
    912 
    913   SmallPtrSet<BlockChain *, 4> UpdatedPreds;
    914   for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
    915     MachineBasicBlock *BB = &*FI;
    916     BlockChain &Chain = *BlockToChain[BB];
    917     if (!UpdatedPreds.insert(&Chain))
    918       continue;
    919 
    920     assert(Chain.LoopPredecessors == 0);
    921     for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end();
    922          BCI != BCE; ++BCI) {
    923       assert(BlockToChain[*BCI] == &Chain);
    924       for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(),
    925                                             PE = (*BCI)->pred_end();
    926            PI != PE; ++PI) {
    927         if (BlockToChain[*PI] == &Chain)
    928           continue;
    929         ++Chain.LoopPredecessors;
    930       }
    931     }
    932 
    933     if (Chain.LoopPredecessors == 0)
    934       BlockWorkList.push_back(*Chain.begin());
    935   }
    936 
    937   BlockChain &FunctionChain = *BlockToChain[&F.front()];
    938   buildChain(&F.front(), FunctionChain, BlockWorkList);
    939 
    940   typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType;
    941   DEBUG({
    942     // Crash at the end so we get all of the debugging output first.
    943     bool BadFunc = false;
    944     FunctionBlockSetType FunctionBlockSet;
    945     for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
    946       FunctionBlockSet.insert(FI);
    947 
    948     for (BlockChain::iterator BCI = FunctionChain.begin(),
    949                               BCE = FunctionChain.end();
    950          BCI != BCE; ++BCI)
    951       if (!FunctionBlockSet.erase(*BCI)) {
    952         BadFunc = true;
    953         dbgs() << "Function chain contains a block not in the function!\n"
    954                << "  Bad block:    " << getBlockName(*BCI) << "\n";
    955       }
    956 
    957     if (!FunctionBlockSet.empty()) {
    958       BadFunc = true;
    959       for (FunctionBlockSetType::iterator FBI = FunctionBlockSet.begin(),
    960                                           FBE = FunctionBlockSet.end();
    961            FBI != FBE; ++FBI)
    962         dbgs() << "Function contains blocks never placed into a chain!\n"
    963                << "  Bad block:    " << getBlockName(*FBI) << "\n";
    964     }
    965     assert(!BadFunc && "Detected problems with the block placement.");
    966   });
    967 
    968   // Splice the blocks into place.
    969   MachineFunction::iterator InsertPos = F.begin();
    970   for (BlockChain::iterator BI = FunctionChain.begin(),
    971                             BE = FunctionChain.end();
    972        BI != BE; ++BI) {
    973     DEBUG(dbgs() << (BI == FunctionChain.begin() ? "Placing chain "
    974                                                   : "          ... ")
    975           << getBlockName(*BI) << "\n");
    976     if (InsertPos != MachineFunction::iterator(*BI))
    977       F.splice(InsertPos, *BI);
    978     else
    979       ++InsertPos;
    980 
    981     // Update the terminator of the previous block.
    982     if (BI == FunctionChain.begin())
    983       continue;
    984     MachineBasicBlock *PrevBB = llvm::prior(MachineFunction::iterator(*BI));
    985 
    986     // FIXME: It would be awesome of updateTerminator would just return rather
    987     // than assert when the branch cannot be analyzed in order to remove this
    988     // boiler plate.
    989     Cond.clear();
    990     MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
    991     if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond))
    992       PrevBB->updateTerminator();
    993   }
    994 
    995   // Fixup the last block.
    996   Cond.clear();
    997   MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
    998   if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond))
    999     F.back().updateTerminator();
   1000 
   1001   // Walk through the backedges of the function now that we have fully laid out
   1002   // the basic blocks and align the destination of each backedge. We don't rely
   1003   // on the loop info here so that we can align backedges in unnatural CFGs and
   1004   // backedges that were introduced purely because of the loop rotations done
   1005   // during this layout pass.
   1006   // FIXME: This isn't quite right, we shouldn't align backedges that result
   1007   // from blocks being sunken below the exit block for the function.
   1008   if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
   1009     return;
   1010   unsigned Align = TLI->getPrefLoopAlignment();
   1011   if (!Align)
   1012     return;  // Don't care about loop alignment.
   1013 
   1014   SmallPtrSet<MachineBasicBlock *, 16> PreviousBlocks;
   1015   for (BlockChain::iterator BI = FunctionChain.begin(),
   1016                             BE = FunctionChain.end();
   1017        BI != BE; ++BI) {
   1018     PreviousBlocks.insert(*BI);
   1019     // Set alignment on the destination of all the back edges in the new
   1020     // ordering.
   1021     for (MachineBasicBlock::succ_iterator SI = (*BI)->succ_begin(),
   1022                                           SE = (*BI)->succ_end();
   1023          SI != SE; ++SI)
   1024       if (PreviousBlocks.count(*SI))
   1025         (*SI)->setAlignment(Align);
   1026   }
   1027 }
   1028 
   1029 bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
   1030   // Check for single-block functions and skip them.
   1031   if (llvm::next(F.begin()) == F.end())
   1032     return false;
   1033 
   1034   MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
   1035   MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
   1036   MLI = &getAnalysis<MachineLoopInfo>();
   1037   TII = F.getTarget().getInstrInfo();
   1038   TLI = F.getTarget().getTargetLowering();
   1039   assert(BlockToChain.empty());
   1040 
   1041   buildCFGChains(F);
   1042 
   1043   BlockToChain.clear();
   1044   ChainAllocator.DestroyAll();
   1045 
   1046   // We always return true as we have no way to track whether the final order
   1047   // differs from the original order.
   1048   return true;
   1049 }
   1050 
   1051 namespace {
   1052 /// \brief A pass to compute block placement statistics.
   1053 ///
   1054 /// A separate pass to compute interesting statistics for evaluating block
   1055 /// placement. This is separate from the actual placement pass so that they can
   1056 /// be computed in the absense of any placement transformations or when using
   1057 /// alternative placement strategies.
   1058 class MachineBlockPlacementStats : public MachineFunctionPass {
   1059   /// \brief A handle to the branch probability pass.
   1060   const MachineBranchProbabilityInfo *MBPI;
   1061 
   1062   /// \brief A handle to the function-wide block frequency pass.
   1063   const MachineBlockFrequencyInfo *MBFI;
   1064 
   1065 public:
   1066   static char ID; // Pass identification, replacement for typeid
   1067   MachineBlockPlacementStats() : MachineFunctionPass(ID) {
   1068     initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry());
   1069   }
   1070 
   1071   bool runOnMachineFunction(MachineFunction &F);
   1072 
   1073   void getAnalysisUsage(AnalysisUsage &AU) const {
   1074     AU.addRequired<MachineBranchProbabilityInfo>();
   1075     AU.addRequired<MachineBlockFrequencyInfo>();
   1076     AU.setPreservesAll();
   1077     MachineFunctionPass::getAnalysisUsage(AU);
   1078   }
   1079 };
   1080 }
   1081 
   1082 char MachineBlockPlacementStats::ID = 0;
   1083 char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID;
   1084 INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats",
   1085                       "Basic Block Placement Stats", false, false)
   1086 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
   1087 INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
   1088 INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats",
   1089                     "Basic Block Placement Stats", false, false)
   1090 
   1091 bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
   1092   // Check for single-block functions and skip them.
   1093   if (llvm::next(F.begin()) == F.end())
   1094     return false;
   1095 
   1096   MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
   1097   MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
   1098 
   1099   for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
   1100     BlockFrequency BlockFreq = MBFI->getBlockFreq(I);
   1101     Statistic &NumBranches = (I->succ_size() > 1) ? NumCondBranches
   1102                                                   : NumUncondBranches;
   1103     Statistic &BranchTakenFreq = (I->succ_size() > 1) ? CondBranchTakenFreq
   1104                                                       : UncondBranchTakenFreq;
   1105     for (MachineBasicBlock::succ_iterator SI = I->succ_begin(),
   1106                                           SE = I->succ_end();
   1107          SI != SE; ++SI) {
   1108       // Skip if this successor is a fallthrough.
   1109       if (I->isLayoutSuccessor(*SI))
   1110         continue;
   1111 
   1112       BlockFrequency EdgeFreq = BlockFreq * MBPI->getEdgeProbability(I, *SI);
   1113       ++NumBranches;
   1114       BranchTakenFreq += EdgeFreq.getFrequency();
   1115     }
   1116   }
   1117 
   1118   return false;
   1119 }
   1120 
   1121