Home | History | Annotate | Download | only in CodeGen
      1 //===-- MachineBlockPlacement.cpp - Basic Block Code Layout optimization --===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements basic block placement transformations using the CFG
     11 // structure and branch probability estimates.
     12 //
     13 // The pass strives to preserve the structure of the CFG (that is, retain
     14 // a topological ordering of basic blocks) in the absence of a *strong* signal
     15 // to the contrary from probabilities. However, within the CFG structure, it
     16 // attempts to choose an ordering which favors placing more likely sequences of
     17 // blocks adjacent to each other.
     18 //
     19 // The algorithm works from the inner-most loop within a function outward, and
     20 // at each stage walks through the basic blocks, trying to coalesce them into
     21 // sequential chains where allowed by the CFG (or demanded by heavy
     22 // probabilities). Finally, it walks the blocks in topological order, and the
     23 // first time it reaches a chain of basic blocks, it schedules them in the
     24 // function in-order.
     25 //
     26 //===----------------------------------------------------------------------===//
     27 
     28 #define DEBUG_TYPE "block-placement2"
     29 #include "llvm/CodeGen/Passes.h"
     30 #include "llvm/ADT/DenseMap.h"
     31 #include "llvm/ADT/SmallPtrSet.h"
     32 #include "llvm/ADT/SmallVector.h"
     33 #include "llvm/ADT/Statistic.h"
     34 #include "llvm/CodeGen/MachineBasicBlock.h"
     35 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
     36 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
     37 #include "llvm/CodeGen/MachineFunction.h"
     38 #include "llvm/CodeGen/MachineFunctionPass.h"
     39 #include "llvm/CodeGen/MachineLoopInfo.h"
     40 #include "llvm/CodeGen/MachineModuleInfo.h"
     41 #include "llvm/Support/Allocator.h"
     42 #include "llvm/Support/CommandLine.h"
     43 #include "llvm/Support/Debug.h"
     44 #include "llvm/Target/TargetInstrInfo.h"
     45 #include "llvm/Target/TargetLowering.h"
     46 #include <algorithm>
     47 using namespace llvm;
     48 
     49 STATISTIC(NumCondBranches, "Number of conditional branches");
     50 STATISTIC(NumUncondBranches, "Number of uncondittional branches");
     51 STATISTIC(CondBranchTakenFreq,
     52           "Potential frequency of taking conditional branches");
     53 STATISTIC(UncondBranchTakenFreq,
     54           "Potential frequency of taking unconditional branches");
     55 
     56 static cl::opt<unsigned> AlignAllBlock("align-all-blocks",
     57                                        cl::desc("Force the alignment of all "
     58                                                 "blocks in the function."),
     59                                        cl::init(0), cl::Hidden);
     60 
     61 namespace {
     62 class BlockChain;
     63 /// \brief Type for our function-wide basic block -> block chain mapping.
     64 typedef DenseMap<MachineBasicBlock *, BlockChain *> BlockToChainMapType;
     65 }
     66 
     67 namespace {
     68 /// \brief A chain of blocks which will be laid out contiguously.
     69 ///
     70 /// This is the datastructure representing a chain of consecutive blocks that
     71 /// are profitable to layout together in order to maximize fallthrough
     72 /// probabilities and code locality. We also can use a block chain to represent
     73 /// a sequence of basic blocks which have some external (correctness)
     74 /// requirement for sequential layout.
     75 ///
     76 /// Chains can be built around a single basic block and can be merged to grow
     77 /// them. They participate in a block-to-chain mapping, which is updated
     78 /// automatically as chains are merged together.
     79 class BlockChain {
     80   /// \brief The sequence of blocks belonging to this chain.
     81   ///
     82   /// This is the sequence of blocks for a particular chain. These will be laid
     83   /// out in-order within the function.
     84   SmallVector<MachineBasicBlock *, 4> Blocks;
     85 
     86   /// \brief A handle to the function-wide basic block to block chain mapping.
     87   ///
     88   /// This is retained in each block chain to simplify the computation of child
     89   /// block chains for SCC-formation and iteration. We store the edges to child
     90   /// basic blocks, and map them back to their associated chains using this
     91   /// structure.
     92   BlockToChainMapType &BlockToChain;
     93 
     94 public:
     95   /// \brief Construct a new BlockChain.
     96   ///
     97   /// This builds a new block chain representing a single basic block in the
     98   /// function. It also registers itself as the chain that block participates
     99   /// in with the BlockToChain mapping.
    100   BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)
    101     : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) {
    102     assert(BB && "Cannot create a chain with a null basic block");
    103     BlockToChain[BB] = this;
    104   }
    105 
    106   /// \brief Iterator over blocks within the chain.
    107   typedef SmallVectorImpl<MachineBasicBlock *>::iterator iterator;
    108 
    109   /// \brief Beginning of blocks within the chain.
    110   iterator begin() { return Blocks.begin(); }
    111 
    112   /// \brief End of blocks within the chain.
    113   iterator end() { return Blocks.end(); }
    114 
    115   /// \brief Merge a block chain into this one.
    116   ///
    117   /// This routine merges a block chain into this one. It takes care of forming
    118   /// a contiguous sequence of basic blocks, updating the edge list, and
    119   /// updating the block -> chain mapping. It does not free or tear down the
    120   /// old chain, but the old chain's block list is no longer valid.
    121   void merge(MachineBasicBlock *BB, BlockChain *Chain) {
    122     assert(BB);
    123     assert(!Blocks.empty());
    124 
    125     // Fast path in case we don't have a chain already.
    126     if (!Chain) {
    127       assert(!BlockToChain[BB]);
    128       Blocks.push_back(BB);
    129       BlockToChain[BB] = this;
    130       return;
    131     }
    132 
    133     assert(BB == *Chain->begin());
    134     assert(Chain->begin() != Chain->end());
    135 
    136     // Update the incoming blocks to point to this chain, and add them to the
    137     // chain structure.
    138     for (BlockChain::iterator BI = Chain->begin(), BE = Chain->end();
    139          BI != BE; ++BI) {
    140       Blocks.push_back(*BI);
    141       assert(BlockToChain[*BI] == Chain && "Incoming blocks not in chain");
    142       BlockToChain[*BI] = this;
    143     }
    144   }
    145 
    146 #ifndef NDEBUG
    147   /// \brief Dump the blocks in this chain.
    148   void dump() LLVM_ATTRIBUTE_USED {
    149     for (iterator I = begin(), E = end(); I != E; ++I)
    150       (*I)->dump();
    151   }
    152 #endif // NDEBUG
    153 
    154   /// \brief Count of predecessors within the loop currently being processed.
    155   ///
    156   /// This count is updated at each loop we process to represent the number of
    157   /// in-loop predecessors of this chain.
    158   unsigned LoopPredecessors;
    159 };
    160 }
    161 
    162 namespace {
    163 class MachineBlockPlacement : public MachineFunctionPass {
    164   /// \brief A typedef for a block filter set.
    165   typedef SmallPtrSet<MachineBasicBlock *, 16> BlockFilterSet;
    166 
    167   /// \brief A handle to the branch probability pass.
    168   const MachineBranchProbabilityInfo *MBPI;
    169 
    170   /// \brief A handle to the function-wide block frequency pass.
    171   const MachineBlockFrequencyInfo *MBFI;
    172 
    173   /// \brief A handle to the loop info.
    174   const MachineLoopInfo *MLI;
    175 
    176   /// \brief A handle to the target's instruction info.
    177   const TargetInstrInfo *TII;
    178 
    179   /// \brief A handle to the target's lowering info.
    180   const TargetLoweringBase *TLI;
    181 
    182   /// \brief Allocator and owner of BlockChain structures.
    183   ///
    184   /// We build BlockChains lazily while processing the loop structure of
    185   /// a function. To reduce malloc traffic, we allocate them using this
    186   /// slab-like allocator, and destroy them after the pass completes. An
    187   /// important guarantee is that this allocator produces stable pointers to
    188   /// the chains.
    189   SpecificBumpPtrAllocator<BlockChain> ChainAllocator;
    190 
    191   /// \brief Function wide BasicBlock to BlockChain mapping.
    192   ///
    193   /// This mapping allows efficiently moving from any given basic block to the
    194   /// BlockChain it participates in, if any. We use it to, among other things,
    195   /// allow implicitly defining edges between chains as the existing edges
    196   /// between basic blocks.
    197   DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;
    198 
    199   void markChainSuccessors(BlockChain &Chain,
    200                            MachineBasicBlock *LoopHeaderBB,
    201                            SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
    202                            const BlockFilterSet *BlockFilter = 0);
    203   MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
    204                                          BlockChain &Chain,
    205                                          const BlockFilterSet *BlockFilter);
    206   MachineBasicBlock *selectBestCandidateBlock(
    207       BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
    208       const BlockFilterSet *BlockFilter);
    209   MachineBasicBlock *getFirstUnplacedBlock(
    210       MachineFunction &F,
    211       const BlockChain &PlacedChain,
    212       MachineFunction::iterator &PrevUnplacedBlockIt,
    213       const BlockFilterSet *BlockFilter);
    214   void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
    215                   SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
    216                   const BlockFilterSet *BlockFilter = 0);
    217   MachineBasicBlock *findBestLoopTop(MachineLoop &L,
    218                                      const BlockFilterSet &LoopBlockSet);
    219   MachineBasicBlock *findBestLoopExit(MachineFunction &F,
    220                                       MachineLoop &L,
    221                                       const BlockFilterSet &LoopBlockSet);
    222   void buildLoopChains(MachineFunction &F, MachineLoop &L);
    223   void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB,
    224                   const BlockFilterSet &LoopBlockSet);
    225   void buildCFGChains(MachineFunction &F);
    226 
    227 public:
    228   static char ID; // Pass identification, replacement for typeid
    229   MachineBlockPlacement() : MachineFunctionPass(ID) {
    230     initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry());
    231   }
    232 
    233   bool runOnMachineFunction(MachineFunction &F);
    234 
    235   void getAnalysisUsage(AnalysisUsage &AU) const {
    236     AU.addRequired<MachineBranchProbabilityInfo>();
    237     AU.addRequired<MachineBlockFrequencyInfo>();
    238     AU.addRequired<MachineLoopInfo>();
    239     MachineFunctionPass::getAnalysisUsage(AU);
    240   }
    241 };
    242 }
    243 
    244 char MachineBlockPlacement::ID = 0;
    245 char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
    246 INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement2",
    247                       "Branch Probability Basic Block Placement", false, false)
    248 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
    249 INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
    250 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
    251 INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2",
    252                     "Branch Probability Basic Block Placement", false, false)
    253 
    254 #ifndef NDEBUG
    255 /// \brief Helper to print the name of a MBB.
    256 ///
    257 /// Only used by debug logging.
    258 static std::string getBlockName(MachineBasicBlock *BB) {
    259   std::string Result;
    260   raw_string_ostream OS(Result);
    261   OS << "BB#" << BB->getNumber()
    262      << " (derived from LLVM BB '" << BB->getName() << "')";
    263   OS.flush();
    264   return Result;
    265 }
    266 
    267 /// \brief Helper to print the number of a MBB.
    268 ///
    269 /// Only used by debug logging.
    270 static std::string getBlockNum(MachineBasicBlock *BB) {
    271   std::string Result;
    272   raw_string_ostream OS(Result);
    273   OS << "BB#" << BB->getNumber();
    274   OS.flush();
    275   return Result;
    276 }
    277 #endif
    278 
    279 /// \brief Mark a chain's successors as having one fewer preds.
    280 ///
    281 /// When a chain is being merged into the "placed" chain, this routine will
    282 /// quickly walk the successors of each block in the chain and mark them as
    283 /// having one fewer active predecessor. It also adds any successors of this
    284 /// chain which reach the zero-predecessor state to the worklist passed in.
    285 void MachineBlockPlacement::markChainSuccessors(
    286     BlockChain &Chain,
    287     MachineBasicBlock *LoopHeaderBB,
    288     SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
    289     const BlockFilterSet *BlockFilter) {
    290   // Walk all the blocks in this chain, marking their successors as having
    291   // a predecessor placed.
    292   for (BlockChain::iterator CBI = Chain.begin(), CBE = Chain.end();
    293        CBI != CBE; ++CBI) {
    294     // Add any successors for which this is the only un-placed in-loop
    295     // predecessor to the worklist as a viable candidate for CFG-neutral
    296     // placement. No subsequent placement of this block will violate the CFG
    297     // shape, so we get to use heuristics to choose a favorable placement.
    298     for (MachineBasicBlock::succ_iterator SI = (*CBI)->succ_begin(),
    299                                           SE = (*CBI)->succ_end();
    300          SI != SE; ++SI) {
    301       if (BlockFilter && !BlockFilter->count(*SI))
    302         continue;
    303       BlockChain &SuccChain = *BlockToChain[*SI];
    304       // Disregard edges within a fixed chain, or edges to the loop header.
    305       if (&Chain == &SuccChain || *SI == LoopHeaderBB)
    306         continue;
    307 
    308       // This is a cross-chain edge that is within the loop, so decrement the
    309       // loop predecessor count of the destination chain.
    310       if (SuccChain.LoopPredecessors > 0 && --SuccChain.LoopPredecessors == 0)
    311         BlockWorkList.push_back(*SuccChain.begin());
    312     }
    313   }
    314 }
    315 
    316 /// \brief Select the best successor for a block.
    317 ///
    318 /// This looks across all successors of a particular block and attempts to
    319 /// select the "best" one to be the layout successor. It only considers direct
    320 /// successors which also pass the block filter. It will attempt to avoid
    321 /// breaking CFG structure, but cave and break such structures in the case of
    322 /// very hot successor edges.
    323 ///
    324 /// \returns The best successor block found, or null if none are viable.
    325 MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
    326     MachineBasicBlock *BB, BlockChain &Chain,
    327     const BlockFilterSet *BlockFilter) {
    328   const BranchProbability HotProb(4, 5); // 80%
    329 
    330   MachineBasicBlock *BestSucc = 0;
    331   // FIXME: Due to the performance of the probability and weight routines in
    332   // the MBPI analysis, we manually compute probabilities using the edge
    333   // weights. This is suboptimal as it means that the somewhat subtle
    334   // definition of edge weight semantics is encoded here as well. We should
    335   // improve the MBPI interface to efficiently support query patterns such as
    336   // this.
    337   uint32_t BestWeight = 0;
    338   uint32_t WeightScale = 0;
    339   uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale);
    340   DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
    341   for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
    342                                         SE = BB->succ_end();
    343        SI != SE; ++SI) {
    344     if (BlockFilter && !BlockFilter->count(*SI))
    345       continue;
    346     BlockChain &SuccChain = *BlockToChain[*SI];
    347     if (&SuccChain == &Chain) {
    348       DEBUG(dbgs() << "    " << getBlockName(*SI) << " -> Already merged!\n");
    349       continue;
    350     }
    351     if (*SI != *SuccChain.begin()) {
    352       DEBUG(dbgs() << "    " << getBlockName(*SI) << " -> Mid chain!\n");
    353       continue;
    354     }
    355 
    356     uint32_t SuccWeight = MBPI->getEdgeWeight(BB, *SI);
    357     BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
    358 
    359     // Only consider successors which are either "hot", or wouldn't violate
    360     // any CFG constraints.
    361     if (SuccChain.LoopPredecessors != 0) {
    362       if (SuccProb < HotProb) {
    363         DEBUG(dbgs() << "    " << getBlockName(*SI) << " -> CFG conflict\n");
    364         continue;
    365       }
    366 
    367       // Make sure that a hot successor doesn't have a globally more important
    368       // predecessor.
    369       BlockFrequency CandidateEdgeFreq
    370         = MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl();
    371       bool BadCFGConflict = false;
    372       for (MachineBasicBlock::pred_iterator PI = (*SI)->pred_begin(),
    373                                             PE = (*SI)->pred_end();
    374            PI != PE; ++PI) {
    375         if (*PI == *SI || (BlockFilter && !BlockFilter->count(*PI)) ||
    376             BlockToChain[*PI] == &Chain)
    377           continue;
    378         BlockFrequency PredEdgeFreq
    379           = MBFI->getBlockFreq(*PI) * MBPI->getEdgeProbability(*PI, *SI);
    380         if (PredEdgeFreq >= CandidateEdgeFreq) {
    381           BadCFGConflict = true;
    382           break;
    383         }
    384       }
    385       if (BadCFGConflict) {
    386         DEBUG(dbgs() << "    " << getBlockName(*SI)
    387                                << " -> non-cold CFG conflict\n");
    388         continue;
    389       }
    390     }
    391 
    392     DEBUG(dbgs() << "    " << getBlockName(*SI) << " -> " << SuccProb
    393                  << " (prob)"
    394                  << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "")
    395                  << "\n");
    396     if (BestSucc && BestWeight >= SuccWeight)
    397       continue;
    398     BestSucc = *SI;
    399     BestWeight = SuccWeight;
    400   }
    401   return BestSucc;
    402 }
    403 
    404 namespace {
    405 /// \brief Predicate struct to detect blocks already placed.
    406 class IsBlockPlaced {
    407   const BlockChain &PlacedChain;
    408   const BlockToChainMapType &BlockToChain;
    409 
    410 public:
    411   IsBlockPlaced(const BlockChain &PlacedChain,
    412                 const BlockToChainMapType &BlockToChain)
    413       : PlacedChain(PlacedChain), BlockToChain(BlockToChain) {}
    414 
    415   bool operator()(MachineBasicBlock *BB) const {
    416     return BlockToChain.lookup(BB) == &PlacedChain;
    417   }
    418 };
    419 }
    420 
    421 /// \brief Select the best block from a worklist.
    422 ///
    423 /// This looks through the provided worklist as a list of candidate basic
    424 /// blocks and select the most profitable one to place. The definition of
    425 /// profitable only really makes sense in the context of a loop. This returns
    426 /// the most frequently visited block in the worklist, which in the case of
    427 /// a loop, is the one most desirable to be physically close to the rest of the
    428 /// loop body in order to improve icache behavior.
    429 ///
    430 /// \returns The best block found, or null if none are viable.
    431 MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
    432     BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
    433     const BlockFilterSet *BlockFilter) {
    434   // Once we need to walk the worklist looking for a candidate, cleanup the
    435   // worklist of already placed entries.
    436   // FIXME: If this shows up on profiles, it could be folded (at the cost of
    437   // some code complexity) into the loop below.
    438   WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
    439                                 IsBlockPlaced(Chain, BlockToChain)),
    440                  WorkList.end());
    441 
    442   MachineBasicBlock *BestBlock = 0;
    443   BlockFrequency BestFreq;
    444   for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(),
    445                                                       WBE = WorkList.end();
    446        WBI != WBE; ++WBI) {
    447     BlockChain &SuccChain = *BlockToChain[*WBI];
    448     if (&SuccChain == &Chain) {
    449       DEBUG(dbgs() << "    " << getBlockName(*WBI)
    450                    << " -> Already merged!\n");
    451       continue;
    452     }
    453     assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block");
    454 
    455     BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI);
    456     DEBUG(dbgs() << "    " << getBlockName(*WBI) << " -> " << CandidateFreq
    457                  << " (freq)\n");
    458     if (BestBlock && BestFreq >= CandidateFreq)
    459       continue;
    460     BestBlock = *WBI;
    461     BestFreq = CandidateFreq;
    462   }
    463   return BestBlock;
    464 }
    465 
    466 /// \brief Retrieve the first unplaced basic block.
    467 ///
    468 /// This routine is called when we are unable to use the CFG to walk through
    469 /// all of the basic blocks and form a chain due to unnatural loops in the CFG.
    470 /// We walk through the function's blocks in order, starting from the
    471 /// LastUnplacedBlockIt. We update this iterator on each call to avoid
    472 /// re-scanning the entire sequence on repeated calls to this routine.
    473 MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
    474     MachineFunction &F, const BlockChain &PlacedChain,
    475     MachineFunction::iterator &PrevUnplacedBlockIt,
    476     const BlockFilterSet *BlockFilter) {
    477   for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E;
    478        ++I) {
    479     if (BlockFilter && !BlockFilter->count(I))
    480       continue;
    481     if (BlockToChain[I] != &PlacedChain) {
    482       PrevUnplacedBlockIt = I;
    483       // Now select the head of the chain to which the unplaced block belongs
    484       // as the block to place. This will force the entire chain to be placed,
    485       // and satisfies the requirements of merging chains.
    486       return *BlockToChain[I]->begin();
    487     }
    488   }
    489   return 0;
    490 }
    491 
    492 void MachineBlockPlacement::buildChain(
    493     MachineBasicBlock *BB,
    494     BlockChain &Chain,
    495     SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
    496     const BlockFilterSet *BlockFilter) {
    497   assert(BB);
    498   assert(BlockToChain[BB] == &Chain);
    499   MachineFunction &F = *BB->getParent();
    500   MachineFunction::iterator PrevUnplacedBlockIt = F.begin();
    501 
    502   MachineBasicBlock *LoopHeaderBB = BB;
    503   markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter);
    504   BB = *llvm::prior(Chain.end());
    505   for (;;) {
    506     assert(BB);
    507     assert(BlockToChain[BB] == &Chain);
    508     assert(*llvm::prior(Chain.end()) == BB);
    509 
    510     // Look for the best viable successor if there is one to place immediately
    511     // after this block.
    512     MachineBasicBlock *BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
    513 
    514     // If an immediate successor isn't available, look for the best viable
    515     // block among those we've identified as not violating the loop's CFG at
    516     // this point. This won't be a fallthrough, but it will increase locality.
    517     if (!BestSucc)
    518       BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter);
    519 
    520     if (!BestSucc) {
    521       BestSucc = getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt,
    522                                        BlockFilter);
    523       if (!BestSucc)
    524         break;
    525 
    526       DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the "
    527                       "layout successor until the CFG reduces\n");
    528     }
    529 
    530     // Place this block, updating the datastructures to reflect its placement.
    531     BlockChain &SuccChain = *BlockToChain[BestSucc];
    532     // Zero out LoopPredecessors for the successor we're about to merge in case
    533     // we selected a successor that didn't fit naturally into the CFG.
    534     SuccChain.LoopPredecessors = 0;
    535     DEBUG(dbgs() << "Merging from " << getBlockNum(BB)
    536                  << " to " << getBlockNum(BestSucc) << "\n");
    537     markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter);
    538     Chain.merge(BestSucc, &SuccChain);
    539     BB = *llvm::prior(Chain.end());
    540   }
    541 
    542   DEBUG(dbgs() << "Finished forming chain for header block "
    543                << getBlockNum(*Chain.begin()) << "\n");
    544 }
    545 
    546 /// \brief Find the best loop top block for layout.
    547 ///
    548 /// Look for a block which is strictly better than the loop header for laying
    549 /// out at the top of the loop. This looks for one and only one pattern:
    550 /// a latch block with no conditional exit. This block will cause a conditional
    551 /// jump around it or will be the bottom of the loop if we lay it out in place,
    552 /// but if it it doesn't end up at the bottom of the loop for any reason,
    553 /// rotation alone won't fix it. Because such a block will always result in an
    554 /// unconditional jump (for the backedge) rotating it in front of the loop
    555 /// header is always profitable.
    556 MachineBasicBlock *
    557 MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
    558                                        const BlockFilterSet &LoopBlockSet) {
    559   // Check that the header hasn't been fused with a preheader block due to
    560   // crazy branches. If it has, we need to start with the header at the top to
    561   // prevent pulling the preheader into the loop body.
    562   BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
    563   if (!LoopBlockSet.count(*HeaderChain.begin()))
    564     return L.getHeader();
    565 
    566   DEBUG(dbgs() << "Finding best loop top for: "
    567                << getBlockName(L.getHeader()) << "\n");
    568 
    569   BlockFrequency BestPredFreq;
    570   MachineBasicBlock *BestPred = 0;
    571   for (MachineBasicBlock::pred_iterator PI = L.getHeader()->pred_begin(),
    572                                         PE = L.getHeader()->pred_end();
    573        PI != PE; ++PI) {
    574     MachineBasicBlock *Pred = *PI;
    575     if (!LoopBlockSet.count(Pred))
    576       continue;
    577     DEBUG(dbgs() << "    header pred: " << getBlockName(Pred) << ", "
    578                  << Pred->succ_size() << " successors, "
    579                  << MBFI->getBlockFreq(Pred) << " freq\n");
    580     if (Pred->succ_size() > 1)
    581       continue;
    582 
    583     BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
    584     if (!BestPred || PredFreq > BestPredFreq ||
    585         (!(PredFreq < BestPredFreq) &&
    586          Pred->isLayoutSuccessor(L.getHeader()))) {
    587       BestPred = Pred;
    588       BestPredFreq = PredFreq;
    589     }
    590   }
    591 
    592   // If no direct predecessor is fine, just use the loop header.
    593   if (!BestPred)
    594     return L.getHeader();
    595 
    596   // Walk backwards through any straight line of predecessors.
    597   while (BestPred->pred_size() == 1 &&
    598          (*BestPred->pred_begin())->succ_size() == 1 &&
    599          *BestPred->pred_begin() != L.getHeader())
    600     BestPred = *BestPred->pred_begin();
    601 
    602   DEBUG(dbgs() << "    final top: " << getBlockName(BestPred) << "\n");
    603   return BestPred;
    604 }
    605 
    606 
    607 /// \brief Find the best loop exiting block for layout.
    608 ///
    609 /// This routine implements the logic to analyze the loop looking for the best
    610 /// block to layout at the top of the loop. Typically this is done to maximize
    611 /// fallthrough opportunities.
    612 MachineBasicBlock *
    613 MachineBlockPlacement::findBestLoopExit(MachineFunction &F,
    614                                         MachineLoop &L,
    615                                         const BlockFilterSet &LoopBlockSet) {
    616   // We don't want to layout the loop linearly in all cases. If the loop header
    617   // is just a normal basic block in the loop, we want to look for what block
    618   // within the loop is the best one to layout at the top. However, if the loop
    619   // header has be pre-merged into a chain due to predecessors not having
    620   // analyzable branches, *and* the predecessor it is merged with is *not* part
    621   // of the loop, rotating the header into the middle of the loop will create
    622   // a non-contiguous range of blocks which is Very Bad. So start with the
    623   // header and only rotate if safe.
    624   BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
    625   if (!LoopBlockSet.count(*HeaderChain.begin()))
    626     return 0;
    627 
    628   BlockFrequency BestExitEdgeFreq;
    629   unsigned BestExitLoopDepth = 0;
    630   MachineBasicBlock *ExitingBB = 0;
    631   // If there are exits to outer loops, loop rotation can severely limit
    632   // fallthrough opportunites unless it selects such an exit. Keep a set of
    633   // blocks where rotating to exit with that block will reach an outer loop.
    634   SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
    635 
    636   DEBUG(dbgs() << "Finding best loop exit for: "
    637                << getBlockName(L.getHeader()) << "\n");
    638   for (MachineLoop::block_iterator I = L.block_begin(),
    639                                    E = L.block_end();
    640        I != E; ++I) {
    641     BlockChain &Chain = *BlockToChain[*I];
    642     // Ensure that this block is at the end of a chain; otherwise it could be
    643     // mid-way through an inner loop or a successor of an analyzable branch.
    644     if (*I != *llvm::prior(Chain.end()))
    645       continue;
    646 
    647     // Now walk the successors. We need to establish whether this has a viable
    648     // exiting successor and whether it has a viable non-exiting successor.
    649     // We store the old exiting state and restore it if a viable looping
    650     // successor isn't found.
    651     MachineBasicBlock *OldExitingBB = ExitingBB;
    652     BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;
    653     bool HasLoopingSucc = false;
    654     // FIXME: Due to the performance of the probability and weight routines in
    655     // the MBPI analysis, we use the internal weights and manually compute the
    656     // probabilities to avoid quadratic behavior.
    657     uint32_t WeightScale = 0;
    658     uint32_t SumWeight = MBPI->getSumForBlock(*I, WeightScale);
    659     for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(),
    660                                           SE = (*I)->succ_end();
    661          SI != SE; ++SI) {
    662       if ((*SI)->isLandingPad())
    663         continue;
    664       if (*SI == *I)
    665         continue;
    666       BlockChain &SuccChain = *BlockToChain[*SI];
    667       // Don't split chains, either this chain or the successor's chain.
    668       if (&Chain == &SuccChain) {
    669         DEBUG(dbgs() << "    exiting: " << getBlockName(*I) << " -> "
    670                      << getBlockName(*SI) << " (chain conflict)\n");
    671         continue;
    672       }
    673 
    674       uint32_t SuccWeight = MBPI->getEdgeWeight(*I, *SI);
    675       if (LoopBlockSet.count(*SI)) {
    676         DEBUG(dbgs() << "    looping: " << getBlockName(*I) << " -> "
    677                      << getBlockName(*SI) << " (" << SuccWeight << ")\n");
    678         HasLoopingSucc = true;
    679         continue;
    680       }
    681 
    682       unsigned SuccLoopDepth = 0;
    683       if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI)) {
    684         SuccLoopDepth = ExitLoop->getLoopDepth();
    685         if (ExitLoop->contains(&L))
    686           BlocksExitingToOuterLoop.insert(*I);
    687       }
    688 
    689       BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
    690       BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb;
    691       DEBUG(dbgs() << "    exiting: " << getBlockName(*I) << " -> "
    692                    << getBlockName(*SI) << " [L:" << SuccLoopDepth
    693                    << "] (" << ExitEdgeFreq << ")\n");
    694       // Note that we slightly bias this toward an existing layout successor to
    695       // retain incoming order in the absence of better information.
    696       // FIXME: Should we bias this more strongly? It's pretty weak.
    697       if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth ||
    698           ExitEdgeFreq > BestExitEdgeFreq ||
    699           ((*I)->isLayoutSuccessor(*SI) &&
    700            !(ExitEdgeFreq < BestExitEdgeFreq))) {
    701         BestExitEdgeFreq = ExitEdgeFreq;
    702         ExitingBB = *I;
    703       }
    704     }
    705 
    706     // Restore the old exiting state, no viable looping successor was found.
    707     if (!HasLoopingSucc) {
    708       ExitingBB = OldExitingBB;
    709       BestExitEdgeFreq = OldBestExitEdgeFreq;
    710       continue;
    711     }
    712   }
    713   // Without a candidate exiting block or with only a single block in the
    714   // loop, just use the loop header to layout the loop.
    715   if (!ExitingBB || L.getNumBlocks() == 1)
    716     return 0;
    717 
    718   // Also, if we have exit blocks which lead to outer loops but didn't select
    719   // one of them as the exiting block we are rotating toward, disable loop
    720   // rotation altogether.
    721   if (!BlocksExitingToOuterLoop.empty() &&
    722       !BlocksExitingToOuterLoop.count(ExitingBB))
    723     return 0;
    724 
    725   DEBUG(dbgs() << "  Best exiting block: " << getBlockName(ExitingBB) << "\n");
    726   return ExitingBB;
    727 }
    728 
    729 /// \brief Attempt to rotate an exiting block to the bottom of the loop.
    730 ///
    731 /// Once we have built a chain, try to rotate it to line up the hot exit block
    732 /// with fallthrough out of the loop if doing so doesn't introduce unnecessary
    733 /// branches. For example, if the loop has fallthrough into its header and out
    734 /// of its bottom already, don't rotate it.
    735 void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
    736                                        MachineBasicBlock *ExitingBB,
    737                                        const BlockFilterSet &LoopBlockSet) {
    738   if (!ExitingBB)
    739     return;
    740 
    741   MachineBasicBlock *Top = *LoopChain.begin();
    742   bool ViableTopFallthrough = false;
    743   for (MachineBasicBlock::pred_iterator PI = Top->pred_begin(),
    744                                         PE = Top->pred_end();
    745        PI != PE; ++PI) {
    746     BlockChain *PredChain = BlockToChain[*PI];
    747     if (!LoopBlockSet.count(*PI) &&
    748         (!PredChain || *PI == *llvm::prior(PredChain->end()))) {
    749       ViableTopFallthrough = true;
    750       break;
    751     }
    752   }
    753 
    754   // If the header has viable fallthrough, check whether the current loop
    755   // bottom is a viable exiting block. If so, bail out as rotating will
    756   // introduce an unnecessary branch.
    757   if (ViableTopFallthrough) {
    758     MachineBasicBlock *Bottom = *llvm::prior(LoopChain.end());
    759     for (MachineBasicBlock::succ_iterator SI = Bottom->succ_begin(),
    760                                           SE = Bottom->succ_end();
    761          SI != SE; ++SI) {
    762       BlockChain *SuccChain = BlockToChain[*SI];
    763       if (!LoopBlockSet.count(*SI) &&
    764           (!SuccChain || *SI == *SuccChain->begin()))
    765         return;
    766     }
    767   }
    768 
    769   BlockChain::iterator ExitIt = std::find(LoopChain.begin(), LoopChain.end(),
    770                                           ExitingBB);
    771   if (ExitIt == LoopChain.end())
    772     return;
    773 
    774   std::rotate(LoopChain.begin(), llvm::next(ExitIt), LoopChain.end());
    775 }
    776 
    777 /// \brief Forms basic block chains from the natural loop structures.
    778 ///
    779 /// These chains are designed to preserve the existing *structure* of the code
    780 /// as much as possible. We can then stitch the chains together in a way which
    781 /// both preserves the topological structure and minimizes taken conditional
    782 /// branches.
    783 void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
    784                                             MachineLoop &L) {
    785   // First recurse through any nested loops, building chains for those inner
    786   // loops.
    787   for (MachineLoop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI)
    788     buildLoopChains(F, **LI);
    789 
    790   SmallVector<MachineBasicBlock *, 16> BlockWorkList;
    791   BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end());
    792 
    793   // First check to see if there is an obviously preferable top block for the
    794   // loop. This will default to the header, but may end up as one of the
    795   // predecessors to the header if there is one which will result in strictly
    796   // fewer branches in the loop body.
    797   MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet);
    798 
    799   // If we selected just the header for the loop top, look for a potentially
    800   // profitable exit block in the event that rotating the loop can eliminate
    801   // branches by placing an exit edge at the bottom.
    802   MachineBasicBlock *ExitingBB = 0;
    803   if (LoopTop == L.getHeader())
    804     ExitingBB = findBestLoopExit(F, L, LoopBlockSet);
    805 
    806   BlockChain &LoopChain = *BlockToChain[LoopTop];
    807 
    808   // FIXME: This is a really lame way of walking the chains in the loop: we
    809   // walk the blocks, and use a set to prevent visiting a particular chain
    810   // twice.
    811   SmallPtrSet<BlockChain *, 4> UpdatedPreds;
    812   assert(LoopChain.LoopPredecessors == 0);
    813   UpdatedPreds.insert(&LoopChain);
    814   for (MachineLoop::block_iterator BI = L.block_begin(),
    815                                    BE = L.block_end();
    816        BI != BE; ++BI) {
    817     BlockChain &Chain = *BlockToChain[*BI];
    818     if (!UpdatedPreds.insert(&Chain))
    819       continue;
    820 
    821     assert(Chain.LoopPredecessors == 0);
    822     for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end();
    823          BCI != BCE; ++BCI) {
    824       assert(BlockToChain[*BCI] == &Chain);
    825       for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(),
    826                                             PE = (*BCI)->pred_end();
    827            PI != PE; ++PI) {
    828         if (BlockToChain[*PI] == &Chain || !LoopBlockSet.count(*PI))
    829           continue;
    830         ++Chain.LoopPredecessors;
    831       }
    832     }
    833 
    834     if (Chain.LoopPredecessors == 0)
    835       BlockWorkList.push_back(*Chain.begin());
    836   }
    837 
    838   buildChain(LoopTop, LoopChain, BlockWorkList, &LoopBlockSet);
    839   rotateLoop(LoopChain, ExitingBB, LoopBlockSet);
    840 
    841   DEBUG({
    842     // Crash at the end so we get all of the debugging output first.
    843     bool BadLoop = false;
    844     if (LoopChain.LoopPredecessors) {
    845       BadLoop = true;
    846       dbgs() << "Loop chain contains a block without its preds placed!\n"
    847              << "  Loop header:  " << getBlockName(*L.block_begin()) << "\n"
    848              << "  Chain header: " << getBlockName(*LoopChain.begin()) << "\n";
    849     }
    850     for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end();
    851          BCI != BCE; ++BCI) {
    852       dbgs() << "          ... " << getBlockName(*BCI) << "\n";
    853       if (!LoopBlockSet.erase(*BCI)) {
    854         // We don't mark the loop as bad here because there are real situations
    855         // where this can occur. For example, with an unanalyzable fallthrough
    856         // from a loop block to a non-loop block or vice versa.
    857         dbgs() << "Loop chain contains a block not contained by the loop!\n"
    858                << "  Loop header:  " << getBlockName(*L.block_begin()) << "\n"
    859                << "  Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
    860                << "  Bad block:    " << getBlockName(*BCI) << "\n";
    861       }
    862     }
    863 
    864     if (!LoopBlockSet.empty()) {
    865       BadLoop = true;
    866       for (BlockFilterSet::iterator LBI = LoopBlockSet.begin(),
    867                                     LBE = LoopBlockSet.end();
    868            LBI != LBE; ++LBI)
    869         dbgs() << "Loop contains blocks never placed into a chain!\n"
    870                << "  Loop header:  " << getBlockName(*L.block_begin()) << "\n"
    871                << "  Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
    872                << "  Bad block:    " << getBlockName(*LBI) << "\n";
    873     }
    874     assert(!BadLoop && "Detected problems with the placement of this loop.");
    875   });
    876 }
    877 
    878 void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
    879   // Ensure that every BB in the function has an associated chain to simplify
    880   // the assumptions of the remaining algorithm.
    881   SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
    882   for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
    883     MachineBasicBlock *BB = FI;
    884     BlockChain *Chain
    885       = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
    886     // Also, merge any blocks which we cannot reason about and must preserve
    887     // the exact fallthrough behavior for.
    888     for (;;) {
    889       Cond.clear();
    890       MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
    891       if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
    892         break;
    893 
    894       MachineFunction::iterator NextFI(llvm::next(FI));
    895       MachineBasicBlock *NextBB = NextFI;
    896       // Ensure that the layout successor is a viable block, as we know that
    897       // fallthrough is a possibility.
    898       assert(NextFI != FE && "Can't fallthrough past the last block.");
    899       DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: "
    900                    << getBlockName(BB) << " -> " << getBlockName(NextBB)
    901                    << "\n");
    902       Chain->merge(NextBB, 0);
    903       FI = NextFI;
    904       BB = NextBB;
    905     }
    906   }
    907 
    908   // Build any loop-based chains.
    909   for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE;
    910        ++LI)
    911     buildLoopChains(F, **LI);
    912 
    913   SmallVector<MachineBasicBlock *, 16> BlockWorkList;
    914 
    915   SmallPtrSet<BlockChain *, 4> UpdatedPreds;
    916   for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
    917     MachineBasicBlock *BB = &*FI;
    918     BlockChain &Chain = *BlockToChain[BB];
    919     if (!UpdatedPreds.insert(&Chain))
    920       continue;
    921 
    922     assert(Chain.LoopPredecessors == 0);
    923     for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end();
    924          BCI != BCE; ++BCI) {
    925       assert(BlockToChain[*BCI] == &Chain);
    926       for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(),
    927                                             PE = (*BCI)->pred_end();
    928            PI != PE; ++PI) {
    929         if (BlockToChain[*PI] == &Chain)
    930           continue;
    931         ++Chain.LoopPredecessors;
    932       }
    933     }
    934 
    935     if (Chain.LoopPredecessors == 0)
    936       BlockWorkList.push_back(*Chain.begin());
    937   }
    938 
    939   BlockChain &FunctionChain = *BlockToChain[&F.front()];
    940   buildChain(&F.front(), FunctionChain, BlockWorkList);
    941 
    942   typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType;
    943   DEBUG({
    944     // Crash at the end so we get all of the debugging output first.
    945     bool BadFunc = false;
    946     FunctionBlockSetType FunctionBlockSet;
    947     for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
    948       FunctionBlockSet.insert(FI);
    949 
    950     for (BlockChain::iterator BCI = FunctionChain.begin(),
    951                               BCE = FunctionChain.end();
    952          BCI != BCE; ++BCI)
    953       if (!FunctionBlockSet.erase(*BCI)) {
    954         BadFunc = true;
    955         dbgs() << "Function chain contains a block not in the function!\n"
    956                << "  Bad block:    " << getBlockName(*BCI) << "\n";
    957       }
    958 
    959     if (!FunctionBlockSet.empty()) {
    960       BadFunc = true;
    961       for (FunctionBlockSetType::iterator FBI = FunctionBlockSet.begin(),
    962                                           FBE = FunctionBlockSet.end();
    963            FBI != FBE; ++FBI)
    964         dbgs() << "Function contains blocks never placed into a chain!\n"
    965                << "  Bad block:    " << getBlockName(*FBI) << "\n";
    966     }
    967     assert(!BadFunc && "Detected problems with the block placement.");
    968   });
    969 
    970   // Splice the blocks into place.
    971   MachineFunction::iterator InsertPos = F.begin();
    972   for (BlockChain::iterator BI = FunctionChain.begin(),
    973                             BE = FunctionChain.end();
    974        BI != BE; ++BI) {
    975     DEBUG(dbgs() << (BI == FunctionChain.begin() ? "Placing chain "
    976                                                   : "          ... ")
    977           << getBlockName(*BI) << "\n");
    978     if (InsertPos != MachineFunction::iterator(*BI))
    979       F.splice(InsertPos, *BI);
    980     else
    981       ++InsertPos;
    982 
    983     // Update the terminator of the previous block.
    984     if (BI == FunctionChain.begin())
    985       continue;
    986     MachineBasicBlock *PrevBB = llvm::prior(MachineFunction::iterator(*BI));
    987 
    988     // FIXME: It would be awesome of updateTerminator would just return rather
    989     // than assert when the branch cannot be analyzed in order to remove this
    990     // boiler plate.
    991     Cond.clear();
    992     MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
    993     if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
    994       // The "PrevBB" is not yet updated to reflect current code layout, so,
    995       //   o. it may fall-through to a block without explict "goto" instruction
    996       //      before layout, and no longer fall-through it after layout; or
    997       //   o. just opposite.
    998       //
    999       // AnalyzeBranch() may return erroneous value for FBB when these two
   1000       // situations take place. For the first scenario FBB is mistakenly set
   1001       // NULL; for the 2nd scenario, the FBB, which is expected to be NULL,
   1002       // is mistakenly pointing to "*BI".
   1003       //
   1004       bool needUpdateBr = true;
   1005       if (!Cond.empty() && (!FBB || FBB == *BI)) {
   1006         PrevBB->updateTerminator();
   1007         needUpdateBr = false;
   1008         Cond.clear();
   1009         TBB = FBB = 0;
   1010         if (TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
   1011           // FIXME: This should never take place.
   1012           TBB = FBB = 0;
   1013         }
   1014       }
   1015 
   1016       // If PrevBB has a two-way branch, try to re-order the branches
   1017       // such that we branch to the successor with higher weight first.
   1018       if (TBB && !Cond.empty() && FBB &&
   1019           MBPI->getEdgeWeight(PrevBB, FBB) > MBPI->getEdgeWeight(PrevBB, TBB) &&
   1020           !TII->ReverseBranchCondition(Cond)) {
   1021         DEBUG(dbgs() << "Reverse order of the two branches: "
   1022                      << getBlockName(PrevBB) << "\n");
   1023         DEBUG(dbgs() << "    Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB)
   1024                      << " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n");
   1025         DebugLoc dl;  // FIXME: this is nowhere
   1026         TII->RemoveBranch(*PrevBB);
   1027         TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl);
   1028         needUpdateBr = true;
   1029       }
   1030       if (needUpdateBr)
   1031         PrevBB->updateTerminator();
   1032     }
   1033   }
   1034 
   1035   // Fixup the last block.
   1036   Cond.clear();
   1037   MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
   1038   if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond))
   1039     F.back().updateTerminator();
   1040 
   1041   // Walk through the backedges of the function now that we have fully laid out
   1042   // the basic blocks and align the destination of each backedge. We don't rely
   1043   // exclusively on the loop info here so that we can align backedges in
   1044   // unnatural CFGs and backedges that were introduced purely because of the
   1045   // loop rotations done during this layout pass.
   1046   if (F.getFunction()->getAttributes().
   1047         hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize))
   1048     return;
   1049   unsigned Align = TLI->getPrefLoopAlignment();
   1050   if (!Align)
   1051     return;  // Don't care about loop alignment.
   1052   if (FunctionChain.begin() == FunctionChain.end())
   1053     return;  // Empty chain.
   1054 
   1055   const BranchProbability ColdProb(1, 5); // 20%
   1056   BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin());
   1057   BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
   1058   for (BlockChain::iterator BI = llvm::next(FunctionChain.begin()),
   1059                             BE = FunctionChain.end();
   1060        BI != BE; ++BI) {
   1061     // Don't align non-looping basic blocks. These are unlikely to execute
   1062     // enough times to matter in practice. Note that we'll still handle
   1063     // unnatural CFGs inside of a natural outer loop (the common case) and
   1064     // rotated loops.
   1065     MachineLoop *L = MLI->getLoopFor(*BI);
   1066     if (!L)
   1067       continue;
   1068 
   1069     // If the block is cold relative to the function entry don't waste space
   1070     // aligning it.
   1071     BlockFrequency Freq = MBFI->getBlockFreq(*BI);
   1072     if (Freq < WeightedEntryFreq)
   1073       continue;
   1074 
   1075     // If the block is cold relative to its loop header, don't align it
   1076     // regardless of what edges into the block exist.
   1077     MachineBasicBlock *LoopHeader = L->getHeader();
   1078     BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader);
   1079     if (Freq < (LoopHeaderFreq * ColdProb))
   1080       continue;
   1081 
   1082     // Check for the existence of a non-layout predecessor which would benefit
   1083     // from aligning this block.
   1084     MachineBasicBlock *LayoutPred = *llvm::prior(BI);
   1085 
   1086     // Force alignment if all the predecessors are jumps. We already checked
   1087     // that the block isn't cold above.
   1088     if (!LayoutPred->isSuccessor(*BI)) {
   1089       (*BI)->setAlignment(Align);
   1090       continue;
   1091     }
   1092 
   1093     // Align this block if the layout predecessor's edge into this block is
   1094     // cold relative to the block. When this is true, other predecessors make up
   1095     // all of the hot entries into the block and thus alignment is likely to be
   1096     // important.
   1097     BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI);
   1098     BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
   1099     if (LayoutEdgeFreq <= (Freq * ColdProb))
   1100       (*BI)->setAlignment(Align);
   1101   }
   1102 }
   1103 
   1104 bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
   1105   // Check for single-block functions and skip them.
   1106   if (llvm::next(F.begin()) == F.end())
   1107     return false;
   1108 
   1109   MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
   1110   MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
   1111   MLI = &getAnalysis<MachineLoopInfo>();
   1112   TII = F.getTarget().getInstrInfo();
   1113   TLI = F.getTarget().getTargetLowering();
   1114   assert(BlockToChain.empty());
   1115 
   1116   buildCFGChains(F);
   1117 
   1118   BlockToChain.clear();
   1119   ChainAllocator.DestroyAll();
   1120 
   1121   if (AlignAllBlock)
   1122     // Align all of the blocks in the function to a specific alignment.
   1123     for (MachineFunction::iterator FI = F.begin(), FE = F.end();
   1124          FI != FE; ++FI)
   1125       FI->setAlignment(AlignAllBlock);
   1126 
   1127   // We always return true as we have no way to track whether the final order
   1128   // differs from the original order.
   1129   return true;
   1130 }
   1131 
   1132 namespace {
   1133 /// \brief A pass to compute block placement statistics.
   1134 ///
   1135 /// A separate pass to compute interesting statistics for evaluating block
   1136 /// placement. This is separate from the actual placement pass so that they can
   1137 /// be computed in the absence of any placement transformations or when using
   1138 /// alternative placement strategies.
   1139 class MachineBlockPlacementStats : public MachineFunctionPass {
   1140   /// \brief A handle to the branch probability pass.
   1141   const MachineBranchProbabilityInfo *MBPI;
   1142 
   1143   /// \brief A handle to the function-wide block frequency pass.
   1144   const MachineBlockFrequencyInfo *MBFI;
   1145 
   1146 public:
   1147   static char ID; // Pass identification, replacement for typeid
   1148   MachineBlockPlacementStats() : MachineFunctionPass(ID) {
   1149     initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry());
   1150   }
   1151 
   1152   bool runOnMachineFunction(MachineFunction &F);
   1153 
   1154   void getAnalysisUsage(AnalysisUsage &AU) const {
   1155     AU.addRequired<MachineBranchProbabilityInfo>();
   1156     AU.addRequired<MachineBlockFrequencyInfo>();
   1157     AU.setPreservesAll();
   1158     MachineFunctionPass::getAnalysisUsage(AU);
   1159   }
   1160 };
   1161 }
   1162 
   1163 char MachineBlockPlacementStats::ID = 0;
   1164 char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID;
   1165 INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats",
   1166                       "Basic Block Placement Stats", false, false)
   1167 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
   1168 INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
   1169 INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats",
   1170                     "Basic Block Placement Stats", false, false)
   1171 
   1172 bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
   1173   // Check for single-block functions and skip them.
   1174   if (llvm::next(F.begin()) == F.end())
   1175     return false;
   1176 
   1177   MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
   1178   MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
   1179 
   1180   for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
   1181     BlockFrequency BlockFreq = MBFI->getBlockFreq(I);
   1182     Statistic &NumBranches = (I->succ_size() > 1) ? NumCondBranches
   1183                                                   : NumUncondBranches;
   1184     Statistic &BranchTakenFreq = (I->succ_size() > 1) ? CondBranchTakenFreq
   1185                                                       : UncondBranchTakenFreq;
   1186     for (MachineBasicBlock::succ_iterator SI = I->succ_begin(),
   1187                                           SE = I->succ_end();
   1188          SI != SE; ++SI) {
   1189       // Skip if this successor is a fallthrough.
   1190       if (I->isLayoutSuccessor(*SI))
   1191         continue;
   1192 
   1193       BlockFrequency EdgeFreq = BlockFreq * MBPI->getEdgeProbability(I, *SI);
   1194       ++NumBranches;
   1195       BranchTakenFreq += EdgeFreq.getFrequency();
   1196     }
   1197   }
   1198 
   1199   return false;
   1200 }
   1201 
   1202