1 //===-- MachineBlockPlacement.cpp - Basic Block Code Layout optimization --===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements basic block placement transformations using the CFG 11 // structure and branch probability estimates. 12 // 13 // The pass strives to preserve the structure of the CFG (that is, retain 14 // a topological ordering of basic blocks) in the absence of a *strong* signal 15 // to the contrary from probabilities. However, within the CFG structure, it 16 // attempts to choose an ordering which favors placing more likely sequences of 17 // blocks adjacent to each other. 18 // 19 // The algorithm works from the inner-most loop within a function outward, and 20 // at each stage walks through the basic blocks, trying to coalesce them into 21 // sequential chains where allowed by the CFG (or demanded by heavy 22 // probabilities). Finally, it walks the blocks in topological order, and the 23 // first time it reaches a chain of basic blocks, it schedules them in the 24 // function in-order. 25 // 26 //===----------------------------------------------------------------------===// 27 28 #include "llvm/CodeGen/Passes.h" 29 #include "llvm/ADT/DenseMap.h" 30 #include "llvm/ADT/SmallPtrSet.h" 31 #include "llvm/ADT/SmallVector.h" 32 #include "llvm/ADT/Statistic.h" 33 #include "llvm/CodeGen/MachineBasicBlock.h" 34 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" 35 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" 36 #include "llvm/CodeGen/MachineFunction.h" 37 #include "llvm/CodeGen/MachineFunctionPass.h" 38 #include "llvm/CodeGen/MachineLoopInfo.h" 39 #include "llvm/CodeGen/MachineModuleInfo.h" 40 #include "llvm/Support/Allocator.h" 41 #include "llvm/Support/CommandLine.h" 42 #include "llvm/Support/Debug.h" 43 #include "llvm/Target/TargetInstrInfo.h" 44 #include "llvm/Target/TargetLowering.h" 45 #include <algorithm> 46 using namespace llvm; 47 48 #define DEBUG_TYPE "block-placement2" 49 50 STATISTIC(NumCondBranches, "Number of conditional branches"); 51 STATISTIC(NumUncondBranches, "Number of uncondittional branches"); 52 STATISTIC(CondBranchTakenFreq, 53 "Potential frequency of taking conditional branches"); 54 STATISTIC(UncondBranchTakenFreq, 55 "Potential frequency of taking unconditional branches"); 56 57 static cl::opt<unsigned> AlignAllBlock("align-all-blocks", 58 cl::desc("Force the alignment of all " 59 "blocks in the function."), 60 cl::init(0), cl::Hidden); 61 62 // FIXME: Find a good default for this flag and remove the flag. 63 static cl::opt<unsigned> 64 ExitBlockBias("block-placement-exit-block-bias", 65 cl::desc("Block frequency percentage a loop exit block needs " 66 "over the original exit to be considered the new exit."), 67 cl::init(0), cl::Hidden); 68 69 namespace { 70 class BlockChain; 71 /// \brief Type for our function-wide basic block -> block chain mapping. 72 typedef DenseMap<MachineBasicBlock *, BlockChain *> BlockToChainMapType; 73 } 74 75 namespace { 76 /// \brief A chain of blocks which will be laid out contiguously. 77 /// 78 /// This is the datastructure representing a chain of consecutive blocks that 79 /// are profitable to layout together in order to maximize fallthrough 80 /// probabilities and code locality. We also can use a block chain to represent 81 /// a sequence of basic blocks which have some external (correctness) 82 /// requirement for sequential layout. 83 /// 84 /// Chains can be built around a single basic block and can be merged to grow 85 /// them. They participate in a block-to-chain mapping, which is updated 86 /// automatically as chains are merged together. 87 class BlockChain { 88 /// \brief The sequence of blocks belonging to this chain. 89 /// 90 /// This is the sequence of blocks for a particular chain. These will be laid 91 /// out in-order within the function. 92 SmallVector<MachineBasicBlock *, 4> Blocks; 93 94 /// \brief A handle to the function-wide basic block to block chain mapping. 95 /// 96 /// This is retained in each block chain to simplify the computation of child 97 /// block chains for SCC-formation and iteration. We store the edges to child 98 /// basic blocks, and map them back to their associated chains using this 99 /// structure. 100 BlockToChainMapType &BlockToChain; 101 102 public: 103 /// \brief Construct a new BlockChain. 104 /// 105 /// This builds a new block chain representing a single basic block in the 106 /// function. It also registers itself as the chain that block participates 107 /// in with the BlockToChain mapping. 108 BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB) 109 : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) { 110 assert(BB && "Cannot create a chain with a null basic block"); 111 BlockToChain[BB] = this; 112 } 113 114 /// \brief Iterator over blocks within the chain. 115 typedef SmallVectorImpl<MachineBasicBlock *>::iterator iterator; 116 117 /// \brief Beginning of blocks within the chain. 118 iterator begin() { return Blocks.begin(); } 119 120 /// \brief End of blocks within the chain. 121 iterator end() { return Blocks.end(); } 122 123 /// \brief Merge a block chain into this one. 124 /// 125 /// This routine merges a block chain into this one. It takes care of forming 126 /// a contiguous sequence of basic blocks, updating the edge list, and 127 /// updating the block -> chain mapping. It does not free or tear down the 128 /// old chain, but the old chain's block list is no longer valid. 129 void merge(MachineBasicBlock *BB, BlockChain *Chain) { 130 assert(BB); 131 assert(!Blocks.empty()); 132 133 // Fast path in case we don't have a chain already. 134 if (!Chain) { 135 assert(!BlockToChain[BB]); 136 Blocks.push_back(BB); 137 BlockToChain[BB] = this; 138 return; 139 } 140 141 assert(BB == *Chain->begin()); 142 assert(Chain->begin() != Chain->end()); 143 144 // Update the incoming blocks to point to this chain, and add them to the 145 // chain structure. 146 for (BlockChain::iterator BI = Chain->begin(), BE = Chain->end(); 147 BI != BE; ++BI) { 148 Blocks.push_back(*BI); 149 assert(BlockToChain[*BI] == Chain && "Incoming blocks not in chain"); 150 BlockToChain[*BI] = this; 151 } 152 } 153 154 #ifndef NDEBUG 155 /// \brief Dump the blocks in this chain. 156 LLVM_DUMP_METHOD void dump() { 157 for (iterator I = begin(), E = end(); I != E; ++I) 158 (*I)->dump(); 159 } 160 #endif // NDEBUG 161 162 /// \brief Count of predecessors within the loop currently being processed. 163 /// 164 /// This count is updated at each loop we process to represent the number of 165 /// in-loop predecessors of this chain. 166 unsigned LoopPredecessors; 167 }; 168 } 169 170 namespace { 171 class MachineBlockPlacement : public MachineFunctionPass { 172 /// \brief A typedef for a block filter set. 173 typedef SmallPtrSet<MachineBasicBlock *, 16> BlockFilterSet; 174 175 /// \brief A handle to the branch probability pass. 176 const MachineBranchProbabilityInfo *MBPI; 177 178 /// \brief A handle to the function-wide block frequency pass. 179 const MachineBlockFrequencyInfo *MBFI; 180 181 /// \brief A handle to the loop info. 182 const MachineLoopInfo *MLI; 183 184 /// \brief A handle to the target's instruction info. 185 const TargetInstrInfo *TII; 186 187 /// \brief A handle to the target's lowering info. 188 const TargetLoweringBase *TLI; 189 190 /// \brief Allocator and owner of BlockChain structures. 191 /// 192 /// We build BlockChains lazily while processing the loop structure of 193 /// a function. To reduce malloc traffic, we allocate them using this 194 /// slab-like allocator, and destroy them after the pass completes. An 195 /// important guarantee is that this allocator produces stable pointers to 196 /// the chains. 197 SpecificBumpPtrAllocator<BlockChain> ChainAllocator; 198 199 /// \brief Function wide BasicBlock to BlockChain mapping. 200 /// 201 /// This mapping allows efficiently moving from any given basic block to the 202 /// BlockChain it participates in, if any. We use it to, among other things, 203 /// allow implicitly defining edges between chains as the existing edges 204 /// between basic blocks. 205 DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain; 206 207 void markChainSuccessors(BlockChain &Chain, 208 MachineBasicBlock *LoopHeaderBB, 209 SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, 210 const BlockFilterSet *BlockFilter = nullptr); 211 MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB, 212 BlockChain &Chain, 213 const BlockFilterSet *BlockFilter); 214 MachineBasicBlock *selectBestCandidateBlock( 215 BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList, 216 const BlockFilterSet *BlockFilter); 217 MachineBasicBlock *getFirstUnplacedBlock( 218 MachineFunction &F, 219 const BlockChain &PlacedChain, 220 MachineFunction::iterator &PrevUnplacedBlockIt, 221 const BlockFilterSet *BlockFilter); 222 void buildChain(MachineBasicBlock *BB, BlockChain &Chain, 223 SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, 224 const BlockFilterSet *BlockFilter = nullptr); 225 MachineBasicBlock *findBestLoopTop(MachineLoop &L, 226 const BlockFilterSet &LoopBlockSet); 227 MachineBasicBlock *findBestLoopExit(MachineFunction &F, 228 MachineLoop &L, 229 const BlockFilterSet &LoopBlockSet); 230 void buildLoopChains(MachineFunction &F, MachineLoop &L); 231 void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB, 232 const BlockFilterSet &LoopBlockSet); 233 void buildCFGChains(MachineFunction &F); 234 235 public: 236 static char ID; // Pass identification, replacement for typeid 237 MachineBlockPlacement() : MachineFunctionPass(ID) { 238 initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry()); 239 } 240 241 bool runOnMachineFunction(MachineFunction &F) override; 242 243 void getAnalysisUsage(AnalysisUsage &AU) const override { 244 AU.addRequired<MachineBranchProbabilityInfo>(); 245 AU.addRequired<MachineBlockFrequencyInfo>(); 246 AU.addRequired<MachineLoopInfo>(); 247 MachineFunctionPass::getAnalysisUsage(AU); 248 } 249 }; 250 } 251 252 char MachineBlockPlacement::ID = 0; 253 char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID; 254 INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement2", 255 "Branch Probability Basic Block Placement", false, false) 256 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) 257 INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) 258 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 259 INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2", 260 "Branch Probability Basic Block Placement", false, false) 261 262 #ifndef NDEBUG 263 /// \brief Helper to print the name of a MBB. 264 /// 265 /// Only used by debug logging. 266 static std::string getBlockName(MachineBasicBlock *BB) { 267 std::string Result; 268 raw_string_ostream OS(Result); 269 OS << "BB#" << BB->getNumber() 270 << " (derived from LLVM BB '" << BB->getName() << "')"; 271 OS.flush(); 272 return Result; 273 } 274 275 /// \brief Helper to print the number of a MBB. 276 /// 277 /// Only used by debug logging. 278 static std::string getBlockNum(MachineBasicBlock *BB) { 279 std::string Result; 280 raw_string_ostream OS(Result); 281 OS << "BB#" << BB->getNumber(); 282 OS.flush(); 283 return Result; 284 } 285 #endif 286 287 /// \brief Mark a chain's successors as having one fewer preds. 288 /// 289 /// When a chain is being merged into the "placed" chain, this routine will 290 /// quickly walk the successors of each block in the chain and mark them as 291 /// having one fewer active predecessor. It also adds any successors of this 292 /// chain which reach the zero-predecessor state to the worklist passed in. 293 void MachineBlockPlacement::markChainSuccessors( 294 BlockChain &Chain, 295 MachineBasicBlock *LoopHeaderBB, 296 SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, 297 const BlockFilterSet *BlockFilter) { 298 // Walk all the blocks in this chain, marking their successors as having 299 // a predecessor placed. 300 for (BlockChain::iterator CBI = Chain.begin(), CBE = Chain.end(); 301 CBI != CBE; ++CBI) { 302 // Add any successors for which this is the only un-placed in-loop 303 // predecessor to the worklist as a viable candidate for CFG-neutral 304 // placement. No subsequent placement of this block will violate the CFG 305 // shape, so we get to use heuristics to choose a favorable placement. 306 for (MachineBasicBlock::succ_iterator SI = (*CBI)->succ_begin(), 307 SE = (*CBI)->succ_end(); 308 SI != SE; ++SI) { 309 if (BlockFilter && !BlockFilter->count(*SI)) 310 continue; 311 BlockChain &SuccChain = *BlockToChain[*SI]; 312 // Disregard edges within a fixed chain, or edges to the loop header. 313 if (&Chain == &SuccChain || *SI == LoopHeaderBB) 314 continue; 315 316 // This is a cross-chain edge that is within the loop, so decrement the 317 // loop predecessor count of the destination chain. 318 if (SuccChain.LoopPredecessors > 0 && --SuccChain.LoopPredecessors == 0) 319 BlockWorkList.push_back(*SuccChain.begin()); 320 } 321 } 322 } 323 324 /// \brief Select the best successor for a block. 325 /// 326 /// This looks across all successors of a particular block and attempts to 327 /// select the "best" one to be the layout successor. It only considers direct 328 /// successors which also pass the block filter. It will attempt to avoid 329 /// breaking CFG structure, but cave and break such structures in the case of 330 /// very hot successor edges. 331 /// 332 /// \returns The best successor block found, or null if none are viable. 333 MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( 334 MachineBasicBlock *BB, BlockChain &Chain, 335 const BlockFilterSet *BlockFilter) { 336 const BranchProbability HotProb(4, 5); // 80% 337 338 MachineBasicBlock *BestSucc = nullptr; 339 // FIXME: Due to the performance of the probability and weight routines in 340 // the MBPI analysis, we manually compute probabilities using the edge 341 // weights. This is suboptimal as it means that the somewhat subtle 342 // definition of edge weight semantics is encoded here as well. We should 343 // improve the MBPI interface to efficiently support query patterns such as 344 // this. 345 uint32_t BestWeight = 0; 346 uint32_t WeightScale = 0; 347 uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale); 348 DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n"); 349 for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), 350 SE = BB->succ_end(); 351 SI != SE; ++SI) { 352 if (BlockFilter && !BlockFilter->count(*SI)) 353 continue; 354 BlockChain &SuccChain = *BlockToChain[*SI]; 355 if (&SuccChain == &Chain) { 356 DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Already merged!\n"); 357 continue; 358 } 359 if (*SI != *SuccChain.begin()) { 360 DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Mid chain!\n"); 361 continue; 362 } 363 364 uint32_t SuccWeight = MBPI->getEdgeWeight(BB, *SI); 365 BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); 366 367 // Only consider successors which are either "hot", or wouldn't violate 368 // any CFG constraints. 369 if (SuccChain.LoopPredecessors != 0) { 370 if (SuccProb < HotProb) { 371 DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb 372 << " (prob) (CFG conflict)\n"); 373 continue; 374 } 375 376 // Make sure that a hot successor doesn't have a globally more important 377 // predecessor. 378 BlockFrequency CandidateEdgeFreq 379 = MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl(); 380 bool BadCFGConflict = false; 381 for (MachineBasicBlock::pred_iterator PI = (*SI)->pred_begin(), 382 PE = (*SI)->pred_end(); 383 PI != PE; ++PI) { 384 if (*PI == *SI || (BlockFilter && !BlockFilter->count(*PI)) || 385 BlockToChain[*PI] == &Chain) 386 continue; 387 BlockFrequency PredEdgeFreq 388 = MBFI->getBlockFreq(*PI) * MBPI->getEdgeProbability(*PI, *SI); 389 if (PredEdgeFreq >= CandidateEdgeFreq) { 390 BadCFGConflict = true; 391 break; 392 } 393 } 394 if (BadCFGConflict) { 395 DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb 396 << " (prob) (non-cold CFG conflict)\n"); 397 continue; 398 } 399 } 400 401 DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb 402 << " (prob)" 403 << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "") 404 << "\n"); 405 if (BestSucc && BestWeight >= SuccWeight) 406 continue; 407 BestSucc = *SI; 408 BestWeight = SuccWeight; 409 } 410 return BestSucc; 411 } 412 413 /// \brief Select the best block from a worklist. 414 /// 415 /// This looks through the provided worklist as a list of candidate basic 416 /// blocks and select the most profitable one to place. The definition of 417 /// profitable only really makes sense in the context of a loop. This returns 418 /// the most frequently visited block in the worklist, which in the case of 419 /// a loop, is the one most desirable to be physically close to the rest of the 420 /// loop body in order to improve icache behavior. 421 /// 422 /// \returns The best block found, or null if none are viable. 423 MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( 424 BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList, 425 const BlockFilterSet *BlockFilter) { 426 // Once we need to walk the worklist looking for a candidate, cleanup the 427 // worklist of already placed entries. 428 // FIXME: If this shows up on profiles, it could be folded (at the cost of 429 // some code complexity) into the loop below. 430 WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(), 431 [&](MachineBasicBlock *BB) { 432 return BlockToChain.lookup(BB) == &Chain; 433 }), 434 WorkList.end()); 435 436 MachineBasicBlock *BestBlock = nullptr; 437 BlockFrequency BestFreq; 438 for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(), 439 WBE = WorkList.end(); 440 WBI != WBE; ++WBI) { 441 BlockChain &SuccChain = *BlockToChain[*WBI]; 442 if (&SuccChain == &Chain) { 443 DEBUG(dbgs() << " " << getBlockName(*WBI) 444 << " -> Already merged!\n"); 445 continue; 446 } 447 assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block"); 448 449 BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI); 450 DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> "; 451 MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n"); 452 if (BestBlock && BestFreq >= CandidateFreq) 453 continue; 454 BestBlock = *WBI; 455 BestFreq = CandidateFreq; 456 } 457 return BestBlock; 458 } 459 460 /// \brief Retrieve the first unplaced basic block. 461 /// 462 /// This routine is called when we are unable to use the CFG to walk through 463 /// all of the basic blocks and form a chain due to unnatural loops in the CFG. 464 /// We walk through the function's blocks in order, starting from the 465 /// LastUnplacedBlockIt. We update this iterator on each call to avoid 466 /// re-scanning the entire sequence on repeated calls to this routine. 467 MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( 468 MachineFunction &F, const BlockChain &PlacedChain, 469 MachineFunction::iterator &PrevUnplacedBlockIt, 470 const BlockFilterSet *BlockFilter) { 471 for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E; 472 ++I) { 473 if (BlockFilter && !BlockFilter->count(I)) 474 continue; 475 if (BlockToChain[I] != &PlacedChain) { 476 PrevUnplacedBlockIt = I; 477 // Now select the head of the chain to which the unplaced block belongs 478 // as the block to place. This will force the entire chain to be placed, 479 // and satisfies the requirements of merging chains. 480 return *BlockToChain[I]->begin(); 481 } 482 } 483 return nullptr; 484 } 485 486 void MachineBlockPlacement::buildChain( 487 MachineBasicBlock *BB, 488 BlockChain &Chain, 489 SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, 490 const BlockFilterSet *BlockFilter) { 491 assert(BB); 492 assert(BlockToChain[BB] == &Chain); 493 MachineFunction &F = *BB->getParent(); 494 MachineFunction::iterator PrevUnplacedBlockIt = F.begin(); 495 496 MachineBasicBlock *LoopHeaderBB = BB; 497 markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter); 498 BB = *std::prev(Chain.end()); 499 for (;;) { 500 assert(BB); 501 assert(BlockToChain[BB] == &Chain); 502 assert(*std::prev(Chain.end()) == BB); 503 504 // Look for the best viable successor if there is one to place immediately 505 // after this block. 506 MachineBasicBlock *BestSucc = selectBestSuccessor(BB, Chain, BlockFilter); 507 508 // If an immediate successor isn't available, look for the best viable 509 // block among those we've identified as not violating the loop's CFG at 510 // this point. This won't be a fallthrough, but it will increase locality. 511 if (!BestSucc) 512 BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter); 513 514 if (!BestSucc) { 515 BestSucc = getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt, 516 BlockFilter); 517 if (!BestSucc) 518 break; 519 520 DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the " 521 "layout successor until the CFG reduces\n"); 522 } 523 524 // Place this block, updating the datastructures to reflect its placement. 525 BlockChain &SuccChain = *BlockToChain[BestSucc]; 526 // Zero out LoopPredecessors for the successor we're about to merge in case 527 // we selected a successor that didn't fit naturally into the CFG. 528 SuccChain.LoopPredecessors = 0; 529 DEBUG(dbgs() << "Merging from " << getBlockNum(BB) 530 << " to " << getBlockNum(BestSucc) << "\n"); 531 markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter); 532 Chain.merge(BestSucc, &SuccChain); 533 BB = *std::prev(Chain.end()); 534 } 535 536 DEBUG(dbgs() << "Finished forming chain for header block " 537 << getBlockNum(*Chain.begin()) << "\n"); 538 } 539 540 /// \brief Find the best loop top block for layout. 541 /// 542 /// Look for a block which is strictly better than the loop header for laying 543 /// out at the top of the loop. This looks for one and only one pattern: 544 /// a latch block with no conditional exit. This block will cause a conditional 545 /// jump around it or will be the bottom of the loop if we lay it out in place, 546 /// but if it it doesn't end up at the bottom of the loop for any reason, 547 /// rotation alone won't fix it. Because such a block will always result in an 548 /// unconditional jump (for the backedge) rotating it in front of the loop 549 /// header is always profitable. 550 MachineBasicBlock * 551 MachineBlockPlacement::findBestLoopTop(MachineLoop &L, 552 const BlockFilterSet &LoopBlockSet) { 553 // Check that the header hasn't been fused with a preheader block due to 554 // crazy branches. If it has, we need to start with the header at the top to 555 // prevent pulling the preheader into the loop body. 556 BlockChain &HeaderChain = *BlockToChain[L.getHeader()]; 557 if (!LoopBlockSet.count(*HeaderChain.begin())) 558 return L.getHeader(); 559 560 DEBUG(dbgs() << "Finding best loop top for: " 561 << getBlockName(L.getHeader()) << "\n"); 562 563 BlockFrequency BestPredFreq; 564 MachineBasicBlock *BestPred = nullptr; 565 for (MachineBasicBlock::pred_iterator PI = L.getHeader()->pred_begin(), 566 PE = L.getHeader()->pred_end(); 567 PI != PE; ++PI) { 568 MachineBasicBlock *Pred = *PI; 569 if (!LoopBlockSet.count(Pred)) 570 continue; 571 DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", " 572 << Pred->succ_size() << " successors, "; 573 MBFI->printBlockFreq(dbgs(), Pred) << " freq\n"); 574 if (Pred->succ_size() > 1) 575 continue; 576 577 BlockFrequency PredFreq = MBFI->getBlockFreq(Pred); 578 if (!BestPred || PredFreq > BestPredFreq || 579 (!(PredFreq < BestPredFreq) && 580 Pred->isLayoutSuccessor(L.getHeader()))) { 581 BestPred = Pred; 582 BestPredFreq = PredFreq; 583 } 584 } 585 586 // If no direct predecessor is fine, just use the loop header. 587 if (!BestPred) 588 return L.getHeader(); 589 590 // Walk backwards through any straight line of predecessors. 591 while (BestPred->pred_size() == 1 && 592 (*BestPred->pred_begin())->succ_size() == 1 && 593 *BestPred->pred_begin() != L.getHeader()) 594 BestPred = *BestPred->pred_begin(); 595 596 DEBUG(dbgs() << " final top: " << getBlockName(BestPred) << "\n"); 597 return BestPred; 598 } 599 600 601 /// \brief Find the best loop exiting block for layout. 602 /// 603 /// This routine implements the logic to analyze the loop looking for the best 604 /// block to layout at the top of the loop. Typically this is done to maximize 605 /// fallthrough opportunities. 606 MachineBasicBlock * 607 MachineBlockPlacement::findBestLoopExit(MachineFunction &F, 608 MachineLoop &L, 609 const BlockFilterSet &LoopBlockSet) { 610 // We don't want to layout the loop linearly in all cases. If the loop header 611 // is just a normal basic block in the loop, we want to look for what block 612 // within the loop is the best one to layout at the top. However, if the loop 613 // header has be pre-merged into a chain due to predecessors not having 614 // analyzable branches, *and* the predecessor it is merged with is *not* part 615 // of the loop, rotating the header into the middle of the loop will create 616 // a non-contiguous range of blocks which is Very Bad. So start with the 617 // header and only rotate if safe. 618 BlockChain &HeaderChain = *BlockToChain[L.getHeader()]; 619 if (!LoopBlockSet.count(*HeaderChain.begin())) 620 return nullptr; 621 622 BlockFrequency BestExitEdgeFreq; 623 unsigned BestExitLoopDepth = 0; 624 MachineBasicBlock *ExitingBB = nullptr; 625 // If there are exits to outer loops, loop rotation can severely limit 626 // fallthrough opportunites unless it selects such an exit. Keep a set of 627 // blocks where rotating to exit with that block will reach an outer loop. 628 SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop; 629 630 DEBUG(dbgs() << "Finding best loop exit for: " 631 << getBlockName(L.getHeader()) << "\n"); 632 for (MachineLoop::block_iterator I = L.block_begin(), 633 E = L.block_end(); 634 I != E; ++I) { 635 BlockChain &Chain = *BlockToChain[*I]; 636 // Ensure that this block is at the end of a chain; otherwise it could be 637 // mid-way through an inner loop or a successor of an analyzable branch. 638 if (*I != *std::prev(Chain.end())) 639 continue; 640 641 // Now walk the successors. We need to establish whether this has a viable 642 // exiting successor and whether it has a viable non-exiting successor. 643 // We store the old exiting state and restore it if a viable looping 644 // successor isn't found. 645 MachineBasicBlock *OldExitingBB = ExitingBB; 646 BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq; 647 bool HasLoopingSucc = false; 648 // FIXME: Due to the performance of the probability and weight routines in 649 // the MBPI analysis, we use the internal weights and manually compute the 650 // probabilities to avoid quadratic behavior. 651 uint32_t WeightScale = 0; 652 uint32_t SumWeight = MBPI->getSumForBlock(*I, WeightScale); 653 for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(), 654 SE = (*I)->succ_end(); 655 SI != SE; ++SI) { 656 if ((*SI)->isLandingPad()) 657 continue; 658 if (*SI == *I) 659 continue; 660 BlockChain &SuccChain = *BlockToChain[*SI]; 661 // Don't split chains, either this chain or the successor's chain. 662 if (&Chain == &SuccChain) { 663 DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> " 664 << getBlockName(*SI) << " (chain conflict)\n"); 665 continue; 666 } 667 668 uint32_t SuccWeight = MBPI->getEdgeWeight(*I, *SI); 669 if (LoopBlockSet.count(*SI)) { 670 DEBUG(dbgs() << " looping: " << getBlockName(*I) << " -> " 671 << getBlockName(*SI) << " (" << SuccWeight << ")\n"); 672 HasLoopingSucc = true; 673 continue; 674 } 675 676 unsigned SuccLoopDepth = 0; 677 if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI)) { 678 SuccLoopDepth = ExitLoop->getLoopDepth(); 679 if (ExitLoop->contains(&L)) 680 BlocksExitingToOuterLoop.insert(*I); 681 } 682 683 BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); 684 BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb; 685 DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> " 686 << getBlockName(*SI) << " [L:" << SuccLoopDepth 687 << "] ("; 688 MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n"); 689 // Note that we bias this toward an existing layout successor to retain 690 // incoming order in the absence of better information. The exit must have 691 // a frequency higher than the current exit before we consider breaking 692 // the layout. 693 BranchProbability Bias(100 - ExitBlockBias, 100); 694 if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth || 695 ExitEdgeFreq > BestExitEdgeFreq || 696 ((*I)->isLayoutSuccessor(*SI) && 697 !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) { 698 BestExitEdgeFreq = ExitEdgeFreq; 699 ExitingBB = *I; 700 } 701 } 702 703 // Restore the old exiting state, no viable looping successor was found. 704 if (!HasLoopingSucc) { 705 ExitingBB = OldExitingBB; 706 BestExitEdgeFreq = OldBestExitEdgeFreq; 707 continue; 708 } 709 } 710 // Without a candidate exiting block or with only a single block in the 711 // loop, just use the loop header to layout the loop. 712 if (!ExitingBB || L.getNumBlocks() == 1) 713 return nullptr; 714 715 // Also, if we have exit blocks which lead to outer loops but didn't select 716 // one of them as the exiting block we are rotating toward, disable loop 717 // rotation altogether. 718 if (!BlocksExitingToOuterLoop.empty() && 719 !BlocksExitingToOuterLoop.count(ExitingBB)) 720 return nullptr; 721 722 DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n"); 723 return ExitingBB; 724 } 725 726 /// \brief Attempt to rotate an exiting block to the bottom of the loop. 727 /// 728 /// Once we have built a chain, try to rotate it to line up the hot exit block 729 /// with fallthrough out of the loop if doing so doesn't introduce unnecessary 730 /// branches. For example, if the loop has fallthrough into its header and out 731 /// of its bottom already, don't rotate it. 732 void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, 733 MachineBasicBlock *ExitingBB, 734 const BlockFilterSet &LoopBlockSet) { 735 if (!ExitingBB) 736 return; 737 738 MachineBasicBlock *Top = *LoopChain.begin(); 739 bool ViableTopFallthrough = false; 740 for (MachineBasicBlock::pred_iterator PI = Top->pred_begin(), 741 PE = Top->pred_end(); 742 PI != PE; ++PI) { 743 BlockChain *PredChain = BlockToChain[*PI]; 744 if (!LoopBlockSet.count(*PI) && 745 (!PredChain || *PI == *std::prev(PredChain->end()))) { 746 ViableTopFallthrough = true; 747 break; 748 } 749 } 750 751 // If the header has viable fallthrough, check whether the current loop 752 // bottom is a viable exiting block. If so, bail out as rotating will 753 // introduce an unnecessary branch. 754 if (ViableTopFallthrough) { 755 MachineBasicBlock *Bottom = *std::prev(LoopChain.end()); 756 for (MachineBasicBlock::succ_iterator SI = Bottom->succ_begin(), 757 SE = Bottom->succ_end(); 758 SI != SE; ++SI) { 759 BlockChain *SuccChain = BlockToChain[*SI]; 760 if (!LoopBlockSet.count(*SI) && 761 (!SuccChain || *SI == *SuccChain->begin())) 762 return; 763 } 764 } 765 766 BlockChain::iterator ExitIt = std::find(LoopChain.begin(), LoopChain.end(), 767 ExitingBB); 768 if (ExitIt == LoopChain.end()) 769 return; 770 771 std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end()); 772 } 773 774 /// \brief Forms basic block chains from the natural loop structures. 775 /// 776 /// These chains are designed to preserve the existing *structure* of the code 777 /// as much as possible. We can then stitch the chains together in a way which 778 /// both preserves the topological structure and minimizes taken conditional 779 /// branches. 780 void MachineBlockPlacement::buildLoopChains(MachineFunction &F, 781 MachineLoop &L) { 782 // First recurse through any nested loops, building chains for those inner 783 // loops. 784 for (MachineLoop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI) 785 buildLoopChains(F, **LI); 786 787 SmallVector<MachineBasicBlock *, 16> BlockWorkList; 788 BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end()); 789 790 // First check to see if there is an obviously preferable top block for the 791 // loop. This will default to the header, but may end up as one of the 792 // predecessors to the header if there is one which will result in strictly 793 // fewer branches in the loop body. 794 MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet); 795 796 // If we selected just the header for the loop top, look for a potentially 797 // profitable exit block in the event that rotating the loop can eliminate 798 // branches by placing an exit edge at the bottom. 799 MachineBasicBlock *ExitingBB = nullptr; 800 if (LoopTop == L.getHeader()) 801 ExitingBB = findBestLoopExit(F, L, LoopBlockSet); 802 803 BlockChain &LoopChain = *BlockToChain[LoopTop]; 804 805 // FIXME: This is a really lame way of walking the chains in the loop: we 806 // walk the blocks, and use a set to prevent visiting a particular chain 807 // twice. 808 SmallPtrSet<BlockChain *, 4> UpdatedPreds; 809 assert(LoopChain.LoopPredecessors == 0); 810 UpdatedPreds.insert(&LoopChain); 811 for (MachineLoop::block_iterator BI = L.block_begin(), 812 BE = L.block_end(); 813 BI != BE; ++BI) { 814 BlockChain &Chain = *BlockToChain[*BI]; 815 if (!UpdatedPreds.insert(&Chain)) 816 continue; 817 818 assert(Chain.LoopPredecessors == 0); 819 for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end(); 820 BCI != BCE; ++BCI) { 821 assert(BlockToChain[*BCI] == &Chain); 822 for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(), 823 PE = (*BCI)->pred_end(); 824 PI != PE; ++PI) { 825 if (BlockToChain[*PI] == &Chain || !LoopBlockSet.count(*PI)) 826 continue; 827 ++Chain.LoopPredecessors; 828 } 829 } 830 831 if (Chain.LoopPredecessors == 0) 832 BlockWorkList.push_back(*Chain.begin()); 833 } 834 835 buildChain(LoopTop, LoopChain, BlockWorkList, &LoopBlockSet); 836 rotateLoop(LoopChain, ExitingBB, LoopBlockSet); 837 838 DEBUG({ 839 // Crash at the end so we get all of the debugging output first. 840 bool BadLoop = false; 841 if (LoopChain.LoopPredecessors) { 842 BadLoop = true; 843 dbgs() << "Loop chain contains a block without its preds placed!\n" 844 << " Loop header: " << getBlockName(*L.block_begin()) << "\n" 845 << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"; 846 } 847 for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end(); 848 BCI != BCE; ++BCI) { 849 dbgs() << " ... " << getBlockName(*BCI) << "\n"; 850 if (!LoopBlockSet.erase(*BCI)) { 851 // We don't mark the loop as bad here because there are real situations 852 // where this can occur. For example, with an unanalyzable fallthrough 853 // from a loop block to a non-loop block or vice versa. 854 dbgs() << "Loop chain contains a block not contained by the loop!\n" 855 << " Loop header: " << getBlockName(*L.block_begin()) << "\n" 856 << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" 857 << " Bad block: " << getBlockName(*BCI) << "\n"; 858 } 859 } 860 861 if (!LoopBlockSet.empty()) { 862 BadLoop = true; 863 for (BlockFilterSet::iterator LBI = LoopBlockSet.begin(), 864 LBE = LoopBlockSet.end(); 865 LBI != LBE; ++LBI) 866 dbgs() << "Loop contains blocks never placed into a chain!\n" 867 << " Loop header: " << getBlockName(*L.block_begin()) << "\n" 868 << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" 869 << " Bad block: " << getBlockName(*LBI) << "\n"; 870 } 871 assert(!BadLoop && "Detected problems with the placement of this loop."); 872 }); 873 } 874 875 void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { 876 // Ensure that every BB in the function has an associated chain to simplify 877 // the assumptions of the remaining algorithm. 878 SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch. 879 for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { 880 MachineBasicBlock *BB = FI; 881 BlockChain *Chain 882 = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB); 883 // Also, merge any blocks which we cannot reason about and must preserve 884 // the exact fallthrough behavior for. 885 for (;;) { 886 Cond.clear(); 887 MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. 888 if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough()) 889 break; 890 891 MachineFunction::iterator NextFI(std::next(FI)); 892 MachineBasicBlock *NextBB = NextFI; 893 // Ensure that the layout successor is a viable block, as we know that 894 // fallthrough is a possibility. 895 assert(NextFI != FE && "Can't fallthrough past the last block."); 896 DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: " 897 << getBlockName(BB) << " -> " << getBlockName(NextBB) 898 << "\n"); 899 Chain->merge(NextBB, nullptr); 900 FI = NextFI; 901 BB = NextBB; 902 } 903 } 904 905 // Build any loop-based chains. 906 for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE; 907 ++LI) 908 buildLoopChains(F, **LI); 909 910 SmallVector<MachineBasicBlock *, 16> BlockWorkList; 911 912 SmallPtrSet<BlockChain *, 4> UpdatedPreds; 913 for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { 914 MachineBasicBlock *BB = &*FI; 915 BlockChain &Chain = *BlockToChain[BB]; 916 if (!UpdatedPreds.insert(&Chain)) 917 continue; 918 919 assert(Chain.LoopPredecessors == 0); 920 for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end(); 921 BCI != BCE; ++BCI) { 922 assert(BlockToChain[*BCI] == &Chain); 923 for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(), 924 PE = (*BCI)->pred_end(); 925 PI != PE; ++PI) { 926 if (BlockToChain[*PI] == &Chain) 927 continue; 928 ++Chain.LoopPredecessors; 929 } 930 } 931 932 if (Chain.LoopPredecessors == 0) 933 BlockWorkList.push_back(*Chain.begin()); 934 } 935 936 BlockChain &FunctionChain = *BlockToChain[&F.front()]; 937 buildChain(&F.front(), FunctionChain, BlockWorkList); 938 939 #ifndef NDEBUG 940 typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType; 941 #endif 942 DEBUG({ 943 // Crash at the end so we get all of the debugging output first. 944 bool BadFunc = false; 945 FunctionBlockSetType FunctionBlockSet; 946 for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) 947 FunctionBlockSet.insert(FI); 948 949 for (BlockChain::iterator BCI = FunctionChain.begin(), 950 BCE = FunctionChain.end(); 951 BCI != BCE; ++BCI) 952 if (!FunctionBlockSet.erase(*BCI)) { 953 BadFunc = true; 954 dbgs() << "Function chain contains a block not in the function!\n" 955 << " Bad block: " << getBlockName(*BCI) << "\n"; 956 } 957 958 if (!FunctionBlockSet.empty()) { 959 BadFunc = true; 960 for (FunctionBlockSetType::iterator FBI = FunctionBlockSet.begin(), 961 FBE = FunctionBlockSet.end(); 962 FBI != FBE; ++FBI) 963 dbgs() << "Function contains blocks never placed into a chain!\n" 964 << " Bad block: " << getBlockName(*FBI) << "\n"; 965 } 966 assert(!BadFunc && "Detected problems with the block placement."); 967 }); 968 969 // Splice the blocks into place. 970 MachineFunction::iterator InsertPos = F.begin(); 971 for (BlockChain::iterator BI = FunctionChain.begin(), 972 BE = FunctionChain.end(); 973 BI != BE; ++BI) { 974 DEBUG(dbgs() << (BI == FunctionChain.begin() ? "Placing chain " 975 : " ... ") 976 << getBlockName(*BI) << "\n"); 977 if (InsertPos != MachineFunction::iterator(*BI)) 978 F.splice(InsertPos, *BI); 979 else 980 ++InsertPos; 981 982 // Update the terminator of the previous block. 983 if (BI == FunctionChain.begin()) 984 continue; 985 MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(*BI)); 986 987 // FIXME: It would be awesome of updateTerminator would just return rather 988 // than assert when the branch cannot be analyzed in order to remove this 989 // boiler plate. 990 Cond.clear(); 991 MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. 992 if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { 993 // The "PrevBB" is not yet updated to reflect current code layout, so, 994 // o. it may fall-through to a block without explict "goto" instruction 995 // before layout, and no longer fall-through it after layout; or 996 // o. just opposite. 997 // 998 // AnalyzeBranch() may return erroneous value for FBB when these two 999 // situations take place. For the first scenario FBB is mistakenly set 1000 // NULL; for the 2nd scenario, the FBB, which is expected to be NULL, 1001 // is mistakenly pointing to "*BI". 1002 // 1003 bool needUpdateBr = true; 1004 if (!Cond.empty() && (!FBB || FBB == *BI)) { 1005 PrevBB->updateTerminator(); 1006 needUpdateBr = false; 1007 Cond.clear(); 1008 TBB = FBB = nullptr; 1009 if (TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { 1010 // FIXME: This should never take place. 1011 TBB = FBB = nullptr; 1012 } 1013 } 1014 1015 // If PrevBB has a two-way branch, try to re-order the branches 1016 // such that we branch to the successor with higher weight first. 1017 if (TBB && !Cond.empty() && FBB && 1018 MBPI->getEdgeWeight(PrevBB, FBB) > MBPI->getEdgeWeight(PrevBB, TBB) && 1019 !TII->ReverseBranchCondition(Cond)) { 1020 DEBUG(dbgs() << "Reverse order of the two branches: " 1021 << getBlockName(PrevBB) << "\n"); 1022 DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB) 1023 << " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n"); 1024 DebugLoc dl; // FIXME: this is nowhere 1025 TII->RemoveBranch(*PrevBB); 1026 TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl); 1027 needUpdateBr = true; 1028 } 1029 if (needUpdateBr) 1030 PrevBB->updateTerminator(); 1031 } 1032 } 1033 1034 // Fixup the last block. 1035 Cond.clear(); 1036 MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. 1037 if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond)) 1038 F.back().updateTerminator(); 1039 1040 // Walk through the backedges of the function now that we have fully laid out 1041 // the basic blocks and align the destination of each backedge. We don't rely 1042 // exclusively on the loop info here so that we can align backedges in 1043 // unnatural CFGs and backedges that were introduced purely because of the 1044 // loop rotations done during this layout pass. 1045 if (F.getFunction()->getAttributes(). 1046 hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize)) 1047 return; 1048 unsigned Align = TLI->getPrefLoopAlignment(); 1049 if (!Align) 1050 return; // Don't care about loop alignment. 1051 if (FunctionChain.begin() == FunctionChain.end()) 1052 return; // Empty chain. 1053 1054 const BranchProbability ColdProb(1, 5); // 20% 1055 BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin()); 1056 BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb; 1057 for (BlockChain::iterator BI = std::next(FunctionChain.begin()), 1058 BE = FunctionChain.end(); 1059 BI != BE; ++BI) { 1060 // Don't align non-looping basic blocks. These are unlikely to execute 1061 // enough times to matter in practice. Note that we'll still handle 1062 // unnatural CFGs inside of a natural outer loop (the common case) and 1063 // rotated loops. 1064 MachineLoop *L = MLI->getLoopFor(*BI); 1065 if (!L) 1066 continue; 1067 1068 // If the block is cold relative to the function entry don't waste space 1069 // aligning it. 1070 BlockFrequency Freq = MBFI->getBlockFreq(*BI); 1071 if (Freq < WeightedEntryFreq) 1072 continue; 1073 1074 // If the block is cold relative to its loop header, don't align it 1075 // regardless of what edges into the block exist. 1076 MachineBasicBlock *LoopHeader = L->getHeader(); 1077 BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader); 1078 if (Freq < (LoopHeaderFreq * ColdProb)) 1079 continue; 1080 1081 // Check for the existence of a non-layout predecessor which would benefit 1082 // from aligning this block. 1083 MachineBasicBlock *LayoutPred = *std::prev(BI); 1084 1085 // Force alignment if all the predecessors are jumps. We already checked 1086 // that the block isn't cold above. 1087 if (!LayoutPred->isSuccessor(*BI)) { 1088 (*BI)->setAlignment(Align); 1089 continue; 1090 } 1091 1092 // Align this block if the layout predecessor's edge into this block is 1093 // cold relative to the block. When this is true, other predecessors make up 1094 // all of the hot entries into the block and thus alignment is likely to be 1095 // important. 1096 BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI); 1097 BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb; 1098 if (LayoutEdgeFreq <= (Freq * ColdProb)) 1099 (*BI)->setAlignment(Align); 1100 } 1101 } 1102 1103 bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { 1104 // Check for single-block functions and skip them. 1105 if (std::next(F.begin()) == F.end()) 1106 return false; 1107 1108 if (skipOptnoneFunction(*F.getFunction())) 1109 return false; 1110 1111 MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); 1112 MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); 1113 MLI = &getAnalysis<MachineLoopInfo>(); 1114 TII = F.getTarget().getInstrInfo(); 1115 TLI = F.getTarget().getTargetLowering(); 1116 assert(BlockToChain.empty()); 1117 1118 buildCFGChains(F); 1119 1120 BlockToChain.clear(); 1121 ChainAllocator.DestroyAll(); 1122 1123 if (AlignAllBlock) 1124 // Align all of the blocks in the function to a specific alignment. 1125 for (MachineFunction::iterator FI = F.begin(), FE = F.end(); 1126 FI != FE; ++FI) 1127 FI->setAlignment(AlignAllBlock); 1128 1129 // We always return true as we have no way to track whether the final order 1130 // differs from the original order. 1131 return true; 1132 } 1133 1134 namespace { 1135 /// \brief A pass to compute block placement statistics. 1136 /// 1137 /// A separate pass to compute interesting statistics for evaluating block 1138 /// placement. This is separate from the actual placement pass so that they can 1139 /// be computed in the absence of any placement transformations or when using 1140 /// alternative placement strategies. 1141 class MachineBlockPlacementStats : public MachineFunctionPass { 1142 /// \brief A handle to the branch probability pass. 1143 const MachineBranchProbabilityInfo *MBPI; 1144 1145 /// \brief A handle to the function-wide block frequency pass. 1146 const MachineBlockFrequencyInfo *MBFI; 1147 1148 public: 1149 static char ID; // Pass identification, replacement for typeid 1150 MachineBlockPlacementStats() : MachineFunctionPass(ID) { 1151 initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry()); 1152 } 1153 1154 bool runOnMachineFunction(MachineFunction &F) override; 1155 1156 void getAnalysisUsage(AnalysisUsage &AU) const override { 1157 AU.addRequired<MachineBranchProbabilityInfo>(); 1158 AU.addRequired<MachineBlockFrequencyInfo>(); 1159 AU.setPreservesAll(); 1160 MachineFunctionPass::getAnalysisUsage(AU); 1161 } 1162 }; 1163 } 1164 1165 char MachineBlockPlacementStats::ID = 0; 1166 char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID; 1167 INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats", 1168 "Basic Block Placement Stats", false, false) 1169 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) 1170 INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) 1171 INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats", 1172 "Basic Block Placement Stats", false, false) 1173 1174 bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) { 1175 // Check for single-block functions and skip them. 1176 if (std::next(F.begin()) == F.end()) 1177 return false; 1178 1179 MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); 1180 MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); 1181 1182 for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { 1183 BlockFrequency BlockFreq = MBFI->getBlockFreq(I); 1184 Statistic &NumBranches = (I->succ_size() > 1) ? NumCondBranches 1185 : NumUncondBranches; 1186 Statistic &BranchTakenFreq = (I->succ_size() > 1) ? CondBranchTakenFreq 1187 : UncondBranchTakenFreq; 1188 for (MachineBasicBlock::succ_iterator SI = I->succ_begin(), 1189 SE = I->succ_end(); 1190 SI != SE; ++SI) { 1191 // Skip if this successor is a fallthrough. 1192 if (I->isLayoutSuccessor(*SI)) 1193 continue; 1194 1195 BlockFrequency EdgeFreq = BlockFreq * MBPI->getEdgeProbability(I, *SI); 1196 ++NumBranches; 1197 BranchTakenFreq += EdgeFreq.getFrequency(); 1198 } 1199 } 1200 1201 return false; 1202 } 1203 1204