1 //===-- LoopUnswitch.cpp - Hoist loop-invariant conditionals in loop ------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass transforms loops that contain branches on loop-invariant conditions 11 // to have multiple loops. For example, it turns the left into the right code: 12 // 13 // for (...) if (lic) 14 // A for (...) 15 // if (lic) A; B; C 16 // B else 17 // C for (...) 18 // A; C 19 // 20 // This can increase the size of the code exponentially (doubling it every time 21 // a loop is unswitched) so we only unswitch if the resultant code will be 22 // smaller than a threshold. 23 // 24 // This pass expects LICM to be run before it to hoist invariant conditions out 25 // of the loop, to make the unswitching opportunity obvious. 26 // 27 //===----------------------------------------------------------------------===// 28 29 #include "llvm/Transforms/Scalar.h" 30 #include "llvm/ADT/STLExtras.h" 31 #include "llvm/ADT/SmallPtrSet.h" 32 #include "llvm/ADT/Statistic.h" 33 #include "llvm/Analysis/GlobalsModRef.h" 34 #include "llvm/Analysis/AssumptionCache.h" 35 #include "llvm/Analysis/CodeMetrics.h" 36 #include "llvm/Analysis/InstructionSimplify.h" 37 #include "llvm/Analysis/LoopInfo.h" 38 #include "llvm/Analysis/LoopPass.h" 39 #include "llvm/Analysis/ScalarEvolution.h" 40 #include "llvm/Analysis/TargetTransformInfo.h" 41 #include "llvm/Analysis/BlockFrequencyInfoImpl.h" 42 #include "llvm/Analysis/BlockFrequencyInfo.h" 43 #include "llvm/Analysis/BranchProbabilityInfo.h" 44 #include "llvm/Support/BranchProbability.h" 45 #include "llvm/IR/Constants.h" 46 #include "llvm/IR/DerivedTypes.h" 47 #include "llvm/IR/Dominators.h" 48 #include "llvm/IR/Function.h" 49 #include "llvm/IR/Instructions.h" 50 #include "llvm/IR/Module.h" 51 #include "llvm/IR/MDBuilder.h" 52 #include "llvm/Support/CommandLine.h" 53 #include "llvm/Support/Debug.h" 54 #include "llvm/Support/raw_ostream.h" 55 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 56 #include "llvm/Transforms/Utils/Cloning.h" 57 #include "llvm/Transforms/Utils/Local.h" 58 #include <algorithm> 59 #include <map> 60 #include <set> 61 using namespace llvm; 62 63 #define DEBUG_TYPE "loop-unswitch" 64 65 STATISTIC(NumBranches, "Number of branches unswitched"); 66 STATISTIC(NumSwitches, "Number of switches unswitched"); 67 STATISTIC(NumSelects , "Number of selects unswitched"); 68 STATISTIC(NumTrivial , "Number of unswitches that are trivial"); 69 STATISTIC(NumSimplify, "Number of simplifications of unswitched code"); 70 STATISTIC(TotalInsts, "Total number of instructions analyzed"); 71 72 // The specific value of 100 here was chosen based only on intuition and a 73 // few specific examples. 74 static cl::opt<unsigned> 75 Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), 76 cl::init(100), cl::Hidden); 77 78 static cl::opt<bool> 79 LoopUnswitchWithBlockFrequency("loop-unswitch-with-block-frequency", 80 cl::init(false), cl::Hidden, 81 cl::desc("Enable the use of the block frequency analysis to access PGO " 82 "heuristics to minimize code growth in cold regions.")); 83 84 static cl::opt<unsigned> 85 ColdnessThreshold("loop-unswitch-coldness-threshold", cl::init(1), cl::Hidden, 86 cl::desc("Coldness threshold in percentage. The loop header frequency " 87 "(relative to the entry frequency) is compared with this " 88 "threshold to determine if non-trivial unswitching should be " 89 "enabled.")); 90 91 namespace { 92 93 class LUAnalysisCache { 94 95 typedef DenseMap<const SwitchInst*, SmallPtrSet<const Value *, 8> > 96 UnswitchedValsMap; 97 98 typedef UnswitchedValsMap::iterator UnswitchedValsIt; 99 100 struct LoopProperties { 101 unsigned CanBeUnswitchedCount; 102 unsigned WasUnswitchedCount; 103 unsigned SizeEstimation; 104 UnswitchedValsMap UnswitchedVals; 105 }; 106 107 // Here we use std::map instead of DenseMap, since we need to keep valid 108 // LoopProperties pointer for current loop for better performance. 109 typedef std::map<const Loop*, LoopProperties> LoopPropsMap; 110 typedef LoopPropsMap::iterator LoopPropsMapIt; 111 112 LoopPropsMap LoopsProperties; 113 UnswitchedValsMap *CurLoopInstructions; 114 LoopProperties *CurrentLoopProperties; 115 116 // A loop unswitching with an estimated cost above this threshold 117 // is not performed. MaxSize is turned into unswitching quota for 118 // the current loop, and reduced correspondingly, though note that 119 // the quota is returned by releaseMemory() when the loop has been 120 // processed, so that MaxSize will return to its previous 121 // value. So in most cases MaxSize will equal the Threshold flag 122 // when a new loop is processed. An exception to that is that 123 // MaxSize will have a smaller value while processing nested loops 124 // that were introduced due to loop unswitching of an outer loop. 125 // 126 // FIXME: The way that MaxSize works is subtle and depends on the 127 // pass manager processing loops and calling releaseMemory() in a 128 // specific order. It would be good to find a more straightforward 129 // way of doing what MaxSize does. 130 unsigned MaxSize; 131 132 public: 133 LUAnalysisCache() 134 : CurLoopInstructions(nullptr), CurrentLoopProperties(nullptr), 135 MaxSize(Threshold) {} 136 137 // Analyze loop. Check its size, calculate is it possible to unswitch 138 // it. Returns true if we can unswitch this loop. 139 bool countLoop(const Loop *L, const TargetTransformInfo &TTI, 140 AssumptionCache *AC); 141 142 // Clean all data related to given loop. 143 void forgetLoop(const Loop *L); 144 145 // Mark case value as unswitched. 146 // Since SI instruction can be partly unswitched, in order to avoid 147 // extra unswitching in cloned loops keep track all unswitched values. 148 void setUnswitched(const SwitchInst *SI, const Value *V); 149 150 // Check was this case value unswitched before or not. 151 bool isUnswitched(const SwitchInst *SI, const Value *V); 152 153 // Returns true if another unswitching could be done within the cost 154 // threshold. 155 bool CostAllowsUnswitching(); 156 157 // Clone all loop-unswitch related loop properties. 158 // Redistribute unswitching quotas. 159 // Note, that new loop data is stored inside the VMap. 160 void cloneData(const Loop *NewLoop, const Loop *OldLoop, 161 const ValueToValueMapTy &VMap); 162 }; 163 164 class LoopUnswitch : public LoopPass { 165 LoopInfo *LI; // Loop information 166 LPPassManager *LPM; 167 AssumptionCache *AC; 168 169 // Used to check if second loop needs processing after 170 // RewriteLoopBodyWithConditionConstant rewrites first loop. 171 std::vector<Loop*> LoopProcessWorklist; 172 173 LUAnalysisCache BranchesInfo; 174 175 bool EnabledPGO; 176 177 // BFI and ColdEntryFreq are only used when PGO and 178 // LoopUnswitchWithBlockFrequency are enabled. 179 BlockFrequencyInfo BFI; 180 BlockFrequency ColdEntryFreq; 181 182 bool OptimizeForSize; 183 bool redoLoop; 184 185 Loop *currentLoop; 186 DominatorTree *DT; 187 BasicBlock *loopHeader; 188 BasicBlock *loopPreheader; 189 190 // LoopBlocks contains all of the basic blocks of the loop, including the 191 // preheader of the loop, the body of the loop, and the exit blocks of the 192 // loop, in that order. 193 std::vector<BasicBlock*> LoopBlocks; 194 // NewBlocks contained cloned copy of basic blocks from LoopBlocks. 195 std::vector<BasicBlock*> NewBlocks; 196 197 public: 198 static char ID; // Pass ID, replacement for typeid 199 explicit LoopUnswitch(bool Os = false) : 200 LoopPass(ID), OptimizeForSize(Os), redoLoop(false), 201 currentLoop(nullptr), DT(nullptr), loopHeader(nullptr), 202 loopPreheader(nullptr) { 203 initializeLoopUnswitchPass(*PassRegistry::getPassRegistry()); 204 } 205 206 bool runOnLoop(Loop *L, LPPassManager &LPM) override; 207 bool processCurrentLoop(); 208 209 /// This transformation requires natural loop information & requires that 210 /// loop preheaders be inserted into the CFG. 211 /// 212 void getAnalysisUsage(AnalysisUsage &AU) const override { 213 AU.addRequired<AssumptionCacheTracker>(); 214 AU.addRequiredID(LoopSimplifyID); 215 AU.addPreservedID(LoopSimplifyID); 216 AU.addRequired<LoopInfoWrapperPass>(); 217 AU.addPreserved<LoopInfoWrapperPass>(); 218 AU.addRequiredID(LCSSAID); 219 AU.addPreservedID(LCSSAID); 220 AU.addRequired<DominatorTreeWrapperPass>(); 221 AU.addPreserved<DominatorTreeWrapperPass>(); 222 AU.addPreserved<ScalarEvolutionWrapperPass>(); 223 AU.addRequired<TargetTransformInfoWrapperPass>(); 224 AU.addPreserved<GlobalsAAWrapperPass>(); 225 } 226 227 private: 228 229 void releaseMemory() override { 230 BranchesInfo.forgetLoop(currentLoop); 231 } 232 233 void initLoopData() { 234 loopHeader = currentLoop->getHeader(); 235 loopPreheader = currentLoop->getLoopPreheader(); 236 } 237 238 /// Split all of the edges from inside the loop to their exit blocks. 239 /// Update the appropriate Phi nodes as we do so. 240 void SplitExitEdges(Loop *L, 241 const SmallVectorImpl<BasicBlock *> &ExitBlocks); 242 243 bool TryTrivialLoopUnswitch(bool &Changed); 244 245 bool UnswitchIfProfitable(Value *LoopCond, Constant *Val, 246 TerminatorInst *TI = nullptr); 247 void UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, 248 BasicBlock *ExitBlock, TerminatorInst *TI); 249 void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L, 250 TerminatorInst *TI); 251 252 void RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, 253 Constant *Val, bool isEqual); 254 255 void EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val, 256 BasicBlock *TrueDest, 257 BasicBlock *FalseDest, 258 Instruction *InsertPt, 259 TerminatorInst *TI); 260 261 void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L); 262 }; 263 } 264 265 // Analyze loop. Check its size, calculate is it possible to unswitch 266 // it. Returns true if we can unswitch this loop. 267 bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI, 268 AssumptionCache *AC) { 269 270 LoopPropsMapIt PropsIt; 271 bool Inserted; 272 std::tie(PropsIt, Inserted) = 273 LoopsProperties.insert(std::make_pair(L, LoopProperties())); 274 275 LoopProperties &Props = PropsIt->second; 276 277 if (Inserted) { 278 // New loop. 279 280 // Limit the number of instructions to avoid causing significant code 281 // expansion, and the number of basic blocks, to avoid loops with 282 // large numbers of branches which cause loop unswitching to go crazy. 283 // This is a very ad-hoc heuristic. 284 285 SmallPtrSet<const Value *, 32> EphValues; 286 CodeMetrics::collectEphemeralValues(L, AC, EphValues); 287 288 // FIXME: This is overly conservative because it does not take into 289 // consideration code simplification opportunities and code that can 290 // be shared by the resultant unswitched loops. 291 CodeMetrics Metrics; 292 for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; 293 ++I) 294 Metrics.analyzeBasicBlock(*I, TTI, EphValues); 295 296 Props.SizeEstimation = Metrics.NumInsts; 297 Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation); 298 Props.WasUnswitchedCount = 0; 299 MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount; 300 301 if (Metrics.notDuplicatable) { 302 DEBUG(dbgs() << "NOT unswitching loop %" 303 << L->getHeader()->getName() << ", contents cannot be " 304 << "duplicated!\n"); 305 return false; 306 } 307 } 308 309 // Be careful. This links are good only before new loop addition. 310 CurrentLoopProperties = &Props; 311 CurLoopInstructions = &Props.UnswitchedVals; 312 313 return true; 314 } 315 316 // Clean all data related to given loop. 317 void LUAnalysisCache::forgetLoop(const Loop *L) { 318 319 LoopPropsMapIt LIt = LoopsProperties.find(L); 320 321 if (LIt != LoopsProperties.end()) { 322 LoopProperties &Props = LIt->second; 323 MaxSize += (Props.CanBeUnswitchedCount + Props.WasUnswitchedCount) * 324 Props.SizeEstimation; 325 LoopsProperties.erase(LIt); 326 } 327 328 CurrentLoopProperties = nullptr; 329 CurLoopInstructions = nullptr; 330 } 331 332 // Mark case value as unswitched. 333 // Since SI instruction can be partly unswitched, in order to avoid 334 // extra unswitching in cloned loops keep track all unswitched values. 335 void LUAnalysisCache::setUnswitched(const SwitchInst *SI, const Value *V) { 336 (*CurLoopInstructions)[SI].insert(V); 337 } 338 339 // Check was this case value unswitched before or not. 340 bool LUAnalysisCache::isUnswitched(const SwitchInst *SI, const Value *V) { 341 return (*CurLoopInstructions)[SI].count(V); 342 } 343 344 bool LUAnalysisCache::CostAllowsUnswitching() { 345 return CurrentLoopProperties->CanBeUnswitchedCount > 0; 346 } 347 348 // Clone all loop-unswitch related loop properties. 349 // Redistribute unswitching quotas. 350 // Note, that new loop data is stored inside the VMap. 351 void LUAnalysisCache::cloneData(const Loop *NewLoop, const Loop *OldLoop, 352 const ValueToValueMapTy &VMap) { 353 354 LoopProperties &NewLoopProps = LoopsProperties[NewLoop]; 355 LoopProperties &OldLoopProps = *CurrentLoopProperties; 356 UnswitchedValsMap &Insts = OldLoopProps.UnswitchedVals; 357 358 // Reallocate "can-be-unswitched quota" 359 360 --OldLoopProps.CanBeUnswitchedCount; 361 ++OldLoopProps.WasUnswitchedCount; 362 NewLoopProps.WasUnswitchedCount = 0; 363 unsigned Quota = OldLoopProps.CanBeUnswitchedCount; 364 NewLoopProps.CanBeUnswitchedCount = Quota / 2; 365 OldLoopProps.CanBeUnswitchedCount = Quota - Quota / 2; 366 367 NewLoopProps.SizeEstimation = OldLoopProps.SizeEstimation; 368 369 // Clone unswitched values info: 370 // for new loop switches we clone info about values that was 371 // already unswitched and has redundant successors. 372 for (UnswitchedValsIt I = Insts.begin(); I != Insts.end(); ++I) { 373 const SwitchInst *OldInst = I->first; 374 Value *NewI = VMap.lookup(OldInst); 375 const SwitchInst *NewInst = cast_or_null<SwitchInst>(NewI); 376 assert(NewInst && "All instructions that are in SrcBB must be in VMap."); 377 378 NewLoopProps.UnswitchedVals[NewInst] = OldLoopProps.UnswitchedVals[OldInst]; 379 } 380 } 381 382 char LoopUnswitch::ID = 0; 383 INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops", 384 false, false) 385 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 386 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 387 INITIALIZE_PASS_DEPENDENCY(LoopSimplify) 388 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 389 INITIALIZE_PASS_DEPENDENCY(LCSSA) 390 INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops", 391 false, false) 392 393 Pass *llvm::createLoopUnswitchPass(bool Os) { 394 return new LoopUnswitch(Os); 395 } 396 397 /// Cond is a condition that occurs in L. If it is invariant in the loop, or has 398 /// an invariant piece, return the invariant. Otherwise, return null. 399 static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) { 400 401 // We started analyze new instruction, increment scanned instructions counter. 402 ++TotalInsts; 403 404 // We can never unswitch on vector conditions. 405 if (Cond->getType()->isVectorTy()) 406 return nullptr; 407 408 // Constants should be folded, not unswitched on! 409 if (isa<Constant>(Cond)) return nullptr; 410 411 // TODO: Handle: br (VARIANT|INVARIANT). 412 413 // Hoist simple values out. 414 if (L->makeLoopInvariant(Cond, Changed)) 415 return Cond; 416 417 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond)) 418 if (BO->getOpcode() == Instruction::And || 419 BO->getOpcode() == Instruction::Or) { 420 // If either the left or right side is invariant, we can unswitch on this, 421 // which will cause the branch to go away in one loop and the condition to 422 // simplify in the other one. 423 if (Value *LHS = FindLIVLoopCondition(BO->getOperand(0), L, Changed)) 424 return LHS; 425 if (Value *RHS = FindLIVLoopCondition(BO->getOperand(1), L, Changed)) 426 return RHS; 427 } 428 429 return nullptr; 430 } 431 432 bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) { 433 if (skipOptnoneFunction(L)) 434 return false; 435 436 AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache( 437 *L->getHeader()->getParent()); 438 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 439 LPM = &LPM_Ref; 440 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 441 currentLoop = L; 442 Function *F = currentLoop->getHeader()->getParent(); 443 444 EnabledPGO = F->getEntryCount().hasValue(); 445 446 if (LoopUnswitchWithBlockFrequency && EnabledPGO) { 447 BranchProbabilityInfo BPI(*F, *LI); 448 BFI.calculate(*L->getHeader()->getParent(), BPI, *LI); 449 450 // Use BranchProbability to compute a minimum frequency based on 451 // function entry baseline frequency. Loops with headers below this 452 // frequency are considered as cold. 453 const BranchProbability ColdProb(ColdnessThreshold, 100); 454 ColdEntryFreq = BlockFrequency(BFI.getEntryFreq()) * ColdProb; 455 } 456 457 bool Changed = false; 458 do { 459 assert(currentLoop->isLCSSAForm(*DT)); 460 redoLoop = false; 461 Changed |= processCurrentLoop(); 462 } while(redoLoop); 463 464 // FIXME: Reconstruct dom info, because it is not preserved properly. 465 if (Changed) 466 DT->recalculate(*F); 467 return Changed; 468 } 469 470 /// Do actual work and unswitch loop if possible and profitable. 471 bool LoopUnswitch::processCurrentLoop() { 472 bool Changed = false; 473 474 initLoopData(); 475 476 // If LoopSimplify was unable to form a preheader, don't do any unswitching. 477 if (!loopPreheader) 478 return false; 479 480 // Loops with indirectbr cannot be cloned. 481 if (!currentLoop->isSafeToClone()) 482 return false; 483 484 // Without dedicated exits, splitting the exit edge may fail. 485 if (!currentLoop->hasDedicatedExits()) 486 return false; 487 488 LLVMContext &Context = loopHeader->getContext(); 489 490 // Analyze loop cost, and stop unswitching if loop content can not be duplicated. 491 if (!BranchesInfo.countLoop( 492 currentLoop, getAnalysis<TargetTransformInfoWrapperPass>().getTTI( 493 *currentLoop->getHeader()->getParent()), 494 AC)) 495 return false; 496 497 // Try trivial unswitch first before loop over other basic blocks in the loop. 498 if (TryTrivialLoopUnswitch(Changed)) { 499 return true; 500 } 501 502 // Do not unswitch loops containing convergent operations, as we might be 503 // making them control dependent on the unswitch value when they were not 504 // before. 505 // FIXME: This could be refined to only bail if the convergent operation is 506 // not already control-dependent on the unswitch value. 507 for (const auto BB : currentLoop->blocks()) { 508 for (auto &I : *BB) { 509 auto CS = CallSite(&I); 510 if (!CS) continue; 511 if (CS.hasFnAttr(Attribute::Convergent)) 512 return false; 513 } 514 } 515 516 // Do not do non-trivial unswitch while optimizing for size. 517 // FIXME: Use Function::optForSize(). 518 if (OptimizeForSize || 519 loopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize)) 520 return false; 521 522 if (LoopUnswitchWithBlockFrequency && EnabledPGO) { 523 // Compute the weighted frequency of the hottest block in the 524 // loop (loopHeader in this case since inner loops should be 525 // processed before outer loop). If it is less than ColdFrequency, 526 // we should not unswitch. 527 BlockFrequency LoopEntryFreq = BFI.getBlockFreq(loopHeader); 528 if (LoopEntryFreq < ColdEntryFreq) 529 return false; 530 } 531 532 // Loop over all of the basic blocks in the loop. If we find an interior 533 // block that is branching on a loop-invariant condition, we can unswitch this 534 // loop. 535 for (Loop::block_iterator I = currentLoop->block_begin(), 536 E = currentLoop->block_end(); I != E; ++I) { 537 TerminatorInst *TI = (*I)->getTerminator(); 538 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { 539 // If this isn't branching on an invariant condition, we can't unswitch 540 // it. 541 if (BI->isConditional()) { 542 // See if this, or some part of it, is loop invariant. If so, we can 543 // unswitch on it if we desire. 544 Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), 545 currentLoop, Changed); 546 if (LoopCond && 547 UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), TI)) { 548 ++NumBranches; 549 return true; 550 } 551 } 552 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { 553 Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), 554 currentLoop, Changed); 555 unsigned NumCases = SI->getNumCases(); 556 if (LoopCond && NumCases) { 557 // Find a value to unswitch on: 558 // FIXME: this should chose the most expensive case! 559 // FIXME: scan for a case with a non-critical edge? 560 Constant *UnswitchVal = nullptr; 561 562 // Do not process same value again and again. 563 // At this point we have some cases already unswitched and 564 // some not yet unswitched. Let's find the first not yet unswitched one. 565 for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); 566 i != e; ++i) { 567 Constant *UnswitchValCandidate = i.getCaseValue(); 568 if (!BranchesInfo.isUnswitched(SI, UnswitchValCandidate)) { 569 UnswitchVal = UnswitchValCandidate; 570 break; 571 } 572 } 573 574 if (!UnswitchVal) 575 continue; 576 577 if (UnswitchIfProfitable(LoopCond, UnswitchVal)) { 578 ++NumSwitches; 579 return true; 580 } 581 } 582 } 583 584 // Scan the instructions to check for unswitchable values. 585 for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end(); 586 BBI != E; ++BBI) 587 if (SelectInst *SI = dyn_cast<SelectInst>(BBI)) { 588 Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), 589 currentLoop, Changed); 590 if (LoopCond && UnswitchIfProfitable(LoopCond, 591 ConstantInt::getTrue(Context))) { 592 ++NumSelects; 593 return true; 594 } 595 } 596 } 597 return Changed; 598 } 599 600 /// Check to see if all paths from BB exit the loop with no side effects 601 /// (including infinite loops). 602 /// 603 /// If true, we return true and set ExitBB to the block we 604 /// exit through. 605 /// 606 static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB, 607 BasicBlock *&ExitBB, 608 std::set<BasicBlock*> &Visited) { 609 if (!Visited.insert(BB).second) { 610 // Already visited. Without more analysis, this could indicate an infinite 611 // loop. 612 return false; 613 } 614 if (!L->contains(BB)) { 615 // Otherwise, this is a loop exit, this is fine so long as this is the 616 // first exit. 617 if (ExitBB) return false; 618 ExitBB = BB; 619 return true; 620 } 621 622 // Otherwise, this is an unvisited intra-loop node. Check all successors. 623 for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) { 624 // Check to see if the successor is a trivial loop exit. 625 if (!isTrivialLoopExitBlockHelper(L, *SI, ExitBB, Visited)) 626 return false; 627 } 628 629 // Okay, everything after this looks good, check to make sure that this block 630 // doesn't include any side effects. 631 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) 632 if (I->mayHaveSideEffects()) 633 return false; 634 635 return true; 636 } 637 638 /// Return true if the specified block unconditionally leads to an exit from 639 /// the specified loop, and has no side-effects in the process. If so, return 640 /// the block that is exited to, otherwise return null. 641 static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) { 642 std::set<BasicBlock*> Visited; 643 Visited.insert(L->getHeader()); // Branches to header make infinite loops. 644 BasicBlock *ExitBB = nullptr; 645 if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited)) 646 return ExitBB; 647 return nullptr; 648 } 649 650 /// We have found that we can unswitch currentLoop when LoopCond == Val to 651 /// simplify the loop. If we decide that this is profitable, 652 /// unswitch the loop, reprocess the pieces, then return true. 653 bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val, 654 TerminatorInst *TI) { 655 // Check to see if it would be profitable to unswitch current loop. 656 if (!BranchesInfo.CostAllowsUnswitching()) { 657 DEBUG(dbgs() << "NOT unswitching loop %" 658 << currentLoop->getHeader()->getName() 659 << " at non-trivial condition '" << *Val 660 << "' == " << *LoopCond << "\n" 661 << ". Cost too high.\n"); 662 return false; 663 } 664 665 UnswitchNontrivialCondition(LoopCond, Val, currentLoop, TI); 666 return true; 667 } 668 669 /// Recursively clone the specified loop and all of its children, 670 /// mapping the blocks with the specified map. 671 static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, 672 LoopInfo *LI, LPPassManager *LPM) { 673 Loop &New = LPM->addLoop(PL); 674 675 // Add all of the blocks in L to the new loop. 676 for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); 677 I != E; ++I) 678 if (LI->getLoopFor(*I) == L) 679 New.addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), *LI); 680 681 // Add all of the subloops to the new loop. 682 for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) 683 CloneLoop(*I, &New, VM, LI, LPM); 684 685 return &New; 686 } 687 688 static void copyMetadata(Instruction *DstInst, const Instruction *SrcInst, 689 bool Swapped) { 690 if (!SrcInst || !SrcInst->hasMetadata()) 691 return; 692 693 SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; 694 SrcInst->getAllMetadata(MDs); 695 for (auto &MD : MDs) { 696 switch (MD.first) { 697 default: 698 break; 699 case LLVMContext::MD_prof: 700 if (Swapped && MD.second->getNumOperands() == 3 && 701 isa<MDString>(MD.second->getOperand(0))) { 702 MDString *MDName = cast<MDString>(MD.second->getOperand(0)); 703 if (MDName->getString() == "branch_weights") { 704 auto *ValT = cast_or_null<ConstantAsMetadata>( 705 MD.second->getOperand(1))->getValue(); 706 auto *ValF = cast_or_null<ConstantAsMetadata>( 707 MD.second->getOperand(2))->getValue(); 708 assert(ValT && ValF && "Invalid Operands of branch_weights"); 709 auto NewMD = 710 MDBuilder(DstInst->getParent()->getContext()) 711 .createBranchWeights(cast<ConstantInt>(ValF)->getZExtValue(), 712 cast<ConstantInt>(ValT)->getZExtValue()); 713 MD.second = NewMD; 714 } 715 } 716 // fallthrough. 717 case LLVMContext::MD_make_implicit: 718 case LLVMContext::MD_dbg: 719 DstInst->setMetadata(MD.first, MD.second); 720 } 721 } 722 } 723 724 /// Emit a conditional branch on two values if LIC == Val, branch to TrueDst, 725 /// otherwise branch to FalseDest. Insert the code immediately before InsertPt. 726 void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val, 727 BasicBlock *TrueDest, 728 BasicBlock *FalseDest, 729 Instruction *InsertPt, 730 TerminatorInst *TI) { 731 // Insert a conditional branch on LIC to the two preheaders. The original 732 // code is the true version and the new code is the false version. 733 Value *BranchVal = LIC; 734 bool Swapped = false; 735 if (!isa<ConstantInt>(Val) || 736 Val->getType() != Type::getInt1Ty(LIC->getContext())) 737 BranchVal = new ICmpInst(InsertPt, ICmpInst::ICMP_EQ, LIC, Val); 738 else if (Val != ConstantInt::getTrue(Val->getContext())) { 739 // We want to enter the new loop when the condition is true. 740 std::swap(TrueDest, FalseDest); 741 Swapped = true; 742 } 743 744 // Insert the new branch. 745 BranchInst *BI = BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt); 746 copyMetadata(BI, TI, Swapped); 747 748 // If either edge is critical, split it. This helps preserve LoopSimplify 749 // form for enclosing loops. 750 auto Options = CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA(); 751 SplitCriticalEdge(BI, 0, Options); 752 SplitCriticalEdge(BI, 1, Options); 753 } 754 755 /// Given a loop that has a trivial unswitchable condition in it (a cond branch 756 /// from its header block to its latch block, where the path through the loop 757 /// that doesn't execute its body has no side-effects), unswitch it. This 758 /// doesn't involve any code duplication, just moving the conditional branch 759 /// outside of the loop and updating loop info. 760 void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, 761 BasicBlock *ExitBlock, 762 TerminatorInst *TI) { 763 DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %" 764 << loopHeader->getName() << " [" << L->getBlocks().size() 765 << " blocks] in Function " 766 << L->getHeader()->getParent()->getName() << " on cond: " << *Val 767 << " == " << *Cond << "\n"); 768 769 // First step, split the preheader, so that we know that there is a safe place 770 // to insert the conditional branch. We will change loopPreheader to have a 771 // conditional branch on Cond. 772 BasicBlock *NewPH = SplitEdge(loopPreheader, loopHeader, DT, LI); 773 774 // Now that we have a place to insert the conditional branch, create a place 775 // to branch to: this is the exit block out of the loop that we should 776 // short-circuit to. 777 778 // Split this block now, so that the loop maintains its exit block, and so 779 // that the jump from the preheader can execute the contents of the exit block 780 // without actually branching to it (the exit block should be dominated by the 781 // loop header, not the preheader). 782 assert(!L->contains(ExitBlock) && "Exit block is in the loop?"); 783 BasicBlock *NewExit = SplitBlock(ExitBlock, &ExitBlock->front(), DT, LI); 784 785 // Okay, now we have a position to branch from and a position to branch to, 786 // insert the new conditional branch. 787 EmitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH, 788 loopPreheader->getTerminator(), TI); 789 LPM->deleteSimpleAnalysisValue(loopPreheader->getTerminator(), L); 790 loopPreheader->getTerminator()->eraseFromParent(); 791 792 // We need to reprocess this loop, it could be unswitched again. 793 redoLoop = true; 794 795 // Now that we know that the loop is never entered when this condition is a 796 // particular value, rewrite the loop with this info. We know that this will 797 // at least eliminate the old branch. 798 RewriteLoopBodyWithConditionConstant(L, Cond, Val, false); 799 ++NumTrivial; 800 } 801 802 /// Check if the first non-constant condition starting from the loop header is 803 /// a trivial unswitch condition: that is, a condition controls whether or not 804 /// the loop does anything at all. If it is a trivial condition, unswitching 805 /// produces no code duplications (equivalently, it produces a simpler loop and 806 /// a new empty loop, which gets deleted). Therefore always unswitch trivial 807 /// condition. 808 bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) { 809 BasicBlock *CurrentBB = currentLoop->getHeader(); 810 TerminatorInst *CurrentTerm = CurrentBB->getTerminator(); 811 LLVMContext &Context = CurrentBB->getContext(); 812 813 // If loop header has only one reachable successor (currently via an 814 // unconditional branch or constant foldable conditional branch, but 815 // should also consider adding constant foldable switch instruction in 816 // future), we should keep looking for trivial condition candidates in 817 // the successor as well. An alternative is to constant fold conditions 818 // and merge successors into loop header (then we only need to check header's 819 // terminator). The reason for not doing this in LoopUnswitch pass is that 820 // it could potentially break LoopPassManager's invariants. Folding dead 821 // branches could either eliminate the current loop or make other loops 822 // unreachable. LCSSA form might also not be preserved after deleting 823 // branches. The following code keeps traversing loop header's successors 824 // until it finds the trivial condition candidate (condition that is not a 825 // constant). Since unswitching generates branches with constant conditions, 826 // this scenario could be very common in practice. 827 SmallSet<BasicBlock*, 8> Visited; 828 829 while (true) { 830 // If we exit loop or reach a previous visited block, then 831 // we can not reach any trivial condition candidates (unfoldable 832 // branch instructions or switch instructions) and no unswitch 833 // can happen. Exit and return false. 834 if (!currentLoop->contains(CurrentBB) || !Visited.insert(CurrentBB).second) 835 return false; 836 837 // Check if this loop will execute any side-effecting instructions (e.g. 838 // stores, calls, volatile loads) in the part of the loop that the code 839 // *would* execute. Check the header first. 840 for (Instruction &I : *CurrentBB) 841 if (I.mayHaveSideEffects()) 842 return false; 843 844 // FIXME: add check for constant foldable switch instructions. 845 if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) { 846 if (BI->isUnconditional()) { 847 CurrentBB = BI->getSuccessor(0); 848 } else if (BI->getCondition() == ConstantInt::getTrue(Context)) { 849 CurrentBB = BI->getSuccessor(0); 850 } else if (BI->getCondition() == ConstantInt::getFalse(Context)) { 851 CurrentBB = BI->getSuccessor(1); 852 } else { 853 // Found a trivial condition candidate: non-foldable conditional branch. 854 break; 855 } 856 } else { 857 break; 858 } 859 860 CurrentTerm = CurrentBB->getTerminator(); 861 } 862 863 // CondVal is the condition that controls the trivial condition. 864 // LoopExitBB is the BasicBlock that loop exits when meets trivial condition. 865 Constant *CondVal = nullptr; 866 BasicBlock *LoopExitBB = nullptr; 867 868 if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) { 869 // If this isn't branching on an invariant condition, we can't unswitch it. 870 if (!BI->isConditional()) 871 return false; 872 873 Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), 874 currentLoop, Changed); 875 876 // Unswitch only if the trivial condition itself is an LIV (not 877 // partial LIV which could occur in and/or) 878 if (!LoopCond || LoopCond != BI->getCondition()) 879 return false; 880 881 // Check to see if a successor of the branch is guaranteed to 882 // exit through a unique exit block without having any 883 // side-effects. If so, determine the value of Cond that causes 884 // it to do this. 885 if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, 886 BI->getSuccessor(0)))) { 887 CondVal = ConstantInt::getTrue(Context); 888 } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, 889 BI->getSuccessor(1)))) { 890 CondVal = ConstantInt::getFalse(Context); 891 } 892 893 // If we didn't find a single unique LoopExit block, or if the loop exit 894 // block contains phi nodes, this isn't trivial. 895 if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin())) 896 return false; // Can't handle this. 897 898 UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, LoopExitBB, 899 CurrentTerm); 900 ++NumBranches; 901 return true; 902 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurrentTerm)) { 903 // If this isn't switching on an invariant condition, we can't unswitch it. 904 Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), 905 currentLoop, Changed); 906 907 // Unswitch only if the trivial condition itself is an LIV (not 908 // partial LIV which could occur in and/or) 909 if (!LoopCond || LoopCond != SI->getCondition()) 910 return false; 911 912 // Check to see if a successor of the switch is guaranteed to go to the 913 // latch block or exit through a one exit block without having any 914 // side-effects. If so, determine the value of Cond that causes it to do 915 // this. 916 // Note that we can't trivially unswitch on the default case or 917 // on already unswitched cases. 918 for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); 919 i != e; ++i) { 920 BasicBlock *LoopExitCandidate; 921 if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop, 922 i.getCaseSuccessor()))) { 923 // Okay, we found a trivial case, remember the value that is trivial. 924 ConstantInt *CaseVal = i.getCaseValue(); 925 926 // Check that it was not unswitched before, since already unswitched 927 // trivial vals are looks trivial too. 928 if (BranchesInfo.isUnswitched(SI, CaseVal)) 929 continue; 930 LoopExitBB = LoopExitCandidate; 931 CondVal = CaseVal; 932 break; 933 } 934 } 935 936 // If we didn't find a single unique LoopExit block, or if the loop exit 937 // block contains phi nodes, this isn't trivial. 938 if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin())) 939 return false; // Can't handle this. 940 941 UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, LoopExitBB, 942 nullptr); 943 ++NumSwitches; 944 return true; 945 } 946 return false; 947 } 948 949 /// Split all of the edges from inside the loop to their exit blocks. 950 /// Update the appropriate Phi nodes as we do so. 951 void LoopUnswitch::SplitExitEdges(Loop *L, 952 const SmallVectorImpl<BasicBlock *> &ExitBlocks){ 953 954 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { 955 BasicBlock *ExitBlock = ExitBlocks[i]; 956 SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock), 957 pred_end(ExitBlock)); 958 959 // Although SplitBlockPredecessors doesn't preserve loop-simplify in 960 // general, if we call it on all predecessors of all exits then it does. 961 SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", DT, LI, 962 /*PreserveLCSSA*/ true); 963 } 964 } 965 966 /// We determined that the loop is profitable to unswitch when LIC equal Val. 967 /// Split it into loop versions and test the condition outside of either loop. 968 /// Return the loops created as Out1/Out2. 969 void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, 970 Loop *L, TerminatorInst *TI) { 971 Function *F = loopHeader->getParent(); 972 DEBUG(dbgs() << "loop-unswitch: Unswitching loop %" 973 << loopHeader->getName() << " [" << L->getBlocks().size() 974 << " blocks] in Function " << F->getName() 975 << " when '" << *Val << "' == " << *LIC << "\n"); 976 977 if (auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>()) 978 SEWP->getSE().forgetLoop(L); 979 980 LoopBlocks.clear(); 981 NewBlocks.clear(); 982 983 // First step, split the preheader and exit blocks, and add these blocks to 984 // the LoopBlocks list. 985 BasicBlock *NewPreheader = SplitEdge(loopPreheader, loopHeader, DT, LI); 986 LoopBlocks.push_back(NewPreheader); 987 988 // We want the loop to come after the preheader, but before the exit blocks. 989 LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end()); 990 991 SmallVector<BasicBlock*, 8> ExitBlocks; 992 L->getUniqueExitBlocks(ExitBlocks); 993 994 // Split all of the edges from inside the loop to their exit blocks. Update 995 // the appropriate Phi nodes as we do so. 996 SplitExitEdges(L, ExitBlocks); 997 998 // The exit blocks may have been changed due to edge splitting, recompute. 999 ExitBlocks.clear(); 1000 L->getUniqueExitBlocks(ExitBlocks); 1001 1002 // Add exit blocks to the loop blocks. 1003 LoopBlocks.insert(LoopBlocks.end(), ExitBlocks.begin(), ExitBlocks.end()); 1004 1005 // Next step, clone all of the basic blocks that make up the loop (including 1006 // the loop preheader and exit blocks), keeping track of the mapping between 1007 // the instructions and blocks. 1008 NewBlocks.reserve(LoopBlocks.size()); 1009 ValueToValueMapTy VMap; 1010 for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) { 1011 BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F); 1012 1013 NewBlocks.push_back(NewBB); 1014 VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping. 1015 LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L); 1016 } 1017 1018 // Splice the newly inserted blocks into the function right before the 1019 // original preheader. 1020 F->getBasicBlockList().splice(NewPreheader->getIterator(), 1021 F->getBasicBlockList(), 1022 NewBlocks[0]->getIterator(), F->end()); 1023 1024 // FIXME: We could register any cloned assumptions instead of clearing the 1025 // whole function's cache. 1026 AC->clear(); 1027 1028 // Now we create the new Loop object for the versioned loop. 1029 Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM); 1030 1031 // Recalculate unswitching quota, inherit simplified switches info for NewBB, 1032 // Probably clone more loop-unswitch related loop properties. 1033 BranchesInfo.cloneData(NewLoop, L, VMap); 1034 1035 Loop *ParentLoop = L->getParentLoop(); 1036 if (ParentLoop) { 1037 // Make sure to add the cloned preheader and exit blocks to the parent loop 1038 // as well. 1039 ParentLoop->addBasicBlockToLoop(NewBlocks[0], *LI); 1040 } 1041 1042 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { 1043 BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]); 1044 // The new exit block should be in the same loop as the old one. 1045 if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i])) 1046 ExitBBLoop->addBasicBlockToLoop(NewExit, *LI); 1047 1048 assert(NewExit->getTerminator()->getNumSuccessors() == 1 && 1049 "Exit block should have been split to have one successor!"); 1050 BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0); 1051 1052 // If the successor of the exit block had PHI nodes, add an entry for 1053 // NewExit. 1054 for (BasicBlock::iterator I = ExitSucc->begin(); 1055 PHINode *PN = dyn_cast<PHINode>(I); ++I) { 1056 Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]); 1057 ValueToValueMapTy::iterator It = VMap.find(V); 1058 if (It != VMap.end()) V = It->second; 1059 PN->addIncoming(V, NewExit); 1060 } 1061 1062 if (LandingPadInst *LPad = NewExit->getLandingPadInst()) { 1063 PHINode *PN = PHINode::Create(LPad->getType(), 0, "", 1064 &*ExitSucc->getFirstInsertionPt()); 1065 1066 for (pred_iterator I = pred_begin(ExitSucc), E = pred_end(ExitSucc); 1067 I != E; ++I) { 1068 BasicBlock *BB = *I; 1069 LandingPadInst *LPI = BB->getLandingPadInst(); 1070 LPI->replaceAllUsesWith(PN); 1071 PN->addIncoming(LPI, BB); 1072 } 1073 } 1074 } 1075 1076 // Rewrite the code to refer to itself. 1077 for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) 1078 for (BasicBlock::iterator I = NewBlocks[i]->begin(), 1079 E = NewBlocks[i]->end(); I != E; ++I) 1080 RemapInstruction(&*I, VMap, 1081 RF_NoModuleLevelChanges | RF_IgnoreMissingEntries); 1082 1083 // Rewrite the original preheader to select between versions of the loop. 1084 BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator()); 1085 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] && 1086 "Preheader splitting did not work correctly!"); 1087 1088 // Emit the new branch that selects between the two versions of this loop. 1089 EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR, 1090 TI); 1091 LPM->deleteSimpleAnalysisValue(OldBR, L); 1092 OldBR->eraseFromParent(); 1093 1094 LoopProcessWorklist.push_back(NewLoop); 1095 redoLoop = true; 1096 1097 // Keep a WeakVH holding onto LIC. If the first call to RewriteLoopBody 1098 // deletes the instruction (for example by simplifying a PHI that feeds into 1099 // the condition that we're unswitching on), we don't rewrite the second 1100 // iteration. 1101 WeakVH LICHandle(LIC); 1102 1103 // Now we rewrite the original code to know that the condition is true and the 1104 // new code to know that the condition is false. 1105 RewriteLoopBodyWithConditionConstant(L, LIC, Val, false); 1106 1107 // It's possible that simplifying one loop could cause the other to be 1108 // changed to another value or a constant. If its a constant, don't simplify 1109 // it. 1110 if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop && 1111 LICHandle && !isa<Constant>(LICHandle)) 1112 RewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, true); 1113 } 1114 1115 /// Remove all instances of I from the worklist vector specified. 1116 static void RemoveFromWorklist(Instruction *I, 1117 std::vector<Instruction*> &Worklist) { 1118 1119 Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), I), 1120 Worklist.end()); 1121 } 1122 1123 /// When we find that I really equals V, remove I from the 1124 /// program, replacing all uses with V and update the worklist. 1125 static void ReplaceUsesOfWith(Instruction *I, Value *V, 1126 std::vector<Instruction*> &Worklist, 1127 Loop *L, LPPassManager *LPM) { 1128 DEBUG(dbgs() << "Replace with '" << *V << "': " << *I); 1129 1130 // Add uses to the worklist, which may be dead now. 1131 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) 1132 if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i))) 1133 Worklist.push_back(Use); 1134 1135 // Add users to the worklist which may be simplified now. 1136 for (User *U : I->users()) 1137 Worklist.push_back(cast<Instruction>(U)); 1138 LPM->deleteSimpleAnalysisValue(I, L); 1139 RemoveFromWorklist(I, Worklist); 1140 I->replaceAllUsesWith(V); 1141 I->eraseFromParent(); 1142 ++NumSimplify; 1143 } 1144 1145 /// We know either that the value LIC has the value specified by Val in the 1146 /// specified loop, or we know it does NOT have that value. 1147 /// Rewrite any uses of LIC or of properties correlated to it. 1148 void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, 1149 Constant *Val, 1150 bool IsEqual) { 1151 assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?"); 1152 1153 // FIXME: Support correlated properties, like: 1154 // for (...) 1155 // if (li1 < li2) 1156 // ... 1157 // if (li1 > li2) 1158 // ... 1159 1160 // FOLD boolean conditions (X|LIC), (X&LIC). Fold conditional branches, 1161 // selects, switches. 1162 std::vector<Instruction*> Worklist; 1163 LLVMContext &Context = Val->getContext(); 1164 1165 // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC 1166 // in the loop with the appropriate one directly. 1167 if (IsEqual || (isa<ConstantInt>(Val) && 1168 Val->getType()->isIntegerTy(1))) { 1169 Value *Replacement; 1170 if (IsEqual) 1171 Replacement = Val; 1172 else 1173 Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()), 1174 !cast<ConstantInt>(Val)->getZExtValue()); 1175 1176 for (User *U : LIC->users()) { 1177 Instruction *UI = dyn_cast<Instruction>(U); 1178 if (!UI || !L->contains(UI)) 1179 continue; 1180 Worklist.push_back(UI); 1181 } 1182 1183 for (std::vector<Instruction*>::iterator UI = Worklist.begin(), 1184 UE = Worklist.end(); UI != UE; ++UI) 1185 (*UI)->replaceUsesOfWith(LIC, Replacement); 1186 1187 SimplifyCode(Worklist, L); 1188 return; 1189 } 1190 1191 // Otherwise, we don't know the precise value of LIC, but we do know that it 1192 // is certainly NOT "Val". As such, simplify any uses in the loop that we 1193 // can. This case occurs when we unswitch switch statements. 1194 for (User *U : LIC->users()) { 1195 Instruction *UI = dyn_cast<Instruction>(U); 1196 if (!UI || !L->contains(UI)) 1197 continue; 1198 1199 Worklist.push_back(UI); 1200 1201 // TODO: We could do other simplifications, for example, turning 1202 // 'icmp eq LIC, Val' -> false. 1203 1204 // If we know that LIC is not Val, use this info to simplify code. 1205 SwitchInst *SI = dyn_cast<SwitchInst>(UI); 1206 if (!SI || !isa<ConstantInt>(Val)) continue; 1207 1208 SwitchInst::CaseIt DeadCase = SI->findCaseValue(cast<ConstantInt>(Val)); 1209 // Default case is live for multiple values. 1210 if (DeadCase == SI->case_default()) continue; 1211 1212 // Found a dead case value. Don't remove PHI nodes in the 1213 // successor if they become single-entry, those PHI nodes may 1214 // be in the Users list. 1215 1216 BasicBlock *Switch = SI->getParent(); 1217 BasicBlock *SISucc = DeadCase.getCaseSuccessor(); 1218 BasicBlock *Latch = L->getLoopLatch(); 1219 1220 BranchesInfo.setUnswitched(SI, Val); 1221 1222 if (!SI->findCaseDest(SISucc)) continue; // Edge is critical. 1223 // If the DeadCase successor dominates the loop latch, then the 1224 // transformation isn't safe since it will delete the sole predecessor edge 1225 // to the latch. 1226 if (Latch && DT->dominates(SISucc, Latch)) 1227 continue; 1228 1229 // FIXME: This is a hack. We need to keep the successor around 1230 // and hooked up so as to preserve the loop structure, because 1231 // trying to update it is complicated. So instead we preserve the 1232 // loop structure and put the block on a dead code path. 1233 SplitEdge(Switch, SISucc, DT, LI); 1234 // Compute the successors instead of relying on the return value 1235 // of SplitEdge, since it may have split the switch successor 1236 // after PHI nodes. 1237 BasicBlock *NewSISucc = DeadCase.getCaseSuccessor(); 1238 BasicBlock *OldSISucc = *succ_begin(NewSISucc); 1239 // Create an "unreachable" destination. 1240 BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable", 1241 Switch->getParent(), 1242 OldSISucc); 1243 new UnreachableInst(Context, Abort); 1244 // Force the new case destination to branch to the "unreachable" 1245 // block while maintaining a (dead) CFG edge to the old block. 1246 NewSISucc->getTerminator()->eraseFromParent(); 1247 BranchInst::Create(Abort, OldSISucc, 1248 ConstantInt::getTrue(Context), NewSISucc); 1249 // Release the PHI operands for this edge. 1250 for (BasicBlock::iterator II = NewSISucc->begin(); 1251 PHINode *PN = dyn_cast<PHINode>(II); ++II) 1252 PN->setIncomingValue(PN->getBasicBlockIndex(Switch), 1253 UndefValue::get(PN->getType())); 1254 // Tell the domtree about the new block. We don't fully update the 1255 // domtree here -- instead we force it to do a full recomputation 1256 // after the pass is complete -- but we do need to inform it of 1257 // new blocks. 1258 DT->addNewBlock(Abort, NewSISucc); 1259 } 1260 1261 SimplifyCode(Worklist, L); 1262 } 1263 1264 /// Now that we have simplified some instructions in the loop, walk over it and 1265 /// constant prop, dce, and fold control flow where possible. Note that this is 1266 /// effectively a very simple loop-structure-aware optimizer. During processing 1267 /// of this loop, L could very well be deleted, so it must not be used. 1268 /// 1269 /// FIXME: When the loop optimizer is more mature, separate this out to a new 1270 /// pass. 1271 /// 1272 void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { 1273 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 1274 while (!Worklist.empty()) { 1275 Instruction *I = Worklist.back(); 1276 Worklist.pop_back(); 1277 1278 // Simple DCE. 1279 if (isInstructionTriviallyDead(I)) { 1280 DEBUG(dbgs() << "Remove dead instruction '" << *I); 1281 1282 // Add uses to the worklist, which may be dead now. 1283 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) 1284 if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i))) 1285 Worklist.push_back(Use); 1286 LPM->deleteSimpleAnalysisValue(I, L); 1287 RemoveFromWorklist(I, Worklist); 1288 I->eraseFromParent(); 1289 ++NumSimplify; 1290 continue; 1291 } 1292 1293 // See if instruction simplification can hack this up. This is common for 1294 // things like "select false, X, Y" after unswitching made the condition be 1295 // 'false'. TODO: update the domtree properly so we can pass it here. 1296 if (Value *V = SimplifyInstruction(I, DL)) 1297 if (LI->replacementPreservesLCSSAForm(I, V)) { 1298 ReplaceUsesOfWith(I, V, Worklist, L, LPM); 1299 continue; 1300 } 1301 1302 // Special case hacks that appear commonly in unswitched code. 1303 if (BranchInst *BI = dyn_cast<BranchInst>(I)) { 1304 if (BI->isUnconditional()) { 1305 // If BI's parent is the only pred of the successor, fold the two blocks 1306 // together. 1307 BasicBlock *Pred = BI->getParent(); 1308 BasicBlock *Succ = BI->getSuccessor(0); 1309 BasicBlock *SinglePred = Succ->getSinglePredecessor(); 1310 if (!SinglePred) continue; // Nothing to do. 1311 assert(SinglePred == Pred && "CFG broken"); 1312 1313 DEBUG(dbgs() << "Merging blocks: " << Pred->getName() << " <- " 1314 << Succ->getName() << "\n"); 1315 1316 // Resolve any single entry PHI nodes in Succ. 1317 while (PHINode *PN = dyn_cast<PHINode>(Succ->begin())) 1318 ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM); 1319 1320 // If Succ has any successors with PHI nodes, update them to have 1321 // entries coming from Pred instead of Succ. 1322 Succ->replaceAllUsesWith(Pred); 1323 1324 // Move all of the successor contents from Succ to Pred. 1325 Pred->getInstList().splice(BI->getIterator(), Succ->getInstList(), 1326 Succ->begin(), Succ->end()); 1327 LPM->deleteSimpleAnalysisValue(BI, L); 1328 BI->eraseFromParent(); 1329 RemoveFromWorklist(BI, Worklist); 1330 1331 // Remove Succ from the loop tree. 1332 LI->removeBlock(Succ); 1333 LPM->deleteSimpleAnalysisValue(Succ, L); 1334 Succ->eraseFromParent(); 1335 ++NumSimplify; 1336 continue; 1337 } 1338 1339 continue; 1340 } 1341 } 1342 } 1343