Home | History | Annotate | Download | only in Utils
      1 //===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements some loop unrolling utilities. It does not define any
     11 // actual pass or policy, but provides a single function to perform loop
     12 // unrolling.
     13 //
     14 // The process of unrolling can produce extraneous basic blocks linked with
     15 // unconditional branches.  This will be corrected in the future.
     16 //
     17 //===----------------------------------------------------------------------===//
     18 
     19 #include "llvm/ADT/SmallPtrSet.h"
     20 #include "llvm/ADT/Statistic.h"
     21 #include "llvm/Analysis/AssumptionCache.h"
     22 #include "llvm/Analysis/InstructionSimplify.h"
     23 #include "llvm/Analysis/LoopIterator.h"
     24 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
     25 #include "llvm/Analysis/ScalarEvolution.h"
     26 #include "llvm/Transforms/Utils/Local.h"
     27 #include "llvm/IR/BasicBlock.h"
     28 #include "llvm/IR/DataLayout.h"
     29 #include "llvm/IR/DebugInfoMetadata.h"
     30 #include "llvm/IR/Dominators.h"
     31 #include "llvm/IR/IntrinsicInst.h"
     32 #include "llvm/IR/LLVMContext.h"
     33 #include "llvm/Support/Debug.h"
     34 #include "llvm/Support/raw_ostream.h"
     35 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
     36 #include "llvm/Transforms/Utils/Cloning.h"
     37 #include "llvm/Transforms/Utils/LoopSimplify.h"
     38 #include "llvm/Transforms/Utils/LoopUtils.h"
     39 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
     40 #include "llvm/Transforms/Utils/UnrollLoop.h"
     41 using namespace llvm;
     42 
     43 #define DEBUG_TYPE "loop-unroll"
     44 
     45 // TODO: Should these be here or in LoopUnroll?
     46 STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
     47 STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
     48 
     49 static cl::opt<bool>
     50 UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
     51                     cl::desc("Allow runtime unrolled loops to be unrolled "
     52                              "with epilog instead of prolog."));
     53 
     54 static cl::opt<bool>
     55 UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden,
     56                     cl::desc("Verify domtree after unrolling"),
     57 #ifdef NDEBUG
     58     cl::init(false)
     59 #else
     60     cl::init(true)
     61 #endif
     62                     );
     63 
     64 /// Convert the instruction operands from referencing the current values into
     65 /// those specified by VMap.
     66 void llvm::remapInstruction(Instruction *I, ValueToValueMapTy &VMap) {
     67   for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
     68     Value *Op = I->getOperand(op);
     69 
     70     // Unwrap arguments of dbg.value intrinsics.
     71     bool Wrapped = false;
     72     if (auto *V = dyn_cast<MetadataAsValue>(Op))
     73       if (auto *Unwrapped = dyn_cast<ValueAsMetadata>(V->getMetadata())) {
     74         Op = Unwrapped->getValue();
     75         Wrapped = true;
     76       }
     77 
     78     auto wrap = [&](Value *V) {
     79       auto &C = I->getContext();
     80       return Wrapped ? MetadataAsValue::get(C, ValueAsMetadata::get(V)) : V;
     81     };
     82 
     83     ValueToValueMapTy::iterator It = VMap.find(Op);
     84     if (It != VMap.end())
     85       I->setOperand(op, wrap(It->second));
     86   }
     87 
     88   if (PHINode *PN = dyn_cast<PHINode>(I)) {
     89     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
     90       ValueToValueMapTy::iterator It = VMap.find(PN->getIncomingBlock(i));
     91       if (It != VMap.end())
     92         PN->setIncomingBlock(i, cast<BasicBlock>(It->second));
     93     }
     94   }
     95 }
     96 
     97 /// Folds a basic block into its predecessor if it only has one predecessor, and
     98 /// that predecessor only has one successor.
     99 /// The LoopInfo Analysis that is passed will be kept consistent.
    100 BasicBlock *llvm::foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI,
    101                                            ScalarEvolution *SE,
    102                                            DominatorTree *DT) {
    103   // Merge basic blocks into their predecessor if there is only one distinct
    104   // pred, and if there is only one distinct successor of the predecessor, and
    105   // if there are no PHI nodes.
    106   BasicBlock *OnlyPred = BB->getSinglePredecessor();
    107   if (!OnlyPred) return nullptr;
    108 
    109   if (OnlyPred->getTerminator()->getNumSuccessors() != 1)
    110     return nullptr;
    111 
    112   LLVM_DEBUG(dbgs() << "Merging: " << BB->getName() << " into "
    113                     << OnlyPred->getName() << "\n");
    114 
    115   // Resolve any PHI nodes at the start of the block.  They are all
    116   // guaranteed to have exactly one entry if they exist, unless there are
    117   // multiple duplicate (but guaranteed to be equal) entries for the
    118   // incoming edges.  This occurs when there are multiple edges from
    119   // OnlyPred to OnlySucc.
    120   FoldSingleEntryPHINodes(BB);
    121 
    122   // Delete the unconditional branch from the predecessor...
    123   OnlyPred->getInstList().pop_back();
    124 
    125   // Make all PHI nodes that referred to BB now refer to Pred as their
    126   // source...
    127   BB->replaceAllUsesWith(OnlyPred);
    128 
    129   // Move all definitions in the successor to the predecessor...
    130   OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
    131 
    132   // OldName will be valid until erased.
    133   StringRef OldName = BB->getName();
    134 
    135   // Erase the old block and update dominator info.
    136   if (DT)
    137     if (DomTreeNode *DTN = DT->getNode(BB)) {
    138       DomTreeNode *PredDTN = DT->getNode(OnlyPred);
    139       SmallVector<DomTreeNode *, 8> Children(DTN->begin(), DTN->end());
    140       for (auto *DI : Children)
    141         DT->changeImmediateDominator(DI, PredDTN);
    142 
    143       DT->eraseNode(BB);
    144     }
    145 
    146   LI->removeBlock(BB);
    147 
    148   // Inherit predecessor's name if it exists...
    149   if (!OldName.empty() && !OnlyPred->hasName())
    150     OnlyPred->setName(OldName);
    151 
    152   BB->eraseFromParent();
    153 
    154   return OnlyPred;
    155 }
    156 
    157 /// Check if unrolling created a situation where we need to insert phi nodes to
    158 /// preserve LCSSA form.
    159 /// \param Blocks is a vector of basic blocks representing unrolled loop.
    160 /// \param L is the outer loop.
    161 /// It's possible that some of the blocks are in L, and some are not. In this
    162 /// case, if there is a use is outside L, and definition is inside L, we need to
    163 /// insert a phi-node, otherwise LCSSA will be broken.
    164 /// The function is just a helper function for llvm::UnrollLoop that returns
    165 /// true if this situation occurs, indicating that LCSSA needs to be fixed.
    166 static bool needToInsertPhisForLCSSA(Loop *L, std::vector<BasicBlock *> Blocks,
    167                                      LoopInfo *LI) {
    168   for (BasicBlock *BB : Blocks) {
    169     if (LI->getLoopFor(BB) == L)
    170       continue;
    171     for (Instruction &I : *BB) {
    172       for (Use &U : I.operands()) {
    173         if (auto Def = dyn_cast<Instruction>(U)) {
    174           Loop *DefLoop = LI->getLoopFor(Def->getParent());
    175           if (!DefLoop)
    176             continue;
    177           if (DefLoop->contains(L))
    178             return true;
    179         }
    180       }
    181     }
    182   }
    183   return false;
    184 }
    185 
    186 /// Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary
    187 /// and adds a mapping from the original loop to the new loop to NewLoops.
    188 /// Returns nullptr if no new loop was created and a pointer to the
    189 /// original loop OriginalBB was part of otherwise.
    190 const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB,
    191                                            BasicBlock *ClonedBB, LoopInfo *LI,
    192                                            NewLoopsMap &NewLoops) {
    193   // Figure out which loop New is in.
    194   const Loop *OldLoop = LI->getLoopFor(OriginalBB);
    195   assert(OldLoop && "Should (at least) be in the loop being unrolled!");
    196 
    197   Loop *&NewLoop = NewLoops[OldLoop];
    198   if (!NewLoop) {
    199     // Found a new sub-loop.
    200     assert(OriginalBB == OldLoop->getHeader() &&
    201            "Header should be first in RPO");
    202 
    203     NewLoop = LI->AllocateLoop();
    204     Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop());
    205 
    206     if (NewLoopParent)
    207       NewLoopParent->addChildLoop(NewLoop);
    208     else
    209       LI->addTopLevelLoop(NewLoop);
    210 
    211     NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
    212     return OldLoop;
    213   } else {
    214     NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
    215     return nullptr;
    216   }
    217 }
    218 
    219 /// The function chooses which type of unroll (epilog or prolog) is more
    220 /// profitabale.
    221 /// Epilog unroll is more profitable when there is PHI that starts from
    222 /// constant.  In this case epilog will leave PHI start from constant,
    223 /// but prolog will convert it to non-constant.
    224 ///
    225 /// loop:
    226 ///   PN = PHI [I, Latch], [CI, PreHeader]
    227 ///   I = foo(PN)
    228 ///   ...
    229 ///
    230 /// Epilog unroll case.
    231 /// loop:
    232 ///   PN = PHI [I2, Latch], [CI, PreHeader]
    233 ///   I1 = foo(PN)
    234 ///   I2 = foo(I1)
    235 ///   ...
    236 /// Prolog unroll case.
    237 ///   NewPN = PHI [PrologI, Prolog], [CI, PreHeader]
    238 /// loop:
    239 ///   PN = PHI [I2, Latch], [NewPN, PreHeader]
    240 ///   I1 = foo(PN)
    241 ///   I2 = foo(I1)
    242 ///   ...
    243 ///
    244 static bool isEpilogProfitable(Loop *L) {
    245   BasicBlock *PreHeader = L->getLoopPreheader();
    246   BasicBlock *Header = L->getHeader();
    247   assert(PreHeader && Header);
    248   for (const PHINode &PN : Header->phis()) {
    249     if (isa<ConstantInt>(PN.getIncomingValueForBlock(PreHeader)))
    250       return true;
    251   }
    252   return false;
    253 }
    254 
    255 /// Perform some cleanup and simplifications on loops after unrolling. It is
    256 /// useful to simplify the IV's in the new loop, as well as do a quick
    257 /// simplify/dce pass of the instructions.
    258 void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
    259                                    ScalarEvolution *SE, DominatorTree *DT,
    260                                    AssumptionCache *AC) {
    261   // Simplify any new induction variables in the partially unrolled loop.
    262   if (SE && SimplifyIVs) {
    263     SmallVector<WeakTrackingVH, 16> DeadInsts;
    264     simplifyLoopIVs(L, SE, DT, LI, DeadInsts);
    265 
    266     // Aggressively clean up dead instructions that simplifyLoopIVs already
    267     // identified. Any remaining should be cleaned up below.
    268     while (!DeadInsts.empty())
    269       if (Instruction *Inst =
    270               dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
    271         RecursivelyDeleteTriviallyDeadInstructions(Inst);
    272   }
    273 
    274   // At this point, the code is well formed.  We now do a quick sweep over the
    275   // inserted code, doing constant propagation and dead code elimination as we
    276   // go.
    277   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
    278   const std::vector<BasicBlock *> &NewLoopBlocks = L->getBlocks();
    279   for (BasicBlock *BB : NewLoopBlocks) {
    280     for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
    281       Instruction *Inst = &*I++;
    282 
    283       if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC}))
    284         if (LI->replacementPreservesLCSSAForm(Inst, V))
    285           Inst->replaceAllUsesWith(V);
    286       if (isInstructionTriviallyDead(Inst))
    287         BB->getInstList().erase(Inst);
    288     }
    289   }
    290 
    291   // TODO: after peeling or unrolling, previously loop variant conditions are
    292   // likely to fold to constants, eagerly propagating those here will require
    293   // fewer cleanup passes to be run.  Alternatively, a LoopEarlyCSE might be
    294   // appropriate.
    295 }
    296 
    297 /// Unroll the given loop by Count. The loop must be in LCSSA form.  Unrolling
    298 /// can only fail when the loop's latch block is not terminated by a conditional
    299 /// branch instruction. However, if the trip count (and multiple) are not known,
    300 /// loop unrolling will mostly produce more code that is no faster.
    301 ///
    302 /// TripCount is the upper bound of the iteration on which control exits
    303 /// LatchBlock. Control may exit the loop prior to TripCount iterations either
    304 /// via an early branch in other loop block or via LatchBlock terminator. This
    305 /// is relaxed from the general definition of trip count which is the number of
    306 /// times the loop header executes. Note that UnrollLoop assumes that the loop
    307 /// counter test is in LatchBlock in order to remove unnecesssary instances of
    308 /// the test.  If control can exit the loop from the LatchBlock's terminator
    309 /// prior to TripCount iterations, flag PreserveCondBr needs to be set.
    310 ///
    311 /// PreserveCondBr indicates whether the conditional branch of the LatchBlock
    312 /// needs to be preserved.  It is needed when we use trip count upper bound to
    313 /// fully unroll the loop. If PreserveOnlyFirst is also set then only the first
    314 /// conditional branch needs to be preserved.
    315 ///
    316 /// Similarly, TripMultiple divides the number of times that the LatchBlock may
    317 /// execute without exiting the loop.
    318 ///
    319 /// If AllowRuntime is true then UnrollLoop will consider unrolling loops that
    320 /// have a runtime (i.e. not compile time constant) trip count.  Unrolling these
    321 /// loops require a unroll "prologue" that runs "RuntimeTripCount % Count"
    322 /// iterations before branching into the unrolled loop.  UnrollLoop will not
    323 /// runtime-unroll the loop if computing RuntimeTripCount will be expensive and
    324 /// AllowExpensiveTripCount is false.
    325 ///
    326 /// If we want to perform PGO-based loop peeling, PeelCount is set to the
    327 /// number of iterations we want to peel off.
    328 ///
    329 /// The LoopInfo Analysis that is passed will be kept consistent.
    330 ///
    331 /// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
    332 /// DominatorTree if they are non-null.
    333 LoopUnrollResult llvm::UnrollLoop(
    334     Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime,
    335     bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst,
    336     unsigned TripMultiple, unsigned PeelCount, bool UnrollRemainder,
    337     LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
    338     OptimizationRemarkEmitter *ORE, bool PreserveLCSSA) {
    339 
    340   BasicBlock *Preheader = L->getLoopPreheader();
    341   if (!Preheader) {
    342     LLVM_DEBUG(dbgs() << "  Can't unroll; loop preheader-insertion failed.\n");
    343     return LoopUnrollResult::Unmodified;
    344   }
    345 
    346   BasicBlock *LatchBlock = L->getLoopLatch();
    347   if (!LatchBlock) {
    348     LLVM_DEBUG(dbgs() << "  Can't unroll; loop exit-block-insertion failed.\n");
    349     return LoopUnrollResult::Unmodified;
    350   }
    351 
    352   // Loops with indirectbr cannot be cloned.
    353   if (!L->isSafeToClone()) {
    354     LLVM_DEBUG(dbgs() << "  Can't unroll; Loop body cannot be cloned.\n");
    355     return LoopUnrollResult::Unmodified;
    356   }
    357 
    358   // The current loop unroll pass can only unroll loops with a single latch
    359   // that's a conditional branch exiting the loop.
    360   // FIXME: The implementation can be extended to work with more complicated
    361   // cases, e.g. loops with multiple latches.
    362   BasicBlock *Header = L->getHeader();
    363   BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
    364 
    365   if (!BI || BI->isUnconditional()) {
    366     // The loop-rotate pass can be helpful to avoid this in many cases.
    367     LLVM_DEBUG(
    368         dbgs()
    369         << "  Can't unroll; loop not terminated by a conditional branch.\n");
    370     return LoopUnrollResult::Unmodified;
    371   }
    372 
    373   auto CheckSuccessors = [&](unsigned S1, unsigned S2) {
    374     return BI->getSuccessor(S1) == Header && !L->contains(BI->getSuccessor(S2));
    375   };
    376 
    377   if (!CheckSuccessors(0, 1) && !CheckSuccessors(1, 0)) {
    378     LLVM_DEBUG(dbgs() << "Can't unroll; only loops with one conditional latch"
    379                          " exiting the loop can be unrolled\n");
    380     return LoopUnrollResult::Unmodified;
    381   }
    382 
    383   if (Header->hasAddressTaken()) {
    384     // The loop-rotate pass can be helpful to avoid this in many cases.
    385     LLVM_DEBUG(
    386         dbgs() << "  Won't unroll loop: address of header block is taken.\n");
    387     return LoopUnrollResult::Unmodified;
    388   }
    389 
    390   if (TripCount != 0)
    391     LLVM_DEBUG(dbgs() << "  Trip Count = " << TripCount << "\n");
    392   if (TripMultiple != 1)
    393     LLVM_DEBUG(dbgs() << "  Trip Multiple = " << TripMultiple << "\n");
    394 
    395   // Effectively "DCE" unrolled iterations that are beyond the tripcount
    396   // and will never be executed.
    397   if (TripCount != 0 && Count > TripCount)
    398     Count = TripCount;
    399 
    400   // Don't enter the unroll code if there is nothing to do.
    401   if (TripCount == 0 && Count < 2 && PeelCount == 0) {
    402     LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
    403     return LoopUnrollResult::Unmodified;
    404   }
    405 
    406   assert(Count > 0);
    407   assert(TripMultiple > 0);
    408   assert(TripCount == 0 || TripCount % TripMultiple == 0);
    409 
    410   // Are we eliminating the loop control altogether?
    411   bool CompletelyUnroll = Count == TripCount;
    412   SmallVector<BasicBlock *, 4> ExitBlocks;
    413   L->getExitBlocks(ExitBlocks);
    414   std::vector<BasicBlock*> OriginalLoopBlocks = L->getBlocks();
    415 
    416   // Go through all exits of L and see if there are any phi-nodes there. We just
    417   // conservatively assume that they're inserted to preserve LCSSA form, which
    418   // means that complete unrolling might break this form. We need to either fix
    419   // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For
    420   // now we just recompute LCSSA for the outer loop, but it should be possible
    421   // to fix it in-place.
    422   bool NeedToFixLCSSA = PreserveLCSSA && CompletelyUnroll &&
    423                         any_of(ExitBlocks, [](const BasicBlock *BB) {
    424                           return isa<PHINode>(BB->begin());
    425                         });
    426 
    427   // We assume a run-time trip count if the compiler cannot
    428   // figure out the loop trip count and the unroll-runtime
    429   // flag is specified.
    430   bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
    431 
    432   assert((!RuntimeTripCount || !PeelCount) &&
    433          "Did not expect runtime trip-count unrolling "
    434          "and peeling for the same loop");
    435 
    436   bool Peeled = false;
    437   if (PeelCount) {
    438     Peeled = peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA);
    439 
    440     // Successful peeling may result in a change in the loop preheader/trip
    441     // counts. If we later unroll the loop, we want these to be updated.
    442     if (Peeled) {
    443       BasicBlock *ExitingBlock = L->getExitingBlock();
    444       assert(ExitingBlock && "Loop without exiting block?");
    445       Preheader = L->getLoopPreheader();
    446       TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
    447       TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
    448     }
    449   }
    450 
    451   // Loops containing convergent instructions must have a count that divides
    452   // their TripMultiple.
    453   LLVM_DEBUG(
    454       {
    455         bool HasConvergent = false;
    456         for (auto &BB : L->blocks())
    457           for (auto &I : *BB)
    458             if (auto CS = CallSite(&I))
    459               HasConvergent |= CS.isConvergent();
    460         assert((!HasConvergent || TripMultiple % Count == 0) &&
    461                "Unroll count must divide trip multiple if loop contains a "
    462                "convergent operation.");
    463       });
    464 
    465   bool EpilogProfitability =
    466       UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog
    467                                               : isEpilogProfitable(L);
    468 
    469   if (RuntimeTripCount && TripMultiple % Count != 0 &&
    470       !UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
    471                                   EpilogProfitability, UnrollRemainder, LI, SE,
    472                                   DT, AC, PreserveLCSSA)) {
    473     if (Force)
    474       RuntimeTripCount = false;
    475     else {
    476       LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be "
    477                            "generated when assuming runtime trip count\n");
    478       return LoopUnrollResult::Unmodified;
    479     }
    480   }
    481 
    482   // If we know the trip count, we know the multiple...
    483   unsigned BreakoutTrip = 0;
    484   if (TripCount != 0) {
    485     BreakoutTrip = TripCount % Count;
    486     TripMultiple = 0;
    487   } else {
    488     // Figure out what multiple to use.
    489     BreakoutTrip = TripMultiple =
    490       (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
    491   }
    492 
    493   using namespace ore;
    494   // Report the unrolling decision.
    495   if (CompletelyUnroll) {
    496     LLVM_DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
    497                       << " with trip count " << TripCount << "!\n");
    498     if (ORE)
    499       ORE->emit([&]() {
    500         return OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
    501                                   L->getHeader())
    502                << "completely unrolled loop with "
    503                << NV("UnrollCount", TripCount) << " iterations";
    504       });
    505   } else if (PeelCount) {
    506     LLVM_DEBUG(dbgs() << "PEELING loop %" << Header->getName()
    507                       << " with iteration count " << PeelCount << "!\n");
    508     if (ORE)
    509       ORE->emit([&]() {
    510         return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
    511                                   L->getHeader())
    512                << " peeled loop by " << NV("PeelCount", PeelCount)
    513                << " iterations";
    514       });
    515   } else {
    516     auto DiagBuilder = [&]() {
    517       OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
    518                               L->getHeader());
    519       return Diag << "unrolled loop by a factor of "
    520                   << NV("UnrollCount", Count);
    521     };
    522 
    523     LLVM_DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by "
    524                       << Count);
    525     if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
    526       LLVM_DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
    527       if (ORE)
    528         ORE->emit([&]() {
    529           return DiagBuilder() << " with a breakout at trip "
    530                                << NV("BreakoutTrip", BreakoutTrip);
    531         });
    532     } else if (TripMultiple != 1) {
    533       LLVM_DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
    534       if (ORE)
    535         ORE->emit([&]() {
    536           return DiagBuilder() << " with " << NV("TripMultiple", TripMultiple)
    537                                << " trips per branch";
    538         });
    539     } else if (RuntimeTripCount) {
    540       LLVM_DEBUG(dbgs() << " with run-time trip count");
    541       if (ORE)
    542         ORE->emit(
    543             [&]() { return DiagBuilder() << " with run-time trip count"; });
    544     }
    545     LLVM_DEBUG(dbgs() << "!\n");
    546   }
    547 
    548   // We are going to make changes to this loop. SCEV may be keeping cached info
    549   // about it, in particular about backedge taken count. The changes we make
    550   // are guaranteed to invalidate this information for our loop. It is tempting
    551   // to only invalidate the loop being unrolled, but it is incorrect as long as
    552   // all exiting branches from all inner loops have impact on the outer loops,
    553   // and if something changes inside them then any of outer loops may also
    554   // change. When we forget outermost loop, we also forget all contained loops
    555   // and this is what we need here.
    556   if (SE)
    557     SE->forgetTopmostLoop(L);
    558 
    559   bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
    560   BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
    561 
    562   // For the first iteration of the loop, we should use the precloned values for
    563   // PHI nodes.  Insert associations now.
    564   ValueToValueMapTy LastValueMap;
    565   std::vector<PHINode*> OrigPHINode;
    566   for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    567     OrigPHINode.push_back(cast<PHINode>(I));
    568   }
    569 
    570   std::vector<BasicBlock*> Headers;
    571   std::vector<BasicBlock*> Latches;
    572   Headers.push_back(Header);
    573   Latches.push_back(LatchBlock);
    574 
    575   // The current on-the-fly SSA update requires blocks to be processed in
    576   // reverse postorder so that LastValueMap contains the correct value at each
    577   // exit.
    578   LoopBlocksDFS DFS(L);
    579   DFS.perform(LI);
    580 
    581   // Stash the DFS iterators before adding blocks to the loop.
    582   LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
    583   LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
    584 
    585   std::vector<BasicBlock*> UnrolledLoopBlocks = L->getBlocks();
    586 
    587   // Loop Unrolling might create new loops. While we do preserve LoopInfo, we
    588   // might break loop-simplified form for these loops (as they, e.g., would
    589   // share the same exit blocks). We'll keep track of loops for which we can
    590   // break this so that later we can re-simplify them.
    591   SmallSetVector<Loop *, 4> LoopsToSimplify;
    592   for (Loop *SubLoop : *L)
    593     LoopsToSimplify.insert(SubLoop);
    594 
    595   if (Header->getParent()->isDebugInfoForProfiling())
    596     for (BasicBlock *BB : L->getBlocks())
    597       for (Instruction &I : *BB)
    598         if (!isa<DbgInfoIntrinsic>(&I))
    599           if (const DILocation *DIL = I.getDebugLoc())
    600             I.setDebugLoc(DIL->cloneWithDuplicationFactor(Count));
    601 
    602   for (unsigned It = 1; It != Count; ++It) {
    603     std::vector<BasicBlock*> NewBlocks;
    604     SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
    605     NewLoops[L] = L;
    606 
    607     for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
    608       ValueToValueMapTy VMap;
    609       BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
    610       Header->getParent()->getBasicBlockList().push_back(New);
    611 
    612       assert((*BB != Header || LI->getLoopFor(*BB) == L) &&
    613              "Header should not be in a sub-loop");
    614       // Tell LI about New.
    615       const Loop *OldLoop = addClonedBlockToLoopInfo(*BB, New, LI, NewLoops);
    616       if (OldLoop)
    617         LoopsToSimplify.insert(NewLoops[OldLoop]);
    618 
    619       if (*BB == Header)
    620         // Loop over all of the PHI nodes in the block, changing them to use
    621         // the incoming values from the previous block.
    622         for (PHINode *OrigPHI : OrigPHINode) {
    623           PHINode *NewPHI = cast<PHINode>(VMap[OrigPHI]);
    624           Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
    625           if (Instruction *InValI = dyn_cast<Instruction>(InVal))
    626             if (It > 1 && L->contains(InValI))
    627               InVal = LastValueMap[InValI];
    628           VMap[OrigPHI] = InVal;
    629           New->getInstList().erase(NewPHI);
    630         }
    631 
    632       // Update our running map of newest clones
    633       LastValueMap[*BB] = New;
    634       for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
    635            VI != VE; ++VI)
    636         LastValueMap[VI->first] = VI->second;
    637 
    638       // Add phi entries for newly created values to all exit blocks.
    639       for (BasicBlock *Succ : successors(*BB)) {
    640         if (L->contains(Succ))
    641           continue;
    642         for (PHINode &PHI : Succ->phis()) {
    643           Value *Incoming = PHI.getIncomingValueForBlock(*BB);
    644           ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);
    645           if (It != LastValueMap.end())
    646             Incoming = It->second;
    647           PHI.addIncoming(Incoming, New);
    648         }
    649       }
    650       // Keep track of new headers and latches as we create them, so that
    651       // we can insert the proper branches later.
    652       if (*BB == Header)
    653         Headers.push_back(New);
    654       if (*BB == LatchBlock)
    655         Latches.push_back(New);
    656 
    657       NewBlocks.push_back(New);
    658       UnrolledLoopBlocks.push_back(New);
    659 
    660       // Update DomTree: since we just copy the loop body, and each copy has a
    661       // dedicated entry block (copy of the header block), this header's copy
    662       // dominates all copied blocks. That means, dominance relations in the
    663       // copied body are the same as in the original body.
    664       if (DT) {
    665         if (*BB == Header)
    666           DT->addNewBlock(New, Latches[It - 1]);
    667         else {
    668           auto BBDomNode = DT->getNode(*BB);
    669           auto BBIDom = BBDomNode->getIDom();
    670           BasicBlock *OriginalBBIDom = BBIDom->getBlock();
    671           DT->addNewBlock(
    672               New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
    673         }
    674       }
    675     }
    676 
    677     // Remap all instructions in the most recent iteration
    678     for (BasicBlock *NewBlock : NewBlocks) {
    679       for (Instruction &I : *NewBlock) {
    680         ::remapInstruction(&I, LastValueMap);
    681         if (auto *II = dyn_cast<IntrinsicInst>(&I))
    682           if (II->getIntrinsicID() == Intrinsic::assume)
    683             AC->registerAssumption(II);
    684       }
    685     }
    686   }
    687 
    688   // Loop over the PHI nodes in the original block, setting incoming values.
    689   for (PHINode *PN : OrigPHINode) {
    690     if (CompletelyUnroll) {
    691       PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
    692       Header->getInstList().erase(PN);
    693     }
    694     else if (Count > 1) {
    695       Value *InVal = PN->removeIncomingValue(LatchBlock, false);
    696       // If this value was defined in the loop, take the value defined by the
    697       // last iteration of the loop.
    698       if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
    699         if (L->contains(InValI))
    700           InVal = LastValueMap[InVal];
    701       }
    702       assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch");
    703       PN->addIncoming(InVal, Latches.back());
    704     }
    705   }
    706 
    707   // Now that all the basic blocks for the unrolled iterations are in place,
    708   // set up the branches to connect them.
    709   for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
    710     // The original branch was replicated in each unrolled iteration.
    711     BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
    712 
    713     // The branch destination.
    714     unsigned j = (i + 1) % e;
    715     BasicBlock *Dest = Headers[j];
    716     bool NeedConditional = true;
    717 
    718     if (RuntimeTripCount && j != 0) {
    719       NeedConditional = false;
    720     }
    721 
    722     // For a complete unroll, make the last iteration end with a branch
    723     // to the exit block.
    724     if (CompletelyUnroll) {
    725       if (j == 0)
    726         Dest = LoopExit;
    727       // If using trip count upper bound to completely unroll, we need to keep
    728       // the conditional branch except the last one because the loop may exit
    729       // after any iteration.
    730       assert(NeedConditional &&
    731              "NeedCondition cannot be modified by both complete "
    732              "unrolling and runtime unrolling");
    733       NeedConditional = (PreserveCondBr && j && !(PreserveOnlyFirst && i != 0));
    734     } else if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
    735       // If we know the trip count or a multiple of it, we can safely use an
    736       // unconditional branch for some iterations.
    737       NeedConditional = false;
    738     }
    739 
    740     if (NeedConditional) {
    741       // Update the conditional branch's successor for the following
    742       // iteration.
    743       Term->setSuccessor(!ContinueOnTrue, Dest);
    744     } else {
    745       // Remove phi operands at this loop exit
    746       if (Dest != LoopExit) {
    747         BasicBlock *BB = Latches[i];
    748         for (BasicBlock *Succ: successors(BB)) {
    749           if (Succ == Headers[i])
    750             continue;
    751           for (PHINode &Phi : Succ->phis())
    752             Phi.removeIncomingValue(BB, false);
    753         }
    754       }
    755       // Replace the conditional branch with an unconditional one.
    756       BranchInst::Create(Dest, Term);
    757       Term->eraseFromParent();
    758     }
    759   }
    760 
    761   // Update dominators of blocks we might reach through exits.
    762   // Immediate dominator of such block might change, because we add more
    763   // routes which can lead to the exit: we can now reach it from the copied
    764   // iterations too.
    765   if (DT && Count > 1) {
    766     for (auto *BB : OriginalLoopBlocks) {
    767       auto *BBDomNode = DT->getNode(BB);
    768       SmallVector<BasicBlock *, 16> ChildrenToUpdate;
    769       for (auto *ChildDomNode : BBDomNode->getChildren()) {
    770         auto *ChildBB = ChildDomNode->getBlock();
    771         if (!L->contains(ChildBB))
    772           ChildrenToUpdate.push_back(ChildBB);
    773       }
    774       BasicBlock *NewIDom;
    775       if (BB == LatchBlock) {
    776         // The latch is special because we emit unconditional branches in
    777         // some cases where the original loop contained a conditional branch.
    778         // Since the latch is always at the bottom of the loop, if the latch
    779         // dominated an exit before unrolling, the new dominator of that exit
    780         // must also be a latch.  Specifically, the dominator is the first
    781         // latch which ends in a conditional branch, or the last latch if
    782         // there is no such latch.
    783         NewIDom = Latches.back();
    784         for (BasicBlock *IterLatch : Latches) {
    785           TerminatorInst *Term = IterLatch->getTerminator();
    786           if (isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) {
    787             NewIDom = IterLatch;
    788             break;
    789           }
    790         }
    791       } else {
    792         // The new idom of the block will be the nearest common dominator
    793         // of all copies of the previous idom. This is equivalent to the
    794         // nearest common dominator of the previous idom and the first latch,
    795         // which dominates all copies of the previous idom.
    796         NewIDom = DT->findNearestCommonDominator(BB, LatchBlock);
    797       }
    798       for (auto *ChildBB : ChildrenToUpdate)
    799         DT->changeImmediateDominator(ChildBB, NewIDom);
    800     }
    801   }
    802 
    803   assert(!DT || !UnrollVerifyDomtree ||
    804       DT->verify(DominatorTree::VerificationLevel::Fast));
    805 
    806   // Merge adjacent basic blocks, if possible.
    807   for (BasicBlock *Latch : Latches) {
    808     BranchInst *Term = cast<BranchInst>(Latch->getTerminator());
    809     if (Term->isUnconditional()) {
    810       BasicBlock *Dest = Term->getSuccessor(0);
    811       if (BasicBlock *Fold = foldBlockIntoPredecessor(Dest, LI, SE, DT)) {
    812         // Dest has been folded into Fold. Update our worklists accordingly.
    813         std::replace(Latches.begin(), Latches.end(), Dest, Fold);
    814         UnrolledLoopBlocks.erase(std::remove(UnrolledLoopBlocks.begin(),
    815                                              UnrolledLoopBlocks.end(), Dest),
    816                                  UnrolledLoopBlocks.end());
    817       }
    818     }
    819   }
    820 
    821   // At this point, the code is well formed.  We now simplify the unrolled loop,
    822   // doing constant propagation and dead code elimination as we go.
    823   simplifyLoopAfterUnroll(L, !CompletelyUnroll && (Count > 1 || Peeled), LI, SE,
    824                           DT, AC);
    825 
    826   NumCompletelyUnrolled += CompletelyUnroll;
    827   ++NumUnrolled;
    828 
    829   Loop *OuterL = L->getParentLoop();
    830   // Update LoopInfo if the loop is completely removed.
    831   if (CompletelyUnroll)
    832     LI->erase(L);
    833 
    834   // After complete unrolling most of the blocks should be contained in OuterL.
    835   // However, some of them might happen to be out of OuterL (e.g. if they
    836   // precede a loop exit). In this case we might need to insert PHI nodes in
    837   // order to preserve LCSSA form.
    838   // We don't need to check this if we already know that we need to fix LCSSA
    839   // form.
    840   // TODO: For now we just recompute LCSSA for the outer loop in this case, but
    841   // it should be possible to fix it in-place.
    842   if (PreserveLCSSA && OuterL && CompletelyUnroll && !NeedToFixLCSSA)
    843     NeedToFixLCSSA |= ::needToInsertPhisForLCSSA(OuterL, UnrolledLoopBlocks, LI);
    844 
    845   // If we have a pass and a DominatorTree we should re-simplify impacted loops
    846   // to ensure subsequent analyses can rely on this form. We want to simplify
    847   // at least one layer outside of the loop that was unrolled so that any
    848   // changes to the parent loop exposed by the unrolling are considered.
    849   if (DT) {
    850     if (OuterL) {
    851       // OuterL includes all loops for which we can break loop-simplify, so
    852       // it's sufficient to simplify only it (it'll recursively simplify inner
    853       // loops too).
    854       if (NeedToFixLCSSA) {
    855         // LCSSA must be performed on the outermost affected loop. The unrolled
    856         // loop's last loop latch is guaranteed to be in the outermost loop
    857         // after LoopInfo's been updated by LoopInfo::erase.
    858         Loop *LatchLoop = LI->getLoopFor(Latches.back());
    859         Loop *FixLCSSALoop = OuterL;
    860         if (!FixLCSSALoop->contains(LatchLoop))
    861           while (FixLCSSALoop->getParentLoop() != LatchLoop)
    862             FixLCSSALoop = FixLCSSALoop->getParentLoop();
    863 
    864         formLCSSARecursively(*FixLCSSALoop, *DT, LI, SE);
    865       } else if (PreserveLCSSA) {
    866         assert(OuterL->isLCSSAForm(*DT) &&
    867                "Loops should be in LCSSA form after loop-unroll.");
    868       }
    869 
    870       // TODO: That potentially might be compile-time expensive. We should try
    871       // to fix the loop-simplified form incrementally.
    872       simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA);
    873     } else {
    874       // Simplify loops for which we might've broken loop-simplify form.
    875       for (Loop *SubLoop : LoopsToSimplify)
    876         simplifyLoop(SubLoop, DT, LI, SE, AC, PreserveLCSSA);
    877     }
    878   }
    879 
    880   return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled
    881                           : LoopUnrollResult::PartiallyUnrolled;
    882 }
    883 
    884 /// Given an llvm.loop loop id metadata node, returns the loop hint metadata
    885 /// node with the given name (for example, "llvm.loop.unroll.count"). If no
    886 /// such metadata node exists, then nullptr is returned.
    887 MDNode *llvm::GetUnrollMetadata(MDNode *LoopID, StringRef Name) {
    888   // First operand should refer to the loop id itself.
    889   assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
    890   assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
    891 
    892   for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
    893     MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
    894     if (!MD)
    895       continue;
    896 
    897     MDString *S = dyn_cast<MDString>(MD->getOperand(0));
    898     if (!S)
    899       continue;
    900 
    901     if (Name.equals(S->getString()))
    902       return MD;
    903   }
    904   return nullptr;
    905 }
    906