Home | History | Annotate | Download | only in Scalar
      1 //===-- LoopUnroll.cpp - Loop unroller pass -------------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This pass implements a simple loop unroller.  It works best when loops have
     11 // been canonicalized by the -indvars pass, allowing it to determine the trip
     12 // counts of loops easily.
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "llvm/ADT/SetVector.h"
     16 #include "llvm/Analysis/AssumptionCache.h"
     17 #include "llvm/Analysis/CodeMetrics.h"
     18 #include "llvm/Analysis/GlobalsModRef.h"
     19 #include "llvm/Analysis/InstructionSimplify.h"
     20 #include "llvm/Analysis/LoopPass.h"
     21 #include "llvm/Analysis/LoopUnrollAnalyzer.h"
     22 #include "llvm/Analysis/ScalarEvolution.h"
     23 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
     24 #include "llvm/Analysis/TargetTransformInfo.h"
     25 #include "llvm/IR/DataLayout.h"
     26 #include "llvm/IR/DiagnosticInfo.h"
     27 #include "llvm/IR/Dominators.h"
     28 #include "llvm/IR/InstVisitor.h"
     29 #include "llvm/IR/IntrinsicInst.h"
     30 #include "llvm/IR/Metadata.h"
     31 #include "llvm/Support/CommandLine.h"
     32 #include "llvm/Support/Debug.h"
     33 #include "llvm/Support/raw_ostream.h"
     34 #include "llvm/Transforms/Scalar.h"
     35 #include "llvm/Transforms/Utils/LoopUtils.h"
     36 #include "llvm/Transforms/Utils/UnrollLoop.h"
     37 #include <climits>
     38 #include <utility>
     39 
     40 using namespace llvm;
     41 
     42 #define DEBUG_TYPE "loop-unroll"
     43 
     44 static cl::opt<unsigned>
     45     UnrollThreshold("unroll-threshold", cl::Hidden,
     46                     cl::desc("The baseline cost threshold for loop unrolling"));
     47 
     48 static cl::opt<unsigned> UnrollPercentDynamicCostSavedThreshold(
     49     "unroll-percent-dynamic-cost-saved-threshold", cl::init(50), cl::Hidden,
     50     cl::desc("The percentage of estimated dynamic cost which must be saved by "
     51              "unrolling to allow unrolling up to the max threshold."));
     52 
     53 static cl::opt<unsigned> UnrollDynamicCostSavingsDiscount(
     54     "unroll-dynamic-cost-savings-discount", cl::init(100), cl::Hidden,
     55     cl::desc("This is the amount discounted from the total unroll cost when "
     56              "the unrolled form has a high dynamic cost savings (triggered by "
     57              "the '-unroll-perecent-dynamic-cost-saved-threshold' flag)."));
     58 
     59 static cl::opt<unsigned> UnrollMaxIterationsCountToAnalyze(
     60     "unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden,
     61     cl::desc("Don't allow loop unrolling to simulate more than this number of"
     62              "iterations when checking full unroll profitability"));
     63 
     64 static cl::opt<unsigned> UnrollCount(
     65     "unroll-count", cl::Hidden,
     66     cl::desc("Use this unroll count for all loops including those with "
     67              "unroll_count pragma values, for testing purposes"));
     68 
     69 static cl::opt<unsigned> UnrollMaxCount(
     70     "unroll-max-count", cl::Hidden,
     71     cl::desc("Set the max unroll count for partial and runtime unrolling, for"
     72              "testing purposes"));
     73 
     74 static cl::opt<unsigned> UnrollFullMaxCount(
     75     "unroll-full-max-count", cl::Hidden,
     76     cl::desc(
     77         "Set the max unroll count for full unrolling, for testing purposes"));
     78 
     79 static cl::opt<bool>
     80     UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
     81                        cl::desc("Allows loops to be partially unrolled until "
     82                                 "-unroll-threshold loop size is reached."));
     83 
     84 static cl::opt<bool> UnrollAllowRemainder(
     85     "unroll-allow-remainder", cl::Hidden,
     86     cl::desc("Allow generation of a loop remainder (extra iterations) "
     87              "when unrolling a loop."));
     88 
     89 static cl::opt<bool>
     90     UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
     91                   cl::desc("Unroll loops with run-time trip counts"));
     92 
     93 static cl::opt<unsigned> PragmaUnrollThreshold(
     94     "pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
     95     cl::desc("Unrolled size limit for loops with an unroll(full) or "
     96              "unroll_count pragma."));
     97 
     98 /// A magic value for use with the Threshold parameter to indicate
     99 /// that the loop unroll should be performed regardless of how much
    100 /// code expansion would result.
    101 static const unsigned NoThreshold = UINT_MAX;
    102 
    103 /// Default unroll count for loops with run-time trip count if
    104 /// -unroll-count is not set
    105 static const unsigned DefaultUnrollRuntimeCount = 8;
    106 
    107 /// Gather the various unrolling parameters based on the defaults, compiler
    108 /// flags, TTI overrides and user specified parameters.
    109 static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
    110     Loop *L, const TargetTransformInfo &TTI, Optional<unsigned> UserThreshold,
    111     Optional<unsigned> UserCount, Optional<bool> UserAllowPartial,
    112     Optional<bool> UserRuntime) {
    113   TargetTransformInfo::UnrollingPreferences UP;
    114 
    115   // Set up the defaults
    116   UP.Threshold = 150;
    117   UP.PercentDynamicCostSavedThreshold = 50;
    118   UP.DynamicCostSavingsDiscount = 100;
    119   UP.OptSizeThreshold = 0;
    120   UP.PartialThreshold = UP.Threshold;
    121   UP.PartialOptSizeThreshold = 0;
    122   UP.Count = 0;
    123   UP.MaxCount = UINT_MAX;
    124   UP.FullUnrollMaxCount = UINT_MAX;
    125   UP.Partial = false;
    126   UP.Runtime = false;
    127   UP.AllowRemainder = true;
    128   UP.AllowExpensiveTripCount = false;
    129   UP.Force = false;
    130 
    131   // Override with any target specific settings
    132   TTI.getUnrollingPreferences(L, UP);
    133 
    134   // Apply size attributes
    135   if (L->getHeader()->getParent()->optForSize()) {
    136     UP.Threshold = UP.OptSizeThreshold;
    137     UP.PartialThreshold = UP.PartialOptSizeThreshold;
    138   }
    139 
    140   // Apply any user values specified by cl::opt
    141   if (UnrollThreshold.getNumOccurrences() > 0) {
    142     UP.Threshold = UnrollThreshold;
    143     UP.PartialThreshold = UnrollThreshold;
    144   }
    145   if (UnrollPercentDynamicCostSavedThreshold.getNumOccurrences() > 0)
    146     UP.PercentDynamicCostSavedThreshold =
    147         UnrollPercentDynamicCostSavedThreshold;
    148   if (UnrollDynamicCostSavingsDiscount.getNumOccurrences() > 0)
    149     UP.DynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount;
    150   if (UnrollMaxCount.getNumOccurrences() > 0)
    151     UP.MaxCount = UnrollMaxCount;
    152   if (UnrollFullMaxCount.getNumOccurrences() > 0)
    153     UP.FullUnrollMaxCount = UnrollFullMaxCount;
    154   if (UnrollAllowPartial.getNumOccurrences() > 0)
    155     UP.Partial = UnrollAllowPartial;
    156   if (UnrollAllowRemainder.getNumOccurrences() > 0)
    157     UP.AllowRemainder = UnrollAllowRemainder;
    158   if (UnrollRuntime.getNumOccurrences() > 0)
    159     UP.Runtime = UnrollRuntime;
    160 
    161   // Apply user values provided by argument
    162   if (UserThreshold.hasValue()) {
    163     UP.Threshold = *UserThreshold;
    164     UP.PartialThreshold = *UserThreshold;
    165   }
    166   if (UserCount.hasValue())
    167     UP.Count = *UserCount;
    168   if (UserAllowPartial.hasValue())
    169     UP.Partial = *UserAllowPartial;
    170   if (UserRuntime.hasValue())
    171     UP.Runtime = *UserRuntime;
    172 
    173   return UP;
    174 }
    175 
    176 namespace {
    177 /// A struct to densely store the state of an instruction after unrolling at
    178 /// each iteration.
    179 ///
    180 /// This is designed to work like a tuple of <Instruction *, int> for the
    181 /// purposes of hashing and lookup, but to be able to associate two boolean
    182 /// states with each key.
    183 struct UnrolledInstState {
    184   Instruction *I;
    185   int Iteration : 30;
    186   unsigned IsFree : 1;
    187   unsigned IsCounted : 1;
    188 };
    189 
    190 /// Hashing and equality testing for a set of the instruction states.
    191 struct UnrolledInstStateKeyInfo {
    192   typedef DenseMapInfo<Instruction *> PtrInfo;
    193   typedef DenseMapInfo<std::pair<Instruction *, int>> PairInfo;
    194   static inline UnrolledInstState getEmptyKey() {
    195     return {PtrInfo::getEmptyKey(), 0, 0, 0};
    196   }
    197   static inline UnrolledInstState getTombstoneKey() {
    198     return {PtrInfo::getTombstoneKey(), 0, 0, 0};
    199   }
    200   static inline unsigned getHashValue(const UnrolledInstState &S) {
    201     return PairInfo::getHashValue({S.I, S.Iteration});
    202   }
    203   static inline bool isEqual(const UnrolledInstState &LHS,
    204                              const UnrolledInstState &RHS) {
    205     return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});
    206   }
    207 };
    208 }
    209 
    210 namespace {
    211 struct EstimatedUnrollCost {
    212   /// \brief The estimated cost after unrolling.
    213   int UnrolledCost;
    214 
    215   /// \brief The estimated dynamic cost of executing the instructions in the
    216   /// rolled form.
    217   int RolledDynamicCost;
    218 };
    219 }
    220 
    221 /// \brief Figure out if the loop is worth full unrolling.
    222 ///
    223 /// Complete loop unrolling can make some loads constant, and we need to know
    224 /// if that would expose any further optimization opportunities.  This routine
    225 /// estimates this optimization.  It computes cost of unrolled loop
    226 /// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
    227 /// dynamic cost we mean that we won't count costs of blocks that are known not
    228 /// to be executed (i.e. if we have a branch in the loop and we know that at the
    229 /// given iteration its condition would be resolved to true, we won't add up the
    230 /// cost of the 'false'-block).
    231 /// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
    232 /// the analysis failed (no benefits expected from the unrolling, or the loop is
    233 /// too big to analyze), the returned value is None.
    234 static Optional<EstimatedUnrollCost>
    235 analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
    236                       ScalarEvolution &SE, const TargetTransformInfo &TTI,
    237                       int MaxUnrolledLoopSize) {
    238   // We want to be able to scale offsets by the trip count and add more offsets
    239   // to them without checking for overflows, and we already don't want to
    240   // analyze *massive* trip counts, so we force the max to be reasonably small.
    241   assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) &&
    242          "The unroll iterations max is too large!");
    243 
    244   // Only analyze inner loops. We can't properly estimate cost of nested loops
    245   // and we won't visit inner loops again anyway.
    246   if (!L->empty())
    247     return None;
    248 
    249   // Don't simulate loops with a big or unknown tripcount
    250   if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
    251       TripCount > UnrollMaxIterationsCountToAnalyze)
    252     return None;
    253 
    254   SmallSetVector<BasicBlock *, 16> BBWorklist;
    255   SmallSetVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitWorklist;
    256   DenseMap<Value *, Constant *> SimplifiedValues;
    257   SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues;
    258 
    259   // The estimated cost of the unrolled form of the loop. We try to estimate
    260   // this by simplifying as much as we can while computing the estimate.
    261   int UnrolledCost = 0;
    262 
    263   // We also track the estimated dynamic (that is, actually executed) cost in
    264   // the rolled form. This helps identify cases when the savings from unrolling
    265   // aren't just exposing dead control flows, but actual reduced dynamic
    266   // instructions due to the simplifications which we expect to occur after
    267   // unrolling.
    268   int RolledDynamicCost = 0;
    269 
    270   // We track the simplification of each instruction in each iteration. We use
    271   // this to recursively merge costs into the unrolled cost on-demand so that
    272   // we don't count the cost of any dead code. This is essentially a map from
    273   // <instruction, int> to <bool, bool>, but stored as a densely packed struct.
    274   DenseSet<UnrolledInstState, UnrolledInstStateKeyInfo> InstCostMap;
    275 
    276   // A small worklist used to accumulate cost of instructions from each
    277   // observable and reached root in the loop.
    278   SmallVector<Instruction *, 16> CostWorklist;
    279 
    280   // PHI-used worklist used between iterations while accumulating cost.
    281   SmallVector<Instruction *, 4> PHIUsedList;
    282 
    283   // Helper function to accumulate cost for instructions in the loop.
    284   auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
    285     assert(Iteration >= 0 && "Cannot have a negative iteration!");
    286     assert(CostWorklist.empty() && "Must start with an empty cost list");
    287     assert(PHIUsedList.empty() && "Must start with an empty phi used list");
    288     CostWorklist.push_back(&RootI);
    289     for (;; --Iteration) {
    290       do {
    291         Instruction *I = CostWorklist.pop_back_val();
    292 
    293         // InstCostMap only uses I and Iteration as a key, the other two values
    294         // don't matter here.
    295         auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
    296         if (CostIter == InstCostMap.end())
    297           // If an input to a PHI node comes from a dead path through the loop
    298           // we may have no cost data for it here. What that actually means is
    299           // that it is free.
    300           continue;
    301         auto &Cost = *CostIter;
    302         if (Cost.IsCounted)
    303           // Already counted this instruction.
    304           continue;
    305 
    306         // Mark that we are counting the cost of this instruction now.
    307         Cost.IsCounted = true;
    308 
    309         // If this is a PHI node in the loop header, just add it to the PHI set.
    310         if (auto *PhiI = dyn_cast<PHINode>(I))
    311           if (PhiI->getParent() == L->getHeader()) {
    312             assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
    313                                   "inherently simplify during unrolling.");
    314             if (Iteration == 0)
    315               continue;
    316 
    317             // Push the incoming value from the backedge into the PHI used list
    318             // if it is an in-loop instruction. We'll use this to populate the
    319             // cost worklist for the next iteration (as we count backwards).
    320             if (auto *OpI = dyn_cast<Instruction>(
    321                     PhiI->getIncomingValueForBlock(L->getLoopLatch())))
    322               if (L->contains(OpI))
    323                 PHIUsedList.push_back(OpI);
    324             continue;
    325           }
    326 
    327         // First accumulate the cost of this instruction.
    328         if (!Cost.IsFree) {
    329           UnrolledCost += TTI.getUserCost(I);
    330           DEBUG(dbgs() << "Adding cost of instruction (iteration " << Iteration
    331                        << "): ");
    332           DEBUG(I->dump());
    333         }
    334 
    335         // We must count the cost of every operand which is not free,
    336         // recursively. If we reach a loop PHI node, simply add it to the set
    337         // to be considered on the next iteration (backwards!).
    338         for (Value *Op : I->operands()) {
    339           // Check whether this operand is free due to being a constant or
    340           // outside the loop.
    341           auto *OpI = dyn_cast<Instruction>(Op);
    342           if (!OpI || !L->contains(OpI))
    343             continue;
    344 
    345           // Otherwise accumulate its cost.
    346           CostWorklist.push_back(OpI);
    347         }
    348       } while (!CostWorklist.empty());
    349 
    350       if (PHIUsedList.empty())
    351         // We've exhausted the search.
    352         break;
    353 
    354       assert(Iteration > 0 &&
    355              "Cannot track PHI-used values past the first iteration!");
    356       CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
    357       PHIUsedList.clear();
    358     }
    359   };
    360 
    361   // Ensure that we don't violate the loop structure invariants relied on by
    362   // this analysis.
    363   assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
    364   assert(L->isLCSSAForm(DT) &&
    365          "Must have loops in LCSSA form to track live-out values.");
    366 
    367   DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
    368 
    369   // Simulate execution of each iteration of the loop counting instructions,
    370   // which would be simplified.
    371   // Since the same load will take different values on different iterations,
    372   // we literally have to go through all loop's iterations.
    373   for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
    374     DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");
    375 
    376     // Prepare for the iteration by collecting any simplified entry or backedge
    377     // inputs.
    378     for (Instruction &I : *L->getHeader()) {
    379       auto *PHI = dyn_cast<PHINode>(&I);
    380       if (!PHI)
    381         break;
    382 
    383       // The loop header PHI nodes must have exactly two input: one from the
    384       // loop preheader and one from the loop latch.
    385       assert(
    386           PHI->getNumIncomingValues() == 2 &&
    387           "Must have an incoming value only for the preheader and the latch.");
    388 
    389       Value *V = PHI->getIncomingValueForBlock(
    390           Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
    391       Constant *C = dyn_cast<Constant>(V);
    392       if (Iteration != 0 && !C)
    393         C = SimplifiedValues.lookup(V);
    394       if (C)
    395         SimplifiedInputValues.push_back({PHI, C});
    396     }
    397 
    398     // Now clear and re-populate the map for the next iteration.
    399     SimplifiedValues.clear();
    400     while (!SimplifiedInputValues.empty())
    401       SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());
    402 
    403     UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);
    404 
    405     BBWorklist.clear();
    406     BBWorklist.insert(L->getHeader());
    407     // Note that we *must not* cache the size, this loop grows the worklist.
    408     for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
    409       BasicBlock *BB = BBWorklist[Idx];
    410 
    411       // Visit all instructions in the given basic block and try to simplify
    412       // it.  We don't change the actual IR, just count optimization
    413       // opportunities.
    414       for (Instruction &I : *BB) {
    415         // Track this instruction's expected baseline cost when executing the
    416         // rolled loop form.
    417         RolledDynamicCost += TTI.getUserCost(&I);
    418 
    419         // Visit the instruction to analyze its loop cost after unrolling,
    420         // and if the visitor returns true, mark the instruction as free after
    421         // unrolling and continue.
    422         bool IsFree = Analyzer.visit(I);
    423         bool Inserted = InstCostMap.insert({&I, (int)Iteration,
    424                                            (unsigned)IsFree,
    425                                            /*IsCounted*/ false}).second;
    426         (void)Inserted;
    427         assert(Inserted && "Cannot have a state for an unvisited instruction!");
    428 
    429         if (IsFree)
    430           continue;
    431 
    432         // If the instruction might have a side-effect recursively account for
    433         // the cost of it and all the instructions leading up to it.
    434         if (I.mayHaveSideEffects())
    435           AddCostRecursively(I, Iteration);
    436 
    437         // Can't properly model a cost of a call.
    438         // FIXME: With a proper cost model we should be able to do it.
    439         if(isa<CallInst>(&I))
    440           return None;
    441 
    442         // If unrolled body turns out to be too big, bail out.
    443         if (UnrolledCost > MaxUnrolledLoopSize) {
    444           DEBUG(dbgs() << "  Exceeded threshold.. exiting.\n"
    445                        << "  UnrolledCost: " << UnrolledCost
    446                        << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
    447                        << "\n");
    448           return None;
    449         }
    450       }
    451 
    452       TerminatorInst *TI = BB->getTerminator();
    453 
    454       // Add in the live successors by first checking whether we have terminator
    455       // that may be simplified based on the values simplified by this call.
    456       BasicBlock *KnownSucc = nullptr;
    457       if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
    458         if (BI->isConditional()) {
    459           if (Constant *SimpleCond =
    460                   SimplifiedValues.lookup(BI->getCondition())) {
    461             // Just take the first successor if condition is undef
    462             if (isa<UndefValue>(SimpleCond))
    463               KnownSucc = BI->getSuccessor(0);
    464             else if (ConstantInt *SimpleCondVal =
    465                          dyn_cast<ConstantInt>(SimpleCond))
    466               KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
    467           }
    468         }
    469       } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
    470         if (Constant *SimpleCond =
    471                 SimplifiedValues.lookup(SI->getCondition())) {
    472           // Just take the first successor if condition is undef
    473           if (isa<UndefValue>(SimpleCond))
    474             KnownSucc = SI->getSuccessor(0);
    475           else if (ConstantInt *SimpleCondVal =
    476                        dyn_cast<ConstantInt>(SimpleCond))
    477             KnownSucc = SI->findCaseValue(SimpleCondVal).getCaseSuccessor();
    478         }
    479       }
    480       if (KnownSucc) {
    481         if (L->contains(KnownSucc))
    482           BBWorklist.insert(KnownSucc);
    483         else
    484           ExitWorklist.insert({BB, KnownSucc});
    485         continue;
    486       }
    487 
    488       // Add BB's successors to the worklist.
    489       for (BasicBlock *Succ : successors(BB))
    490         if (L->contains(Succ))
    491           BBWorklist.insert(Succ);
    492         else
    493           ExitWorklist.insert({BB, Succ});
    494       AddCostRecursively(*TI, Iteration);
    495     }
    496 
    497     // If we found no optimization opportunities on the first iteration, we
    498     // won't find them on later ones too.
    499     if (UnrolledCost == RolledDynamicCost) {
    500       DEBUG(dbgs() << "  No opportunities found.. exiting.\n"
    501                    << "  UnrolledCost: " << UnrolledCost << "\n");
    502       return None;
    503     }
    504   }
    505 
    506   while (!ExitWorklist.empty()) {
    507     BasicBlock *ExitingBB, *ExitBB;
    508     std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();
    509 
    510     for (Instruction &I : *ExitBB) {
    511       auto *PN = dyn_cast<PHINode>(&I);
    512       if (!PN)
    513         break;
    514 
    515       Value *Op = PN->getIncomingValueForBlock(ExitingBB);
    516       if (auto *OpI = dyn_cast<Instruction>(Op))
    517         if (L->contains(OpI))
    518           AddCostRecursively(*OpI, TripCount - 1);
    519     }
    520   }
    521 
    522   DEBUG(dbgs() << "Analysis finished:\n"
    523                << "UnrolledCost: " << UnrolledCost << ", "
    524                << "RolledDynamicCost: " << RolledDynamicCost << "\n");
    525   return {{UnrolledCost, RolledDynamicCost}};
    526 }
    527 
    528 /// ApproximateLoopSize - Approximate the size of the loop.
    529 static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
    530                                     bool &NotDuplicatable, bool &Convergent,
    531                                     const TargetTransformInfo &TTI,
    532                                     AssumptionCache *AC) {
    533   SmallPtrSet<const Value *, 32> EphValues;
    534   CodeMetrics::collectEphemeralValues(L, AC, EphValues);
    535 
    536   CodeMetrics Metrics;
    537   for (BasicBlock *BB : L->blocks())
    538     Metrics.analyzeBasicBlock(BB, TTI, EphValues);
    539   NumCalls = Metrics.NumInlineCandidates;
    540   NotDuplicatable = Metrics.notDuplicatable;
    541   Convergent = Metrics.convergent;
    542 
    543   unsigned LoopSize = Metrics.NumInsts;
    544 
    545   // Don't allow an estimate of size zero.  This would allows unrolling of loops
    546   // with huge iteration counts, which is a compile time problem even if it's
    547   // not a problem for code quality. Also, the code using this size may assume
    548   // that each loop has at least three instructions (likely a conditional
    549   // branch, a comparison feeding that branch, and some kind of loop increment
    550   // feeding that comparison instruction).
    551   LoopSize = std::max(LoopSize, 3u);
    552 
    553   return LoopSize;
    554 }
    555 
    556 // Returns the loop hint metadata node with the given name (for example,
    557 // "llvm.loop.unroll.count").  If no such metadata node exists, then nullptr is
    558 // returned.
    559 static MDNode *GetUnrollMetadataForLoop(const Loop *L, StringRef Name) {
    560   if (MDNode *LoopID = L->getLoopID())
    561     return GetUnrollMetadata(LoopID, Name);
    562   return nullptr;
    563 }
    564 
    565 // Returns true if the loop has an unroll(full) pragma.
    566 static bool HasUnrollFullPragma(const Loop *L) {
    567   return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
    568 }
    569 
    570 // Returns true if the loop has an unroll(enable) pragma. This metadata is used
    571 // for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
    572 static bool HasUnrollEnablePragma(const Loop *L) {
    573   return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
    574 }
    575 
    576 // Returns true if the loop has an unroll(disable) pragma.
    577 static bool HasUnrollDisablePragma(const Loop *L) {
    578   return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable");
    579 }
    580 
    581 // Returns true if the loop has an runtime unroll(disable) pragma.
    582 static bool HasRuntimeUnrollDisablePragma(const Loop *L) {
    583   return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
    584 }
    585 
    586 // If loop has an unroll_count pragma return the (necessarily
    587 // positive) value from the pragma.  Otherwise return 0.
    588 static unsigned UnrollCountPragmaValue(const Loop *L) {
    589   MDNode *MD = GetUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
    590   if (MD) {
    591     assert(MD->getNumOperands() == 2 &&
    592            "Unroll count hint metadata should have two operands.");
    593     unsigned Count =
    594         mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
    595     assert(Count >= 1 && "Unroll count must be positive.");
    596     return Count;
    597   }
    598   return 0;
    599 }
    600 
    601 // Remove existing unroll metadata and add unroll disable metadata to
    602 // indicate the loop has already been unrolled.  This prevents a loop
    603 // from being unrolled more than is directed by a pragma if the loop
    604 // unrolling pass is run more than once (which it generally is).
    605 static void SetLoopAlreadyUnrolled(Loop *L) {
    606   MDNode *LoopID = L->getLoopID();
    607   // First remove any existing loop unrolling metadata.
    608   SmallVector<Metadata *, 4> MDs;
    609   // Reserve first location for self reference to the LoopID metadata node.
    610   MDs.push_back(nullptr);
    611 
    612   if (LoopID) {
    613     for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
    614       bool IsUnrollMetadata = false;
    615       MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
    616       if (MD) {
    617         const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
    618         IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll.");
    619       }
    620       if (!IsUnrollMetadata)
    621         MDs.push_back(LoopID->getOperand(i));
    622     }
    623   }
    624 
    625   // Add unroll(disable) metadata to disable future unrolling.
    626   LLVMContext &Context = L->getHeader()->getContext();
    627   SmallVector<Metadata *, 1> DisableOperands;
    628   DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable"));
    629   MDNode *DisableNode = MDNode::get(Context, DisableOperands);
    630   MDs.push_back(DisableNode);
    631 
    632   MDNode *NewLoopID = MDNode::get(Context, MDs);
    633   // Set operand 0 to refer to the loop id itself.
    634   NewLoopID->replaceOperandWith(0, NewLoopID);
    635   L->setLoopID(NewLoopID);
    636 }
    637 
    638 static bool canUnrollCompletely(Loop *L, unsigned Threshold,
    639                                 unsigned PercentDynamicCostSavedThreshold,
    640                                 unsigned DynamicCostSavingsDiscount,
    641                                 uint64_t UnrolledCost,
    642                                 uint64_t RolledDynamicCost) {
    643   if (Threshold == NoThreshold) {
    644     DEBUG(dbgs() << "  Can fully unroll, because no threshold is set.\n");
    645     return true;
    646   }
    647 
    648   if (UnrolledCost <= Threshold) {
    649     DEBUG(dbgs() << "  Can fully unroll, because unrolled cost: "
    650                  << UnrolledCost << "<" << Threshold << "\n");
    651     return true;
    652   }
    653 
    654   assert(UnrolledCost && "UnrolledCost can't be 0 at this point.");
    655   assert(RolledDynamicCost >= UnrolledCost &&
    656          "Cannot have a higher unrolled cost than a rolled cost!");
    657 
    658   // Compute the percentage of the dynamic cost in the rolled form that is
    659   // saved when unrolled. If unrolling dramatically reduces the estimated
    660   // dynamic cost of the loop, we use a higher threshold to allow more
    661   // unrolling.
    662   unsigned PercentDynamicCostSaved =
    663       (uint64_t)(RolledDynamicCost - UnrolledCost) * 100ull / RolledDynamicCost;
    664 
    665   if (PercentDynamicCostSaved >= PercentDynamicCostSavedThreshold &&
    666       (int64_t)UnrolledCost - (int64_t)DynamicCostSavingsDiscount <=
    667           (int64_t)Threshold) {
    668     DEBUG(dbgs() << "  Can fully unroll, because unrolling will reduce the "
    669                     "expected dynamic cost by "
    670                  << PercentDynamicCostSaved << "% (threshold: "
    671                  << PercentDynamicCostSavedThreshold << "%)\n"
    672                  << "  and the unrolled cost (" << UnrolledCost
    673                  << ") is less than the max threshold ("
    674                  << DynamicCostSavingsDiscount << ").\n");
    675     return true;
    676   }
    677 
    678   DEBUG(dbgs() << "  Too large to fully unroll:\n");
    679   DEBUG(dbgs() << "    Threshold: " << Threshold << "\n");
    680   DEBUG(dbgs() << "    Max threshold: " << DynamicCostSavingsDiscount << "\n");
    681   DEBUG(dbgs() << "    Percent cost saved threshold: "
    682                << PercentDynamicCostSavedThreshold << "%\n");
    683   DEBUG(dbgs() << "    Unrolled cost: " << UnrolledCost << "\n");
    684   DEBUG(dbgs() << "    Rolled dynamic cost: " << RolledDynamicCost << "\n");
    685   DEBUG(dbgs() << "    Percent cost saved: " << PercentDynamicCostSaved
    686                << "\n");
    687   return false;
    688 }
    689 
    690 // Returns true if unroll count was set explicitly.
    691 // Calculates unroll count and writes it to UP.Count.
    692 static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
    693                                DominatorTree &DT, LoopInfo *LI,
    694                                ScalarEvolution *SE, unsigned TripCount,
    695                                unsigned TripMultiple, unsigned LoopSize,
    696                                TargetTransformInfo::UnrollingPreferences &UP) {
    697   // BEInsns represents number of instructions optimized when "back edge"
    698   // becomes "fall through" in unrolled loop.
    699   // For now we count a conditional branch on a backedge and a comparison
    700   // feeding it.
    701   unsigned BEInsns = 2;
    702   // Check for explicit Count.
    703   // 1st priority is unroll count set by "unroll-count" option.
    704   bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
    705   if (UserUnrollCount) {
    706     UP.Count = UnrollCount;
    707     UP.AllowExpensiveTripCount = true;
    708     UP.Force = true;
    709     if (UP.AllowRemainder &&
    710         (LoopSize - BEInsns) * UP.Count + BEInsns < UP.Threshold)
    711       return true;
    712   }
    713 
    714   // 2nd priority is unroll count set by pragma.
    715   unsigned PragmaCount = UnrollCountPragmaValue(L);
    716   if (PragmaCount > 0) {
    717     UP.Count = PragmaCount;
    718     UP.Runtime = true;
    719     UP.AllowExpensiveTripCount = true;
    720     UP.Force = true;
    721     if (UP.AllowRemainder &&
    722         (LoopSize - BEInsns) * UP.Count + BEInsns < PragmaUnrollThreshold)
    723       return true;
    724   }
    725   bool PragmaFullUnroll = HasUnrollFullPragma(L);
    726   if (PragmaFullUnroll && TripCount != 0) {
    727     UP.Count = TripCount;
    728     if ((LoopSize - BEInsns) * UP.Count + BEInsns < PragmaUnrollThreshold)
    729       return false;
    730   }
    731 
    732   bool PragmaEnableUnroll = HasUnrollEnablePragma(L);
    733   bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
    734                         PragmaEnableUnroll || UserUnrollCount;
    735 
    736   uint64_t UnrolledSize;
    737   DebugLoc LoopLoc = L->getStartLoc();
    738   Function *F = L->getHeader()->getParent();
    739   LLVMContext &Ctx = F->getContext();
    740 
    741   if (ExplicitUnroll && TripCount != 0) {
    742     // If the loop has an unrolling pragma, we want to be more aggressive with
    743     // unrolling limits. Set thresholds to at least the PragmaThreshold value
    744     // which is larger than the default limits.
    745     UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
    746     UP.PartialThreshold =
    747         std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
    748   }
    749 
    750   // 3rd priority is full unroll count.
    751   // Full unroll make sense only when TripCount could be staticaly calculated.
    752   // Also we need to check if we exceed FullUnrollMaxCount.
    753   if (TripCount && TripCount <= UP.FullUnrollMaxCount) {
    754     // When computing the unrolled size, note that BEInsns are not replicated
    755     // like the rest of the loop body.
    756     UnrolledSize = (uint64_t)(LoopSize - BEInsns) * TripCount + BEInsns;
    757     if (canUnrollCompletely(L, UP.Threshold, 100, UP.DynamicCostSavingsDiscount,
    758                             UnrolledSize, UnrolledSize)) {
    759       UP.Count = TripCount;
    760       return ExplicitUnroll;
    761     } else {
    762       // The loop isn't that small, but we still can fully unroll it if that
    763       // helps to remove a significant number of instructions.
    764       // To check that, run additional analysis on the loop.
    765       if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
    766               L, TripCount, DT, *SE, TTI,
    767               UP.Threshold + UP.DynamicCostSavingsDiscount))
    768         if (canUnrollCompletely(L, UP.Threshold,
    769                                 UP.PercentDynamicCostSavedThreshold,
    770                                 UP.DynamicCostSavingsDiscount,
    771                                 Cost->UnrolledCost, Cost->RolledDynamicCost)) {
    772           UP.Count = TripCount;
    773           return ExplicitUnroll;
    774         }
    775     }
    776   }
    777 
    778   // 4rd priority is partial unrolling.
    779   // Try partial unroll only when TripCount could be staticaly calculated.
    780   if (TripCount) {
    781     if (UP.Count == 0)
    782       UP.Count = TripCount;
    783     UP.Partial |= ExplicitUnroll;
    784     if (!UP.Partial) {
    785       DEBUG(dbgs() << "  will not try to unroll partially because "
    786                    << "-unroll-allow-partial not given\n");
    787       UP.Count = 0;
    788       return false;
    789     }
    790     if (UP.PartialThreshold != NoThreshold) {
    791       // Reduce unroll count to be modulo of TripCount for partial unrolling.
    792       UnrolledSize = (uint64_t)(LoopSize - BEInsns) * UP.Count + BEInsns;
    793       if (UnrolledSize > UP.PartialThreshold)
    794         UP.Count = (std::max(UP.PartialThreshold, 3u) - BEInsns) /
    795                    (LoopSize - BEInsns);
    796       if (UP.Count > UP.MaxCount)
    797         UP.Count = UP.MaxCount;
    798       while (UP.Count != 0 && TripCount % UP.Count != 0)
    799         UP.Count--;
    800       if (UP.AllowRemainder && UP.Count <= 1) {
    801         // If there is no Count that is modulo of TripCount, set Count to
    802         // largest power-of-two factor that satisfies the threshold limit.
    803         // As we'll create fixup loop, do the type of unrolling only if
    804         // remainder loop is allowed.
    805         UP.Count = DefaultUnrollRuntimeCount;
    806         UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
    807         while (UP.Count != 0 && UnrolledSize > UP.PartialThreshold) {
    808           UP.Count >>= 1;
    809           UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
    810         }
    811       }
    812       if (UP.Count < 2) {
    813         if (PragmaEnableUnroll)
    814           emitOptimizationRemarkMissed(
    815               Ctx, DEBUG_TYPE, *F, LoopLoc,
    816               "Unable to unroll loop as directed by unroll(enable) pragma "
    817               "because unrolled size is too large.");
    818         UP.Count = 0;
    819       }
    820     } else {
    821       UP.Count = TripCount;
    822     }
    823     if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
    824         UP.Count != TripCount)
    825       emitOptimizationRemarkMissed(
    826           Ctx, DEBUG_TYPE, *F, LoopLoc,
    827           "Unable to fully unroll loop as directed by unroll pragma because "
    828           "unrolled size is too large.");
    829     return ExplicitUnroll;
    830   }
    831   assert(TripCount == 0 &&
    832          "All cases when TripCount is constant should be covered here.");
    833   if (PragmaFullUnroll)
    834     emitOptimizationRemarkMissed(
    835         Ctx, DEBUG_TYPE, *F, LoopLoc,
    836         "Unable to fully unroll loop as directed by unroll(full) pragma "
    837         "because loop has a runtime trip count.");
    838 
    839   // 5th priority is runtime unrolling.
    840   // Don't unroll a runtime trip count loop when it is disabled.
    841   if (HasRuntimeUnrollDisablePragma(L)) {
    842     UP.Count = 0;
    843     return false;
    844   }
    845   // Reduce count based on the type of unrolling and the threshold values.
    846   UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
    847   if (!UP.Runtime) {
    848     DEBUG(dbgs() << "  will not try to unroll loop with runtime trip count "
    849                  << "-unroll-runtime not given\n");
    850     UP.Count = 0;
    851     return false;
    852   }
    853   if (UP.Count == 0)
    854     UP.Count = DefaultUnrollRuntimeCount;
    855   UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
    856 
    857   // Reduce unroll count to be the largest power-of-two factor of
    858   // the original count which satisfies the threshold limit.
    859   while (UP.Count != 0 && UnrolledSize > UP.PartialThreshold) {
    860     UP.Count >>= 1;
    861     UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
    862   }
    863 
    864 #ifndef NDEBUG
    865   unsigned OrigCount = UP.Count;
    866 #endif
    867 
    868   if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) {
    869     while (UP.Count != 0 && TripMultiple % UP.Count != 0)
    870       UP.Count >>= 1;
    871     DEBUG(dbgs() << "Remainder loop is restricted (that could architecture "
    872                     "specific or because the loop contains a convergent "
    873                     "instruction), so unroll count must divide the trip "
    874                     "multiple, "
    875                  << TripMultiple << ".  Reducing unroll count from "
    876                  << OrigCount << " to " << UP.Count << ".\n");
    877     if (PragmaCount > 0 && !UP.AllowRemainder)
    878       emitOptimizationRemarkMissed(
    879           Ctx, DEBUG_TYPE, *F, LoopLoc,
    880           Twine("Unable to unroll loop the number of times directed by "
    881                 "unroll_count pragma because remainder loop is restricted "
    882                 "(that could architecture specific or because the loop "
    883                 "contains a convergent instruction) and so must have an unroll "
    884                 "count that divides the loop trip multiple of ") +
    885               Twine(TripMultiple) + ".  Unrolling instead " + Twine(UP.Count) +
    886               " time(s).");
    887   }
    888 
    889   if (UP.Count > UP.MaxCount)
    890     UP.Count = UP.MaxCount;
    891   DEBUG(dbgs() << "  partially unrolling with count: " << UP.Count << "\n");
    892   if (UP.Count < 2)
    893     UP.Count = 0;
    894   return ExplicitUnroll;
    895 }
    896 
    897 static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
    898                             ScalarEvolution *SE, const TargetTransformInfo &TTI,
    899                             AssumptionCache &AC, bool PreserveLCSSA,
    900                             Optional<unsigned> ProvidedCount,
    901                             Optional<unsigned> ProvidedThreshold,
    902                             Optional<bool> ProvidedAllowPartial,
    903                             Optional<bool> ProvidedRuntime) {
    904   DEBUG(dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName()
    905                << "] Loop %" << L->getHeader()->getName() << "\n");
    906   if (HasUnrollDisablePragma(L)) {
    907     return false;
    908   }
    909 
    910   unsigned NumInlineCandidates;
    911   bool NotDuplicatable;
    912   bool Convergent;
    913   unsigned LoopSize = ApproximateLoopSize(
    914       L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC);
    915   DEBUG(dbgs() << "  Loop Size = " << LoopSize << "\n");
    916   if (NotDuplicatable) {
    917     DEBUG(dbgs() << "  Not unrolling loop which contains non-duplicatable"
    918                  << " instructions.\n");
    919     return false;
    920   }
    921   if (NumInlineCandidates != 0) {
    922     DEBUG(dbgs() << "  Not unrolling loop with inlinable calls.\n");
    923     return false;
    924   }
    925   if (!L->isLoopSimplifyForm()) {
    926     DEBUG(
    927         dbgs() << "  Not unrolling loop which is not in loop-simplify form.\n");
    928     return false;
    929   }
    930 
    931   // Find trip count and trip multiple if count is not available
    932   unsigned TripCount = 0;
    933   unsigned TripMultiple = 1;
    934   // If there are multiple exiting blocks but one of them is the latch, use the
    935   // latch for the trip count estimation. Otherwise insist on a single exiting
    936   // block for the trip count estimation.
    937   BasicBlock *ExitingBlock = L->getLoopLatch();
    938   if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
    939     ExitingBlock = L->getExitingBlock();
    940   if (ExitingBlock) {
    941     TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
    942     TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
    943   }
    944 
    945   TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
    946       L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
    947       ProvidedRuntime);
    948 
    949   // If the loop contains a convergent operation, the prelude we'd add
    950   // to do the first few instructions before we hit the unrolled loop
    951   // is unsafe -- it adds a control-flow dependency to the convergent
    952   // operation.  Therefore restrict remainder loop (try unrollig without).
    953   //
    954   // TODO: This is quite conservative.  In practice, convergent_op()
    955   // is likely to be called unconditionally in the loop.  In this
    956   // case, the program would be ill-formed (on most architectures)
    957   // unless n were the same on all threads in a thread group.
    958   // Assuming n is the same on all threads, any kind of unrolling is
    959   // safe.  But currently llvm's notion of convergence isn't powerful
    960   // enough to express this.
    961   if (Convergent)
    962     UP.AllowRemainder = false;
    963 
    964   bool IsCountSetExplicitly = computeUnrollCount(L, TTI, DT, LI, SE, TripCount,
    965                                                  TripMultiple, LoopSize, UP);
    966   if (!UP.Count)
    967     return false;
    968   // Unroll factor (Count) must be less or equal to TripCount.
    969   if (TripCount && UP.Count > TripCount)
    970     UP.Count = TripCount;
    971 
    972   // Unroll the loop.
    973   if (!UnrollLoop(L, UP.Count, TripCount, UP.Force, UP.Runtime,
    974                   UP.AllowExpensiveTripCount, TripMultiple, LI, SE, &DT, &AC,
    975                   PreserveLCSSA))
    976     return false;
    977 
    978   // If loop has an unroll count pragma or unrolled by explicitly set count
    979   // mark loop as unrolled to prevent unrolling beyond that requested.
    980   if (IsCountSetExplicitly)
    981     SetLoopAlreadyUnrolled(L);
    982   return true;
    983 }
    984 
    985 namespace {
    986 class LoopUnroll : public LoopPass {
    987 public:
    988   static char ID; // Pass ID, replacement for typeid
    989   LoopUnroll(Optional<unsigned> Threshold = None,
    990              Optional<unsigned> Count = None,
    991              Optional<bool> AllowPartial = None, Optional<bool> Runtime = None)
    992       : LoopPass(ID), ProvidedCount(std::move(Count)),
    993         ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
    994         ProvidedRuntime(Runtime) {
    995     initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
    996   }
    997 
    998   Optional<unsigned> ProvidedCount;
    999   Optional<unsigned> ProvidedThreshold;
   1000   Optional<bool> ProvidedAllowPartial;
   1001   Optional<bool> ProvidedRuntime;
   1002 
   1003   bool runOnLoop(Loop *L, LPPassManager &) override {
   1004     if (skipLoop(L))
   1005       return false;
   1006 
   1007     Function &F = *L->getHeader()->getParent();
   1008 
   1009     auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   1010     LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   1011     ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
   1012     const TargetTransformInfo &TTI =
   1013         getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
   1014     auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
   1015     bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
   1016 
   1017     return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, PreserveLCSSA, ProvidedCount,
   1018                            ProvidedThreshold, ProvidedAllowPartial,
   1019                            ProvidedRuntime);
   1020   }
   1021 
   1022   /// This transformation requires natural loop information & requires that
   1023   /// loop preheaders be inserted into the CFG...
   1024   ///
   1025   void getAnalysisUsage(AnalysisUsage &AU) const override {
   1026     AU.addRequired<AssumptionCacheTracker>();
   1027     AU.addRequired<TargetTransformInfoWrapperPass>();
   1028     // FIXME: Loop passes are required to preserve domtree, and for now we just
   1029     // recreate dom info if anything gets unrolled.
   1030     getLoopAnalysisUsage(AU);
   1031   }
   1032 };
   1033 }
   1034 
   1035 char LoopUnroll::ID = 0;
   1036 INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
   1037 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
   1038 INITIALIZE_PASS_DEPENDENCY(LoopPass)
   1039 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
   1040 INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
   1041 
   1042 Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
   1043                                  int Runtime) {
   1044   // TODO: It would make more sense for this function to take the optionals
   1045   // directly, but that's dangerous since it would silently break out of tree
   1046   // callers.
   1047   return new LoopUnroll(Threshold == -1 ? None : Optional<unsigned>(Threshold),
   1048                         Count == -1 ? None : Optional<unsigned>(Count),
   1049                         AllowPartial == -1 ? None
   1050                                            : Optional<bool>(AllowPartial),
   1051                         Runtime == -1 ? None : Optional<bool>(Runtime));
   1052 }
   1053 
   1054 Pass *llvm::createSimpleLoopUnrollPass() {
   1055   return llvm::createLoopUnrollPass(-1, -1, 0, 0);
   1056 }
   1057