Home | History | Annotate | Download | only in Scalar
      1 //===-- LoopUnroll.cpp - Loop unroller pass -------------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This pass implements a simple loop unroller.  It works best when loops have
     11 // been canonicalized by the -indvars pass, allowing it to determine the trip
     12 // counts of loops easily.
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "llvm/Transforms/Scalar.h"
     16 #include "llvm/Analysis/CodeMetrics.h"
     17 #include "llvm/Analysis/LoopPass.h"
     18 #include "llvm/Analysis/ScalarEvolution.h"
     19 #include "llvm/Analysis/TargetTransformInfo.h"
     20 #include "llvm/IR/DataLayout.h"
     21 #include "llvm/IR/DiagnosticInfo.h"
     22 #include "llvm/IR/Dominators.h"
     23 #include "llvm/IR/IntrinsicInst.h"
     24 #include "llvm/IR/Metadata.h"
     25 #include "llvm/Support/CommandLine.h"
     26 #include "llvm/Support/Debug.h"
     27 #include "llvm/Support/raw_ostream.h"
     28 #include "llvm/Transforms/Utils/UnrollLoop.h"
     29 #include <climits>
     30 
     31 using namespace llvm;
     32 
     33 #define DEBUG_TYPE "loop-unroll"
     34 
     35 static cl::opt<unsigned>
     36 UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden,
     37   cl::desc("The cut-off point for automatic loop unrolling"));
     38 
     39 static cl::opt<unsigned>
     40 UnrollCount("unroll-count", cl::init(0), cl::Hidden,
     41   cl::desc("Use this unroll count for all loops including those with "
     42            "unroll_count pragma values, for testing purposes"));
     43 
     44 static cl::opt<bool>
     45 UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
     46   cl::desc("Allows loops to be partially unrolled until "
     47            "-unroll-threshold loop size is reached."));
     48 
     49 static cl::opt<bool>
     50 UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::init(false), cl::Hidden,
     51   cl::desc("Unroll loops with run-time trip counts"));
     52 
     53 static cl::opt<unsigned>
     54 PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
     55   cl::desc("Unrolled size limit for loops with an unroll(enable) or "
     56            "unroll_count pragma."));
     57 
     58 namespace {
     59   class LoopUnroll : public LoopPass {
     60   public:
     61     static char ID; // Pass ID, replacement for typeid
     62     LoopUnroll(int T = -1, int C = -1, int P = -1, int R = -1) : LoopPass(ID) {
     63       CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T);
     64       CurrentCount = (C == -1) ? UnrollCount : unsigned(C);
     65       CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P;
     66       CurrentRuntime = (R == -1) ? UnrollRuntime : (bool)R;
     67 
     68       UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0);
     69       UserAllowPartial = (P != -1) ||
     70                          (UnrollAllowPartial.getNumOccurrences() > 0);
     71       UserRuntime = (R != -1) || (UnrollRuntime.getNumOccurrences() > 0);
     72       UserCount = (C != -1) || (UnrollCount.getNumOccurrences() > 0);
     73 
     74       initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
     75     }
     76 
     77     /// A magic value for use with the Threshold parameter to indicate
     78     /// that the loop unroll should be performed regardless of how much
     79     /// code expansion would result.
     80     static const unsigned NoThreshold = UINT_MAX;
     81 
     82     // Threshold to use when optsize is specified (and there is no
     83     // explicit -unroll-threshold).
     84     static const unsigned OptSizeUnrollThreshold = 50;
     85 
     86     // Default unroll count for loops with run-time trip count if
     87     // -unroll-count is not set
     88     static const unsigned UnrollRuntimeCount = 8;
     89 
     90     unsigned CurrentCount;
     91     unsigned CurrentThreshold;
     92     bool     CurrentAllowPartial;
     93     bool     CurrentRuntime;
     94     bool     UserCount;            // CurrentCount is user-specified.
     95     bool     UserThreshold;        // CurrentThreshold is user-specified.
     96     bool     UserAllowPartial;     // CurrentAllowPartial is user-specified.
     97     bool     UserRuntime;          // CurrentRuntime is user-specified.
     98 
     99     bool runOnLoop(Loop *L, LPPassManager &LPM) override;
    100 
    101     /// This transformation requires natural loop information & requires that
    102     /// loop preheaders be inserted into the CFG...
    103     ///
    104     void getAnalysisUsage(AnalysisUsage &AU) const override {
    105       AU.addRequired<LoopInfo>();
    106       AU.addPreserved<LoopInfo>();
    107       AU.addRequiredID(LoopSimplifyID);
    108       AU.addPreservedID(LoopSimplifyID);
    109       AU.addRequiredID(LCSSAID);
    110       AU.addPreservedID(LCSSAID);
    111       AU.addRequired<ScalarEvolution>();
    112       AU.addPreserved<ScalarEvolution>();
    113       AU.addRequired<TargetTransformInfo>();
    114       // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
    115       // If loop unroll does not preserve dom info then LCSSA pass on next
    116       // loop will receive invalid dom info.
    117       // For now, recreate dom info, if loop is unrolled.
    118       AU.addPreserved<DominatorTreeWrapperPass>();
    119     }
    120 
    121     // Fill in the UnrollingPreferences parameter with values from the
    122     // TargetTransformationInfo.
    123     void getUnrollingPreferences(Loop *L, const TargetTransformInfo &TTI,
    124                                  TargetTransformInfo::UnrollingPreferences &UP) {
    125       UP.Threshold = CurrentThreshold;
    126       UP.OptSizeThreshold = OptSizeUnrollThreshold;
    127       UP.PartialThreshold = CurrentThreshold;
    128       UP.PartialOptSizeThreshold = OptSizeUnrollThreshold;
    129       UP.Count = CurrentCount;
    130       UP.MaxCount = UINT_MAX;
    131       UP.Partial = CurrentAllowPartial;
    132       UP.Runtime = CurrentRuntime;
    133       TTI.getUnrollingPreferences(L, UP);
    134     }
    135 
    136     // Select and return an unroll count based on parameters from
    137     // user, unroll preferences, unroll pragmas, or a heuristic.
    138     // SetExplicitly is set to true if the unroll count is is set by
    139     // the user or a pragma rather than selected heuristically.
    140     unsigned
    141     selectUnrollCount(const Loop *L, unsigned TripCount, bool HasEnablePragma,
    142                       unsigned PragmaCount,
    143                       const TargetTransformInfo::UnrollingPreferences &UP,
    144                       bool &SetExplicitly);
    145 
    146 
    147     // Select threshold values used to limit unrolling based on a
    148     // total unrolled size.  Parameters Threshold and PartialThreshold
    149     // are set to the maximum unrolled size for fully and partially
    150     // unrolled loops respectively.
    151     void selectThresholds(const Loop *L, bool HasPragma,
    152                           const TargetTransformInfo::UnrollingPreferences &UP,
    153                           unsigned &Threshold, unsigned &PartialThreshold) {
    154       // Determine the current unrolling threshold.  While this is
    155       // normally set from UnrollThreshold, it is overridden to a
    156       // smaller value if the current function is marked as
    157       // optimize-for-size, and the unroll threshold was not user
    158       // specified.
    159       Threshold = UserThreshold ? CurrentThreshold : UP.Threshold;
    160       PartialThreshold = UserThreshold ? CurrentThreshold : UP.PartialThreshold;
    161       if (!UserThreshold &&
    162           L->getHeader()->getParent()->getAttributes().
    163               hasAttribute(AttributeSet::FunctionIndex,
    164                            Attribute::OptimizeForSize)) {
    165         Threshold = UP.OptSizeThreshold;
    166         PartialThreshold = UP.PartialOptSizeThreshold;
    167       }
    168       if (HasPragma) {
    169         // If the loop has an unrolling pragma, we want to be more
    170         // aggressive with unrolling limits.  Set thresholds to at
    171         // least the PragmaTheshold value which is larger than the
    172         // default limits.
    173         if (Threshold != NoThreshold)
    174           Threshold = std::max<unsigned>(Threshold, PragmaUnrollThreshold);
    175         if (PartialThreshold != NoThreshold)
    176           PartialThreshold =
    177               std::max<unsigned>(PartialThreshold, PragmaUnrollThreshold);
    178       }
    179     }
    180   };
    181 }
    182 
    183 char LoopUnroll::ID = 0;
    184 INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
    185 INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
    186 INITIALIZE_PASS_DEPENDENCY(LoopInfo)
    187 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
    188 INITIALIZE_PASS_DEPENDENCY(LCSSA)
    189 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
    190 INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
    191 
    192 Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
    193                                  int Runtime) {
    194   return new LoopUnroll(Threshold, Count, AllowPartial, Runtime);
    195 }
    196 
    197 Pass *llvm::createSimpleLoopUnrollPass() {
    198   return llvm::createLoopUnrollPass(-1, -1, 0, 0);
    199 }
    200 
    201 /// ApproximateLoopSize - Approximate the size of the loop.
    202 static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
    203                                     bool &NotDuplicatable,
    204                                     const TargetTransformInfo &TTI) {
    205   CodeMetrics Metrics;
    206   for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
    207        I != E; ++I)
    208     Metrics.analyzeBasicBlock(*I, TTI);
    209   NumCalls = Metrics.NumInlineCandidates;
    210   NotDuplicatable = Metrics.notDuplicatable;
    211 
    212   unsigned LoopSize = Metrics.NumInsts;
    213 
    214   // Don't allow an estimate of size zero.  This would allows unrolling of loops
    215   // with huge iteration counts, which is a compile time problem even if it's
    216   // not a problem for code quality.
    217   if (LoopSize == 0) LoopSize = 1;
    218 
    219   return LoopSize;
    220 }
    221 
    222 // Returns the value associated with the given metadata node name (for
    223 // example, "llvm.loop.unroll.count").  If no such named metadata node
    224 // exists, then nullptr is returned.
    225 static const ConstantInt *GetUnrollMetadataValue(const Loop *L,
    226                                                  StringRef Name) {
    227   MDNode *LoopID = L->getLoopID();
    228   if (!LoopID) return nullptr;
    229 
    230   // First operand should refer to the loop id itself.
    231   assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
    232   assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
    233 
    234   for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
    235     const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
    236     if (!MD) continue;
    237 
    238     const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
    239     if (!S) continue;
    240 
    241     if (Name.equals(S->getString())) {
    242       assert(MD->getNumOperands() == 2 &&
    243              "Unroll hint metadata should have two operands.");
    244       return cast<ConstantInt>(MD->getOperand(1));
    245     }
    246   }
    247   return nullptr;
    248 }
    249 
    250 // Returns true if the loop has an unroll(enable) pragma.
    251 static bool HasUnrollEnablePragma(const Loop *L) {
    252   const ConstantInt *EnableValue =
    253       GetUnrollMetadataValue(L, "llvm.loop.unroll.enable");
    254   return (EnableValue && EnableValue->getZExtValue());
    255 }
    256 
    257 // Returns true if the loop has an unroll(disable) pragma.
    258 static bool HasUnrollDisablePragma(const Loop *L) {
    259   const ConstantInt *EnableValue =
    260       GetUnrollMetadataValue(L, "llvm.loop.unroll.enable");
    261   return (EnableValue && !EnableValue->getZExtValue());
    262 }
    263 
    264 // If loop has an unroll_count pragma return the (necessarily
    265 // positive) value from the pragma.  Otherwise return 0.
    266 static unsigned UnrollCountPragmaValue(const Loop *L) {
    267   const ConstantInt *CountValue =
    268       GetUnrollMetadataValue(L, "llvm.loop.unroll.count");
    269   if (CountValue) {
    270     unsigned Count = CountValue->getZExtValue();
    271     assert(Count >= 1 && "Unroll count must be positive.");
    272     return Count;
    273   }
    274   return 0;
    275 }
    276 
    277 unsigned LoopUnroll::selectUnrollCount(
    278     const Loop *L, unsigned TripCount, bool HasEnablePragma,
    279     unsigned PragmaCount, const TargetTransformInfo::UnrollingPreferences &UP,
    280     bool &SetExplicitly) {
    281   SetExplicitly = true;
    282 
    283   // User-specified count (either as a command-line option or
    284   // constructor parameter) has highest precedence.
    285   unsigned Count = UserCount ? CurrentCount : 0;
    286 
    287   // If there is no user-specified count, unroll pragmas have the next
    288   // highest precendence.
    289   if (Count == 0) {
    290     if (PragmaCount) {
    291       Count = PragmaCount;
    292     } else if (HasEnablePragma) {
    293       // unroll(enable) pragma without an unroll_count pragma
    294       // indicates to unroll loop fully.
    295       Count = TripCount;
    296     }
    297   }
    298 
    299   if (Count == 0)
    300     Count = UP.Count;
    301 
    302   if (Count == 0) {
    303     SetExplicitly = false;
    304     if (TripCount == 0)
    305       // Runtime trip count.
    306       Count = UnrollRuntimeCount;
    307     else
    308       // Conservative heuristic: if we know the trip count, see if we can
    309       // completely unroll (subject to the threshold, checked below); otherwise
    310       // try to find greatest modulo of the trip count which is still under
    311       // threshold value.
    312       Count = TripCount;
    313   }
    314   if (TripCount && Count > TripCount)
    315     return TripCount;
    316   return Count;
    317 }
    318 
    319 bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
    320   if (skipOptnoneFunction(L))
    321     return false;
    322 
    323   LoopInfo *LI = &getAnalysis<LoopInfo>();
    324   ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
    325   const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
    326 
    327   BasicBlock *Header = L->getHeader();
    328   DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
    329         << "] Loop %" << Header->getName() << "\n");
    330 
    331   if (HasUnrollDisablePragma(L)) {
    332     return false;
    333   }
    334   bool HasEnablePragma = HasUnrollEnablePragma(L);
    335   unsigned PragmaCount = UnrollCountPragmaValue(L);
    336   bool HasPragma = HasEnablePragma || PragmaCount > 0;
    337 
    338   TargetTransformInfo::UnrollingPreferences UP;
    339   getUnrollingPreferences(L, TTI, UP);
    340 
    341   // Find trip count and trip multiple if count is not available
    342   unsigned TripCount = 0;
    343   unsigned TripMultiple = 1;
    344   // Find "latch trip count". UnrollLoop assumes that control cannot exit
    345   // via the loop latch on any iteration prior to TripCount. The loop may exit
    346   // early via an earlier branch.
    347   BasicBlock *LatchBlock = L->getLoopLatch();
    348   if (LatchBlock) {
    349     TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
    350     TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
    351   }
    352 
    353   // Select an initial unroll count.  This may be reduced later based
    354   // on size thresholds.
    355   bool CountSetExplicitly;
    356   unsigned Count = selectUnrollCount(L, TripCount, HasEnablePragma, PragmaCount,
    357                                      UP, CountSetExplicitly);
    358 
    359   unsigned NumInlineCandidates;
    360   bool notDuplicatable;
    361   unsigned LoopSize =
    362       ApproximateLoopSize(L, NumInlineCandidates, notDuplicatable, TTI);
    363   DEBUG(dbgs() << "  Loop Size = " << LoopSize << "\n");
    364   uint64_t UnrolledSize = (uint64_t)LoopSize * Count;
    365   if (notDuplicatable) {
    366     DEBUG(dbgs() << "  Not unrolling loop which contains non-duplicatable"
    367                  << " instructions.\n");
    368     return false;
    369   }
    370   if (NumInlineCandidates != 0) {
    371     DEBUG(dbgs() << "  Not unrolling loop with inlinable calls.\n");
    372     return false;
    373   }
    374 
    375   unsigned Threshold, PartialThreshold;
    376   selectThresholds(L, HasPragma, UP, Threshold, PartialThreshold);
    377 
    378   // Given Count, TripCount and thresholds determine the type of
    379   // unrolling which is to be performed.
    380   enum { Full = 0, Partial = 1, Runtime = 2 };
    381   int Unrolling;
    382   if (TripCount && Count == TripCount) {
    383     if (Threshold != NoThreshold && UnrolledSize > Threshold) {
    384       DEBUG(dbgs() << "  Too large to fully unroll with count: " << Count
    385                    << " because size: " << UnrolledSize << ">" << Threshold
    386                    << "\n");
    387       Unrolling = Partial;
    388     } else {
    389       Unrolling = Full;
    390     }
    391   } else if (TripCount && Count < TripCount) {
    392     Unrolling = Partial;
    393   } else {
    394     Unrolling = Runtime;
    395   }
    396 
    397   // Reduce count based on the type of unrolling and the threshold values.
    398   unsigned OriginalCount = Count;
    399   bool AllowRuntime = UserRuntime ? CurrentRuntime : UP.Runtime;
    400   if (Unrolling == Partial) {
    401     bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial;
    402     if (!AllowPartial && !CountSetExplicitly) {
    403       DEBUG(dbgs() << "  will not try to unroll partially because "
    404                    << "-unroll-allow-partial not given\n");
    405       return false;
    406     }
    407     if (PartialThreshold != NoThreshold && UnrolledSize > PartialThreshold) {
    408       // Reduce unroll count to be modulo of TripCount for partial unrolling.
    409       Count = PartialThreshold / LoopSize;
    410       while (Count != 0 && TripCount % Count != 0)
    411         Count--;
    412     }
    413   } else if (Unrolling == Runtime) {
    414     if (!AllowRuntime && !CountSetExplicitly) {
    415       DEBUG(dbgs() << "  will not try to unroll loop with runtime trip count "
    416                    << "-unroll-runtime not given\n");
    417       return false;
    418     }
    419     // Reduce unroll count to be the largest power-of-two factor of
    420     // the original count which satisfies the threshold limit.
    421     while (Count != 0 && UnrolledSize > PartialThreshold) {
    422       Count >>= 1;
    423       UnrolledSize = LoopSize * Count;
    424     }
    425     if (Count > UP.MaxCount)
    426       Count = UP.MaxCount;
    427     DEBUG(dbgs() << "  partially unrolling with count: " << Count << "\n");
    428   }
    429 
    430   if (HasPragma) {
    431     // Emit optimization remarks if we are unable to unroll the loop
    432     // as directed by a pragma.
    433     DebugLoc LoopLoc = L->getStartLoc();
    434     Function *F = Header->getParent();
    435     LLVMContext &Ctx = F->getContext();
    436     if (HasEnablePragma && PragmaCount == 0) {
    437       if (TripCount && Count != TripCount) {
    438         emitOptimizationRemarkMissed(
    439             Ctx, DEBUG_TYPE, *F, LoopLoc,
    440             "Unable to fully unroll loop as directed by unroll(enable) pragma "
    441             "because unrolled size is too large.");
    442       } else if (!TripCount) {
    443         emitOptimizationRemarkMissed(
    444             Ctx, DEBUG_TYPE, *F, LoopLoc,
    445             "Unable to fully unroll loop as directed by unroll(enable) pragma "
    446             "because loop has a runtime trip count.");
    447       }
    448     } else if (PragmaCount > 0 && Count != OriginalCount) {
    449       emitOptimizationRemarkMissed(
    450           Ctx, DEBUG_TYPE, *F, LoopLoc,
    451           "Unable to unroll loop the number of times directed by "
    452           "unroll_count pragma because unrolled size is too large.");
    453     }
    454   }
    455 
    456   if (Unrolling != Full && Count < 2) {
    457     // Partial unrolling by 1 is a nop.  For full unrolling, a factor
    458     // of 1 makes sense because loop control can be eliminated.
    459     return false;
    460   }
    461 
    462   // Unroll the loop.
    463   if (!UnrollLoop(L, Count, TripCount, AllowRuntime, TripMultiple, LI, this, &LPM))
    464     return false;
    465 
    466   return true;
    467 }
    468