      1 //===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements inline cost analysis.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "llvm/Analysis/InlineCost.h"
     15 #include "llvm/ADT/STLExtras.h"
     16 #include "llvm/ADT/SetVector.h"
     17 #include "llvm/ADT/SmallPtrSet.h"
     18 #include "llvm/ADT/SmallVector.h"
     19 #include "llvm/ADT/Statistic.h"
     20 #include "llvm/Analysis/AssumptionCache.h"
     21 #include "llvm/Analysis/BlockFrequencyInfo.h"
     22 #include "llvm/Analysis/CodeMetrics.h"
     23 #include "llvm/Analysis/ConstantFolding.h"
     24 #include "llvm/Analysis/CFG.h"
     25 #include "llvm/Analysis/InstructionSimplify.h"
     26 #include "llvm/Analysis/ProfileSummaryInfo.h"
     27 #include "llvm/Analysis/TargetTransformInfo.h"
     28 #include "llvm/Analysis/ValueTracking.h"
     29 #include "llvm/Config/llvm-config.h"
     30 #include "llvm/IR/CallSite.h"
     31 #include "llvm/IR/CallingConv.h"
     32 #include "llvm/IR/DataLayout.h"
     33 #include "llvm/IR/GetElementPtrTypeIterator.h"
     34 #include "llvm/IR/GlobalAlias.h"
     35 #include "llvm/IR/InstVisitor.h"
     36 #include "llvm/IR/IntrinsicInst.h"
     37 #include "llvm/IR/Operator.h"
     38 #include "llvm/Support/Debug.h"
     39 #include "llvm/Support/raw_ostream.h"
     40 
     41 using namespace llvm;
     42 
     43 #define DEBUG_TYPE "inline-cost"
     44 
     45 STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
     46 
     47 static cl::opt<int> InlineThreshold(
     48     "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
     49     cl::desc("Control the amount of inlining to perform (default = 225)"));
     50 
     51 static cl::opt<int> HintThreshold(
     52     "inlinehint-threshold", cl::Hidden, cl::init(325),
     53     cl::desc("Threshold for inlining functions with inline hint"));
     54 
     55 static cl::opt<int>
     56     ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
     57                           cl::init(45),
     58                           cl::desc("Threshold for inlining cold callsites"));
     59 
      60 // We introduce this threshold to help the performance of instrumentation-based
      61 // PGO before we actually hook up the inliner with analysis passes such as BPI
      62 // and BFI.
     63 static cl::opt<int> ColdThreshold(
     64     "inlinecold-threshold", cl::Hidden, cl::init(45),
     65     cl::desc("Threshold for inlining functions with cold attribute"));
     66 
     67 static cl::opt<int>
     68     HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
     69                          cl::ZeroOrMore,
     70                          cl::desc("Threshold for hot callsites "));
     71 
     72 static cl::opt<int> LocallyHotCallSiteThreshold(
     73     "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore,
     74     cl::desc("Threshold for locally hot callsites "));
     75 
     76 static cl::opt<int> ColdCallSiteRelFreq(
     77     "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
      78     cl::desc("Maximum block frequency, expressed as a percentage of caller's "
     79              "entry frequency, for a callsite to be cold in the absence of "
     80              "profile information."));
     81 
     82 static cl::opt<int> HotCallSiteRelFreq(
     83     "hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::ZeroOrMore,
     84     cl::desc("Minimum block frequency, expressed as a multiple of caller's "
     85              "entry frequency, for a callsite to be hot in the absence of "
     86              "profile information."));
     87 
     88 static cl::opt<bool> OptComputeFullInlineCost(
     89     "inline-cost-full", cl::Hidden, cl::init(false),
     90     cl::desc("Compute the full inline cost of a call site even when the cost "
     91              "exceeds the threshold."));
     92 
     93 namespace {
     94 
     95 class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
     96   typedef InstVisitor<CallAnalyzer, bool> Base;
     97   friend class InstVisitor<CallAnalyzer, bool>;
     98 
     99   /// The TargetTransformInfo available for this compilation.
    100   const TargetTransformInfo &TTI;
    101 
    102   /// Getter for the cache of @llvm.assume intrinsics.
    103   std::function<AssumptionCache &(Function &)> &GetAssumptionCache;
    104 
    105   /// Getter for BlockFrequencyInfo
    106   Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI;
    107 
    108   /// Profile summary information.
    109   ProfileSummaryInfo *PSI;
    110 
    111   /// The called function.
    112   Function &F;
    113 
    114   // Cache the DataLayout since we use it a lot.
    115   const DataLayout &DL;
    116 
    117   /// The OptimizationRemarkEmitter available for this compilation.
    118   OptimizationRemarkEmitter *ORE;
    119 
    120   /// The candidate callsite being analyzed. Please do not use this to do
    121   /// analysis in the caller function; we want the inline cost query to be
    122   /// easily cacheable. Instead, use the cover function paramHasAttr.
    123   CallSite CandidateCS;
    124 
    125   /// Tunable parameters that control the analysis.
    126   const InlineParams &Params;
    127 
    128   int Threshold;
    129   int Cost;
    130   bool ComputeFullInlineCost;
    131 
    132   bool IsCallerRecursive;
    133   bool IsRecursiveCall;
    134   bool ExposesReturnsTwice;
    135   bool HasDynamicAlloca;
    136   bool ContainsNoDuplicateCall;
    137   bool HasReturn;
    138   bool HasIndirectBr;
    139   bool HasUninlineableIntrinsic;
    140   bool UsesVarArgs;
    141 
    142   /// Number of bytes allocated statically by the callee.
    143   uint64_t AllocatedSize;
    144   unsigned NumInstructions, NumVectorInstructions;
    145   int VectorBonus, TenPercentVectorBonus;
    146   // Bonus to be applied when the callee has only one reachable basic block.
    147   int SingleBBBonus;
    148 
    149   /// While we walk the potentially-inlined instructions, we build up and
    150   /// maintain a mapping of simplified values specific to this callsite. The
    151   /// idea is to propagate any special information we have about arguments to
    152   /// this call through the inlinable section of the function, and account for
    153   /// likely simplifications post-inlining. The most important aspect we track
    154   /// is CFG altering simplifications -- when we prove a basic block dead, that
    155   /// can cause dramatic shifts in the cost of inlining a function.
    156   DenseMap<Value *, Constant *> SimplifiedValues;
    157 
    158   /// Keep track of the values which map back (through function arguments) to
    159   /// allocas on the caller stack which could be simplified through SROA.
    160   DenseMap<Value *, Value *> SROAArgValues;
    161 
    162   /// The mapping of caller Alloca values to their accumulated cost savings. If
    163   /// we have to disable SROA for one of the allocas, this tells us how much
    164   /// cost must be added.
    165   DenseMap<Value *, int> SROAArgCosts;
    166 
    167   /// Keep track of values which map to a pointer base and constant offset.
    168   DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;
    169 
    170   /// Keep track of dead blocks due to the constant arguments.
    171   SetVector<BasicBlock *> DeadBlocks;
    172 
    173   /// The mapping of the blocks to their known unique successors due to the
    174   /// constant arguments.
    175   DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;
    176 
     177   /// Model the elimination of repeated loads that is expected to happen
     178   /// whenever we simplify away the stores that would otherwise prevent those
     179   /// loads from being eliminated.
    180   bool EnableLoadElimination;
    181   SmallPtrSet<Value *, 16> LoadAddrSet;
    182   int LoadEliminationCost;
    183 
    184   // Custom simplification helper routines.
    185   bool isAllocaDerivedArg(Value *V);
    186   bool lookupSROAArgAndCost(Value *V, Value *&Arg,
    187                             DenseMap<Value *, int>::iterator &CostIt);
    188   void disableSROA(DenseMap<Value *, int>::iterator CostIt);
    189   void disableSROA(Value *V);
    190   void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
    191   void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
    192                           int InstructionCost);
    193   void disableLoadElimination();
    194   bool isGEPFree(GetElementPtrInst &GEP);
    195   bool canFoldInboundsGEP(GetElementPtrInst &I);
    196   bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
    197   bool simplifyCallSite(Function *F, CallSite CS);
    198   template <typename Callable>
    199   bool simplifyInstruction(Instruction &I, Callable Evaluate);
    200   ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
    201 
    202   /// Return true if the given argument to the function being considered for
    203   /// inlining has the given attribute set either at the call site or the
    204   /// function declaration.  Primarily used to inspect call site specific
    205   /// attributes since these can be more precise than the ones on the callee
    206   /// itself.
    207   bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);
    208 
    209   /// Return true if the given value is known non null within the callee if
    210   /// inlined through this particular callsite.
    211   bool isKnownNonNullInCallee(Value *V);
    212 
    213   /// Update Threshold based on callsite properties such as callee
    214   /// attributes and callee hotness for PGO builds. The Callee is explicitly
    215   /// passed to support analyzing indirect calls whose target is inferred by
    216   /// analysis.
    217   void updateThreshold(CallSite CS, Function &Callee);
    218 
    219   /// Return true if size growth is allowed when inlining the callee at CS.
    220   bool allowSizeGrowth(CallSite CS);
    221 
    222   /// Return true if \p CS is a cold callsite.
    223   bool isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI);
    224 
    225   /// Return a higher threshold if \p CS is a hot callsite.
    226   Optional<int> getHotCallSiteThreshold(CallSite CS,
    227                                         BlockFrequencyInfo *CallerBFI);
    228 
    229   // Custom analysis routines.
    230   bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
    231 
    232   // Disable several entry points to the visitor so we don't accidentally use
    233   // them by declaring but not defining them here.
    234   void visit(Module *);
    235   void visit(Module &);
    236   void visit(Function *);
    237   void visit(Function &);
    238   void visit(BasicBlock *);
    239   void visit(BasicBlock &);
    240 
    241   // Provide base case for our instruction visit.
    242   bool visitInstruction(Instruction &I);
    243 
    244   // Our visit overrides.
    245   bool visitAlloca(AllocaInst &I);
    246   bool visitPHI(PHINode &I);
    247   bool visitGetElementPtr(GetElementPtrInst &I);
    248   bool visitBitCast(BitCastInst &I);
    249   bool visitPtrToInt(PtrToIntInst &I);
    250   bool visitIntToPtr(IntToPtrInst &I);
    251   bool visitCastInst(CastInst &I);
    252   bool visitUnaryInstruction(UnaryInstruction &I);
    253   bool visitCmpInst(CmpInst &I);
    254   bool visitSub(BinaryOperator &I);
    255   bool visitBinaryOperator(BinaryOperator &I);
    256   bool visitLoad(LoadInst &I);
    257   bool visitStore(StoreInst &I);
    258   bool visitExtractValue(ExtractValueInst &I);
    259   bool visitInsertValue(InsertValueInst &I);
    260   bool visitCallSite(CallSite CS);
    261   bool visitReturnInst(ReturnInst &RI);
    262   bool visitBranchInst(BranchInst &BI);
    263   bool visitSelectInst(SelectInst &SI);
    264   bool visitSwitchInst(SwitchInst &SI);
    265   bool visitIndirectBrInst(IndirectBrInst &IBI);
    266   bool visitResumeInst(ResumeInst &RI);
    267   bool visitCleanupReturnInst(CleanupReturnInst &RI);
    268   bool visitCatchReturnInst(CatchReturnInst &RI);
    269   bool visitUnreachableInst(UnreachableInst &I);
    270 
    271 public:
    272   CallAnalyzer(const TargetTransformInfo &TTI,
    273                std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
    274                Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
    275                ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
    276                Function &Callee, CallSite CSArg, const InlineParams &Params)
    277       : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
    278         PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
    279         CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold),
    280         Cost(0), ComputeFullInlineCost(OptComputeFullInlineCost ||
    281                                        Params.ComputeFullInlineCost || ORE),
    282         IsCallerRecursive(false), IsRecursiveCall(false),
    283         ExposesReturnsTwice(false), HasDynamicAlloca(false),
    284         ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
    285         HasUninlineableIntrinsic(false), UsesVarArgs(false), AllocatedSize(0),
    286         NumInstructions(0), NumVectorInstructions(0), VectorBonus(0),
    287         SingleBBBonus(0), EnableLoadElimination(true), LoadEliminationCost(0),
    288         NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
    289         NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
    290         NumInstructionsSimplified(0), SROACostSavings(0),
    291         SROACostSavingsLost(0) {}
    292 
    293   bool analyzeCall(CallSite CS);
    294 
    295   int getThreshold() { return Threshold; }
    296   int getCost() { return Cost; }
    297 
    298   // Keep a bunch of stats about the cost savings found so we can print them
    299   // out when debugging.
    300   unsigned NumConstantArgs;
    301   unsigned NumConstantOffsetPtrArgs;
    302   unsigned NumAllocaArgs;
    303   unsigned NumConstantPtrCmps;
    304   unsigned NumConstantPtrDiffs;
    305   unsigned NumInstructionsSimplified;
    306   unsigned SROACostSavings;
    307   unsigned SROACostSavingsLost;
    308 
    309   void dump();
    310 };
    311 
    312 } // namespace
    313 
    314 /// Test whether the given value is an Alloca-derived function argument.
    315 bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
    316   return SROAArgValues.count(V);
    317 }
    318 
    319 /// Lookup the SROA-candidate argument and cost iterator which V maps to.
    320 /// Returns false if V does not map to a SROA-candidate.
    321 bool CallAnalyzer::lookupSROAArgAndCost(
    322     Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
    323   if (SROAArgValues.empty() || SROAArgCosts.empty())
    324     return false;
    325 
    326   DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
    327   if (ArgIt == SROAArgValues.end())
    328     return false;
    329 
    330   Arg = ArgIt->second;
    331   CostIt = SROAArgCosts.find(Arg);
    332   return CostIt != SROAArgCosts.end();
    333 }
    334 
    335 /// Disable SROA for the candidate marked by this cost iterator.
    336 ///
    337 /// This marks the candidate as no longer viable for SROA, and adds the cost
    338 /// savings associated with it back into the inline cost measurement.
    339 void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
    340   // If we're no longer able to perform SROA we need to undo its cost savings
    341   // and prevent subsequent analysis.
    342   Cost += CostIt->second;
    343   SROACostSavings -= CostIt->second;
    344   SROACostSavingsLost += CostIt->second;
    345   SROAArgCosts.erase(CostIt);
    346   disableLoadElimination();
    347 }
    348 
    349 /// If 'V' maps to a SROA candidate, disable SROA for it.
    350 void CallAnalyzer::disableSROA(Value *V) {
    351   Value *SROAArg;
    352   DenseMap<Value *, int>::iterator CostIt;
    353   if (lookupSROAArgAndCost(V, SROAArg, CostIt))
    354     disableSROA(CostIt);
    355 }
    356 
    357 /// Accumulate the given cost for a particular SROA candidate.
    358 void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
    359                                       int InstructionCost) {
    360   CostIt->second += InstructionCost;
    361   SROACostSavings += InstructionCost;
    362 }
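
// Illustrative sketch of the ledger above (the IR and numbers are assumptions,
// not taken from this file): each simple load or store through a pointer that
// maps back to a caller-side alloca is treated as free and its cost parked in
// SROACostSavings via accumulateSROACost, e.g.
//
//   store i32 0, i32* %arg      ; credited: +InlineConstants::InstrCost
//   %v = load i32, i32* %arg    ; credited: +InlineConstants::InstrCost
//
// If SROA is later ruled out for that alloca (say, the pointer escapes through
// an unhandled use), disableSROA folds the credited amount back into Cost.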
    363 
    364 void CallAnalyzer::disableLoadElimination() {
    365   if (EnableLoadElimination) {
    366     Cost += LoadEliminationCost;
    367     LoadEliminationCost = 0;
    368     EnableLoadElimination = false;
    369   }
    370 }
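
// Illustrative sketch (assumed IR, not from this file) of the bookkeeping that
// disableLoadElimination unwinds: a repeated unordered load from an address
// already in LoadAddrSet is modeled as free and its cost parked in
// LoadEliminationCost; a clobbering store or call re-adds that cost here.
//
//   %a = load i32, i32* %p    ; first load, costed normally
//   %b = load i32, i32* %p    ; repeat: LoadEliminationCost += InstrCost
//   store i32 1, i32* %q      ; potential clobber -> disableLoadElimination()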
    371 
    372 /// Accumulate a constant GEP offset into an APInt if possible.
    373 ///
    374 /// Returns false if unable to compute the offset for any reason. Respects any
    375 /// simplified values known during the analysis of this callsite.
    376 bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
    377   unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());
    378   assert(IntPtrWidth == Offset.getBitWidth());
    379 
    380   for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
    381        GTI != GTE; ++GTI) {
    382     ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
    383     if (!OpC)
    384       if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
    385         OpC = dyn_cast<ConstantInt>(SimpleOp);
    386     if (!OpC)
    387       return false;
    388     if (OpC->isZero())
    389       continue;
    390 
    391     // Handle a struct index, which adds its field offset to the pointer.
    392     if (StructType *STy = GTI.getStructTypeOrNull()) {
    393       unsigned ElementIdx = OpC->getZExtValue();
    394       const StructLayout *SL = DL.getStructLayout(STy);
    395       Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
    396       continue;
    397     }
    398 
    399     APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
    400     Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
    401   }
    402   return true;
    403 }
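
// Worked example for the accumulation above (types and layout are assumed for
// illustration): for
//
//   getelementptr inbounds {i32, [4 x i64]}, {i32, [4 x i64]}* %p, i32 0, i32 1, i64 2
//
// the leading zero index is skipped, the struct index 1 adds the field offset
// from StructLayout (8 bytes with the usual padding), and the array index 2
// adds 2 * sizeof(i64) = 16, so Offset becomes 24. Any index that is neither a
// constant nor simplified to one makes this return false.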
    404 
    405 /// Use TTI to check whether a GEP is free.
    406 ///
    407 /// Respects any simplified values known during the analysis of this callsite.
    408 bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
    409   SmallVector<Value *, 4> Operands;
    410   Operands.push_back(GEP.getOperand(0));
    411   for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
    412     if (Constant *SimpleOp = SimplifiedValues.lookup(*I))
    413        Operands.push_back(SimpleOp);
    414      else
    415        Operands.push_back(*I);
    416   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands);
    417 }
    418 
    419 bool CallAnalyzer::visitAlloca(AllocaInst &I) {
    420   // Check whether inlining will turn a dynamic alloca into a static
    421   // alloca and handle that case.
    422   if (I.isArrayAllocation()) {
    423     Constant *Size = SimplifiedValues.lookup(I.getArraySize());
    424     if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
    425       Type *Ty = I.getAllocatedType();
    426       AllocatedSize = SaturatingMultiplyAdd(
    427           AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize);
    428       return Base::visitAlloca(I);
    429     }
    430   }
    431 
    432   // Accumulate the allocated size.
    433   if (I.isStaticAlloca()) {
    434     Type *Ty = I.getAllocatedType();
    435     AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize);
    436   }
    437 
    438   // We will happily inline static alloca instructions.
    439   if (I.isStaticAlloca())
    440     return Base::visitAlloca(I);
    441 
    442   // FIXME: This is overly conservative. Dynamic allocas are inefficient for
    443   // a variety of reasons, and so we would like to not inline them into
    444   // functions which don't currently have a dynamic alloca. This simply
    445   // disables inlining altogether in the presence of a dynamic alloca.
    446   HasDynamicAlloca = true;
    447   return false;
    448 }
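
// Illustrative case for the array-allocation path above (assumed values): if
// the array length operand has been simplified to the constant 16, then
//
//   %buf = alloca double, i32 %n
//
// is costed as a static allocation of 16 * 8 = 128 bytes added to
// AllocatedSize, rather than setting HasDynamicAlloca and blocking inlining.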
    449 
    450 bool CallAnalyzer::visitPHI(PHINode &I) {
    451   // FIXME: We need to propagate SROA *disabling* through phi nodes, even
     452   // though we don't want to propagate its bonuses. The idea is to disable
    453   // SROA if it *might* be used in an inappropriate manner.
    454 
    455   // Phi nodes are always zero-cost.
    456   // FIXME: Pointer sizes may differ between different address spaces, so do we
     457   // need to use the correct address space in the call to getPointerSizeInBits here?
    458   // Or could we skip the getPointerSizeInBits call completely? As far as I can
    459   // see the ZeroOffset is used as a dummy value, so we can probably use any
    460   // bit width for the ZeroOffset?
    461   APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits(0));
    462   bool CheckSROA = I.getType()->isPointerTy();
    463 
    464   // Track the constant or pointer with constant offset we've seen so far.
    465   Constant *FirstC = nullptr;
    466   std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};
    467   Value *FirstV = nullptr;
    468 
    469   for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {
    470     BasicBlock *Pred = I.getIncomingBlock(i);
    471     // If the incoming block is dead, skip the incoming block.
    472     if (DeadBlocks.count(Pred))
    473       continue;
    474     // If the parent block of phi is not the known successor of the incoming
    475     // block, skip the incoming block.
    476     BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
    477     if (KnownSuccessor && KnownSuccessor != I.getParent())
    478       continue;
    479 
    480     Value *V = I.getIncomingValue(i);
    481     // If the incoming value is this phi itself, skip the incoming value.
    482     if (&I == V)
    483       continue;
    484 
    485     Constant *C = dyn_cast<Constant>(V);
    486     if (!C)
    487       C = SimplifiedValues.lookup(V);
    488 
    489     std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};
    490     if (!C && CheckSROA)
    491       BaseAndOffset = ConstantOffsetPtrs.lookup(V);
    492 
    493     if (!C && !BaseAndOffset.first)
    494       // The incoming value is neither a constant nor a pointer with constant
    495       // offset, exit early.
    496       return true;
    497 
    498     if (FirstC) {
    499       if (FirstC == C)
    500         // If we've seen a constant incoming value before and it is the same
    501         // constant we see this time, continue checking the next incoming value.
    502         continue;
    503       // Otherwise early exit because we either see a different constant or saw
    504       // a constant before but we have a pointer with constant offset this time.
    505       return true;
    506     }
    507 
    508     if (FirstV) {
    509       // The same logic as above, but check pointer with constant offset here.
    510       if (FirstBaseAndOffset == BaseAndOffset)
    511         continue;
    512       return true;
    513     }
    514 
    515     if (C) {
    516       // This is the 1st time we've seen a constant, record it.
    517       FirstC = C;
    518       continue;
    519     }
    520 
    521     // The remaining case is that this is the 1st time we've seen a pointer with
    522     // constant offset, record it.
    523     FirstV = V;
    524     FirstBaseAndOffset = BaseAndOffset;
    525   }
    526 
    527   // Check if we can map phi to a constant.
    528   if (FirstC) {
    529     SimplifiedValues[&I] = FirstC;
    530     return true;
    531   }
    532 
    533   // Check if we can map phi to a pointer with constant offset.
    534   if (FirstBaseAndOffset.first) {
    535     ConstantOffsetPtrs[&I] = FirstBaseAndOffset;
    536 
    537     Value *SROAArg;
    538     DenseMap<Value *, int>::iterator CostIt;
    539     if (lookupSROAArgAndCost(FirstV, SROAArg, CostIt))
    540       SROAArgValues[&I] = SROAArg;
    541   }
    542 
    543   return true;
    544 }
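
// Illustrative case (assumed IR): with %dead in DeadBlocks, a phi such as
//
//   %x = phi i32 [ 7, %live ], [ %whatever, %dead ]
//
// has a single relevant incoming value, so %x is recorded as the constant 7 in
// SimplifiedValues; the analogous pointer case records a base plus constant
// offset in ConstantOffsetPtrs instead.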
    545 
     546 /// Check whether we can fold GEPs of constant-offset call site argument pointers.
    547 /// This requires target data and inbounds GEPs.
    548 ///
    549 /// \return true if the specified GEP can be folded.
    550 bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
    551   // Check if we have a base + offset for the pointer.
    552   std::pair<Value *, APInt> BaseAndOffset =
    553       ConstantOffsetPtrs.lookup(I.getPointerOperand());
    554   if (!BaseAndOffset.first)
    555     return false;
    556 
    557   // Check if the offset of this GEP is constant, and if so accumulate it
    558   // into Offset.
    559   if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second))
    560     return false;
    561 
    562   // Add the result as a new mapping to Base + Offset.
    563   ConstantOffsetPtrs[&I] = BaseAndOffset;
    564 
    565   return true;
    566 }
    567 
    568 bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
    569   Value *SROAArg;
    570   DenseMap<Value *, int>::iterator CostIt;
    571   bool SROACandidate =
    572       lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt);
    573 
    574   // Lambda to check whether a GEP's indices are all constant.
    575   auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
    576     for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
    577       if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
    578         return false;
    579     return true;
    580   };
    581 
    582   if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
    583     if (SROACandidate)
    584       SROAArgValues[&I] = SROAArg;
    585 
    586     // Constant GEPs are modeled as free.
    587     return true;
    588   }
    589 
    590   // Variable GEPs will require math and will disable SROA.
    591   if (SROACandidate)
    592     disableSROA(CostIt);
    593   return isGEPFree(I);
    594 }
    595 
    596 /// Simplify \p I if its operands are constants and update SimplifiedValues.
    597 /// \p Evaluate is a callable specific to instruction type that evaluates the
    598 /// instruction when all the operands are constants.
    599 template <typename Callable>
    600 bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
    601   SmallVector<Constant *, 2> COps;
    602   for (Value *Op : I.operands()) {
    603     Constant *COp = dyn_cast<Constant>(Op);
    604     if (!COp)
    605       COp = SimplifiedValues.lookup(Op);
    606     if (!COp)
    607       return false;
    608     COps.push_back(COp);
    609   }
    610   auto *C = Evaluate(COps);
    611   if (!C)
    612     return false;
    613   SimplifiedValues[&I] = C;
    614   return true;
    615 }
    616 
    617 bool CallAnalyzer::visitBitCast(BitCastInst &I) {
    618   // Propagate constants through bitcasts.
    619   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
    620         return ConstantExpr::getBitCast(COps[0], I.getType());
    621       }))
    622     return true;
    623 
    624   // Track base/offsets through casts
    625   std::pair<Value *, APInt> BaseAndOffset =
    626       ConstantOffsetPtrs.lookup(I.getOperand(0));
    627   // Casts don't change the offset, just wrap it up.
    628   if (BaseAndOffset.first)
    629     ConstantOffsetPtrs[&I] = BaseAndOffset;
    630 
    631   // Also look for SROA candidates here.
    632   Value *SROAArg;
    633   DenseMap<Value *, int>::iterator CostIt;
    634   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    635     SROAArgValues[&I] = SROAArg;
    636 
    637   // Bitcasts are always zero cost.
    638   return true;
    639 }
    640 
    641 bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
    642   // Propagate constants through ptrtoint.
    643   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
    644         return ConstantExpr::getPtrToInt(COps[0], I.getType());
    645       }))
    646     return true;
    647 
    648   // Track base/offset pairs when converted to a plain integer provided the
    649   // integer is large enough to represent the pointer.
    650   unsigned IntegerSize = I.getType()->getScalarSizeInBits();
    651   unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
    652   if (IntegerSize >= DL.getPointerSizeInBits(AS)) {
    653     std::pair<Value *, APInt> BaseAndOffset =
    654         ConstantOffsetPtrs.lookup(I.getOperand(0));
    655     if (BaseAndOffset.first)
    656       ConstantOffsetPtrs[&I] = BaseAndOffset;
    657   }
    658 
    659   // This is really weird. Technically, ptrtoint will disable SROA. However,
    660   // unless that ptrtoint is *used* somewhere in the live basic blocks after
    661   // inlining, it will be nuked, and SROA should proceed. All of the uses which
    662   // would block SROA would also block SROA if applied directly to a pointer,
    663   // and so we can just add the integer in here. The only places where SROA is
    664   // preserved either cannot fire on an integer, or won't in-and-of themselves
    665   // disable SROA (ext) w/o some later use that we would see and disable.
    666   Value *SROAArg;
    667   DenseMap<Value *, int>::iterator CostIt;
    668   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    669     SROAArgValues[&I] = SROAArg;
    670 
    671   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
    672 }
    673 
    674 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
     675   // Propagate constants through inttoptr.
    676   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
    677         return ConstantExpr::getIntToPtr(COps[0], I.getType());
    678       }))
    679     return true;
    680 
    681   // Track base/offset pairs when round-tripped through a pointer without
    682   // modifications provided the integer is not too large.
    683   Value *Op = I.getOperand(0);
    684   unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
    685   if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
    686     std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
    687     if (BaseAndOffset.first)
    688       ConstantOffsetPtrs[&I] = BaseAndOffset;
    689   }
    690 
    691   // "Propagate" SROA here in the same manner as we do for ptrtoint above.
    692   Value *SROAArg;
    693   DenseMap<Value *, int>::iterator CostIt;
    694   if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
    695     SROAArgValues[&I] = SROAArg;
    696 
    697   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
    698 }
    699 
    700 bool CallAnalyzer::visitCastInst(CastInst &I) {
     701   // Propagate constants through casts.
    702   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
    703         return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
    704       }))
    705     return true;
    706 
    707   // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
    708   disableSROA(I.getOperand(0));
    709 
    710   // If this is a floating-point cast, and the target says this operation
    711   // is expensive, this may eventually become a library call. Treat the cost
    712   // as such.
    713   switch (I.getOpcode()) {
    714   case Instruction::FPTrunc:
    715   case Instruction::FPExt:
    716   case Instruction::UIToFP:
    717   case Instruction::SIToFP:
    718   case Instruction::FPToUI:
    719   case Instruction::FPToSI:
    720     if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
    721       Cost += InlineConstants::CallPenalty;
    722     break;
    723   default:
    724     break;
    725   }
    726 
    727   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
    728 }
    729 
    730 bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
    731   Value *Operand = I.getOperand(0);
    732   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
    733         return ConstantFoldInstOperands(&I, COps[0], DL);
    734       }))
    735     return true;
    736 
    737   // Disable any SROA on the argument to arbitrary unary operators.
    738   disableSROA(Operand);
    739 
    740   return false;
    741 }
    742 
    743 bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
    744   return CandidateCS.paramHasAttr(A->getArgNo(), Attr);
    745 }
    746 
    747 bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
    748   // Does the *call site* have the NonNull attribute set on an argument?  We
    749   // use the attribute on the call site to memoize any analysis done in the
    750   // caller. This will also trip if the callee function has a non-null
    751   // parameter attribute, but that's a less interesting case because hopefully
    752   // the callee would already have been simplified based on that.
    753   if (Argument *A = dyn_cast<Argument>(V))
    754     if (paramHasAttr(A, Attribute::NonNull))
    755       return true;
    756 
    757   // Is this an alloca in the caller?  This is distinct from the attribute case
    758   // above because attributes aren't updated within the inliner itself and we
    759   // always want to catch the alloca derived case.
    760   if (isAllocaDerivedArg(V))
    761     // We can actually predict the result of comparisons between an
    762     // alloca-derived value and null. Note that this fires regardless of
    763     // SROA firing.
    764     return true;
    765 
    766   return false;
    767 }
    768 
    769 bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
    770   // If the normal destination of the invoke or the parent block of the call
    771   // site is unreachable-terminated, there is little point in inlining this
    772   // unless there is literally zero cost.
    773   // FIXME: Note that it is possible that an unreachable-terminated block has a
    774   // hot entry. For example, in below scenario inlining hot_call_X() may be
    775   // beneficial :
    776   // main() {
    777   //   hot_call_1();
    778   //   ...
    779   //   hot_call_N()
    780   //   exit(0);
    781   // }
    782   // For now, we are not handling this corner case here as it is rare in real
    783   // code. In future, we should elaborate this based on BPI and BFI in more
    784   // general threshold adjusting heuristics in updateThreshold().
    785   Instruction *Instr = CS.getInstruction();
    786   if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
    787     if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
    788       return false;
    789   } else if (isa<UnreachableInst>(Instr->getParent()->getTerminator()))
    790     return false;
    791 
    792   return true;
    793 }
    794 
    795 bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
    796   // If global profile summary is available, then callsite's coldness is
    797   // determined based on that.
    798   if (PSI && PSI->hasProfileSummary())
    799     return PSI->isColdCallSite(CS, CallerBFI);
    800 
    801   // Otherwise we need BFI to be available.
    802   if (!CallerBFI)
    803     return false;
    804 
    805   // Determine if the callsite is cold relative to caller's entry. We could
    806   // potentially cache the computation of scaled entry frequency, but the added
    807   // complexity is not worth it unless this scaling shows up high in the
    808   // profiles.
    809   const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
    810   auto CallSiteBB = CS.getInstruction()->getParent();
    811   auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
    812   auto CallerEntryFreq =
    813       CallerBFI->getBlockFreq(&(CS.getCaller()->getEntryBlock()));
    814   return CallSiteFreq < CallerEntryFreq * ColdProb;
    815 }
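
// Worked example (using the default knob value, purely for illustration): with
// cold-callsite-rel-freq at 2, ColdProb is 2/100, so in a caller whose entry
// block frequency is 1000 a callsite needs a block frequency below 20 to be
// treated as cold when no profile summary is available.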
    816 
    817 Optional<int>
    818 CallAnalyzer::getHotCallSiteThreshold(CallSite CS,
    819                                       BlockFrequencyInfo *CallerBFI) {
    820 
    821   // If global profile summary is available, then callsite's hotness is
    822   // determined based on that.
    823   if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(CS, CallerBFI))
    824     return Params.HotCallSiteThreshold;
    825 
    826   // Otherwise we need BFI to be available and to have a locally hot callsite
    827   // threshold.
    828   if (!CallerBFI || !Params.LocallyHotCallSiteThreshold)
    829     return None;
    830 
    831   // Determine if the callsite is hot relative to caller's entry. We could
    832   // potentially cache the computation of scaled entry frequency, but the added
    833   // complexity is not worth it unless this scaling shows up high in the
    834   // profiles.
    835   auto CallSiteBB = CS.getInstruction()->getParent();
    836   auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
    837   auto CallerEntryFreq = CallerBFI->getEntryFreq();
    838   if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
    839     return Params.LocallyHotCallSiteThreshold;
    840 
    841   // Otherwise treat it normally.
    842   return None;
    843 }
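
// Worked example (default knob values, for illustration only): without a
// profile summary, hot-callsite-rel-freq of 60 means a callsite in a caller
// with entry frequency 1000 must reach a block frequency of at least 60000 to
// count as locally hot, in which case the locally hot threshold (backed by
// -locally-hot-callsite-threshold, default 525) is returned.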
    844 
    845 void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
    846   // If no size growth is allowed for this inlining, set Threshold to 0.
    847   if (!allowSizeGrowth(CS)) {
    848     Threshold = 0;
    849     return;
    850   }
    851 
    852   Function *Caller = CS.getCaller();
    853 
    854   // return min(A, B) if B is valid.
    855   auto MinIfValid = [](int A, Optional<int> B) {
    856     return B ? std::min(A, B.getValue()) : A;
    857   };
    858 
    859   // return max(A, B) if B is valid.
    860   auto MaxIfValid = [](int A, Optional<int> B) {
    861     return B ? std::max(A, B.getValue()) : A;
    862   };
    863 
    864   // Various bonus percentages. These are multiplied by Threshold to get the
    865   // bonus values.
    866   // SingleBBBonus: This bonus is applied if the callee has a single reachable
    867   // basic block at the given callsite context. This is speculatively applied
    868   // and withdrawn if more than one basic block is seen.
    869   //
    870   // Vector bonuses: We want to more aggressively inline vector-dense kernels
    871   // and apply this bonus based on the percentage of vector instructions. A
    872   // bonus is applied if the vector instructions exceed 50% and half that amount
     873   // is applied if it exceeds 10%. Note that these bonuses are somewhat
    874   // arbitrary and evolved over time by accident as much as because they are
    875   // principled bonuses.
    876   // FIXME: It would be nice to base the bonus values on something more
    877   // scientific.
    878   //
     879   // LastCallToStaticBonus: This large bonus is applied to ensure the inlining
    880   // of the last call to a static function as inlining such functions is
    881   // guaranteed to reduce code size.
    882   //
    883   // These bonus percentages may be set to 0 based on properties of the caller
    884   // and the callsite.
    885   int SingleBBBonusPercent = 50;
    886   int VectorBonusPercent = 150;
    887   int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;
    888 
    889   // Lambda to set all the above bonus and bonus percentages to 0.
    890   auto DisallowAllBonuses = [&]() {
    891     SingleBBBonusPercent = 0;
    892     VectorBonusPercent = 0;
    893     LastCallToStaticBonus = 0;
    894   };
    895 
    896   // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
    897   // and reduce the threshold if the caller has the necessary attribute.
    898   if (Caller->optForMinSize()) {
    899     Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
    900     // For minsize, we want to disable the single BB bonus and the vector
    901     // bonuses, but not the last-call-to-static bonus. Inlining the last call to
    902     // a static function will, at the minimum, eliminate the parameter setup and
    903     // call/return instructions.
    904     SingleBBBonusPercent = 0;
    905     VectorBonusPercent = 0;
    906   } else if (Caller->optForSize())
    907     Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);
    908 
    909   // Adjust the threshold based on inlinehint attribute and profile based
    910   // hotness information if the caller does not have MinSize attribute.
    911   if (!Caller->optForMinSize()) {
    912     if (Callee.hasFnAttribute(Attribute::InlineHint))
    913       Threshold = MaxIfValid(Threshold, Params.HintThreshold);
    914 
    915     // FIXME: After switching to the new passmanager, simplify the logic below
    916     // by checking only the callsite hotness/coldness as we will reliably
    917     // have local profile information.
    918     //
    919     // Callsite hotness and coldness can be determined if sample profile is
    920     // used (which adds hotness metadata to calls) or if caller's
    921     // BlockFrequencyInfo is available.
    922     BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr;
    923     auto HotCallSiteThreshold = getHotCallSiteThreshold(CS, CallerBFI);
    924     if (!Caller->optForSize() && HotCallSiteThreshold) {
    925       LLVM_DEBUG(dbgs() << "Hot callsite.\n");
    926       // FIXME: This should update the threshold only if it exceeds the
    927       // current threshold, but AutoFDO + ThinLTO currently relies on this
    928       // behavior to prevent inlining of hot callsites during ThinLTO
    929       // compile phase.
    930       Threshold = HotCallSiteThreshold.getValue();
    931     } else if (isColdCallSite(CS, CallerBFI)) {
    932       LLVM_DEBUG(dbgs() << "Cold callsite.\n");
    933       // Do not apply bonuses for a cold callsite including the
    934       // LastCallToStatic bonus. While this bonus might result in code size
    935       // reduction, it can cause the size of a non-cold caller to increase
    936       // preventing it from being inlined.
    937       DisallowAllBonuses();
    938       Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
    939     } else if (PSI) {
    940       // Use callee's global profile information only if we have no way of
    941       // determining this via callsite information.
    942       if (PSI->isFunctionEntryHot(&Callee)) {
    943         LLVM_DEBUG(dbgs() << "Hot callee.\n");
    944         // If callsite hotness can not be determined, we may still know
    945         // that the callee is hot and treat it as a weaker hint for threshold
    946         // increase.
    947         Threshold = MaxIfValid(Threshold, Params.HintThreshold);
    948       } else if (PSI->isFunctionEntryCold(&Callee)) {
    949         LLVM_DEBUG(dbgs() << "Cold callee.\n");
    950         // Do not apply bonuses for a cold callee including the
    951         // LastCallToStatic bonus. While this bonus might result in code size
    952         // reduction, it can cause the size of a non-cold caller to increase
    953         // preventing it from being inlined.
    954         DisallowAllBonuses();
    955         Threshold = MinIfValid(Threshold, Params.ColdThreshold);
    956       }
    957     }
    958   }
    959 
    960   // Finally, take the target-specific inlining threshold multiplier into
    961   // account.
    962   Threshold *= TTI.getInliningThresholdMultiplier();
    963 
    964   SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
    965   VectorBonus = Threshold * VectorBonusPercent / 100;
    966 
    967   bool OnlyOneCallAndLocalLinkage =
    968       F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
    969   // If there is only one call of the function, and it has internal linkage,
    970   // the cost of inlining it drops dramatically. It may seem odd to update
    971   // Cost in updateThreshold, but the bonus depends on the logic in this method.
    972   if (OnlyOneCallAndLocalLinkage)
    973     Cost -= LastCallToStaticBonus;
    974 }
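
// Worked example (stock knob values, for illustration): a callee carrying the
// inlinehint attribute, called from a caller built without optsize/minsize,
// raises Threshold from the 225 default to max(225, 325) = 325. With an
// inlining threshold multiplier of 1 this yields
//   SingleBBBonus = 325 * 50 / 100  = 162
//   VectorBonus   = 325 * 150 / 100 = 487
// and, if this is the sole call to a local-linkage callee, Cost additionally
// drops by InlineConstants::LastCallToStaticBonus.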
    975 
    976 bool CallAnalyzer::visitCmpInst(CmpInst &I) {
    977   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
    978   // First try to handle simplified comparisons.
    979   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
    980         return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]);
    981       }))
    982     return true;
    983 
    984   if (I.getOpcode() == Instruction::FCmp)
    985     return false;
    986 
    987   // Otherwise look for a comparison between constant offset pointers with
    988   // a common base.
    989   Value *LHSBase, *RHSBase;
    990   APInt LHSOffset, RHSOffset;
    991   std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
    992   if (LHSBase) {
    993     std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    994     if (RHSBase && LHSBase == RHSBase) {
    995       // We have common bases, fold the icmp to a constant based on the
    996       // offsets.
    997       Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
    998       Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
    999       if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
   1000         SimplifiedValues[&I] = C;
   1001         ++NumConstantPtrCmps;
   1002         return true;
   1003       }
   1004     }
   1005   }
   1006 
   1007   // If the comparison is an equality comparison with null, we can simplify it
    1008   // if we know the value (argument) can't be null.
   1009   if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
   1010       isKnownNonNullInCallee(I.getOperand(0))) {
   1011     bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
   1012     SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
   1013                                       : ConstantInt::getFalse(I.getType());
   1014     return true;
   1015   }
   1016   // Finally check for SROA candidates in comparisons.
   1017   Value *SROAArg;
   1018   DenseMap<Value *, int>::iterator CostIt;
   1019   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
   1020     if (isa<ConstantPointerNull>(I.getOperand(1))) {
   1021       accumulateSROACost(CostIt, InlineConstants::InstrCost);
   1022       return true;
   1023     }
   1024 
   1025     disableSROA(CostIt);
   1026   }
   1027 
   1028   return false;
   1029 }
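
// Illustrative fold (assumed IR): if %p and %q are tracked in
// ConstantOffsetPtrs with the same base and offsets 4 and 8, then
//
//   %c = icmp ult i32* %p, %q
//
// is evaluated as icmp ult 4, 8 == true, stored in SimplifiedValues, and
// counted in NumConstantPtrCmps.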
   1030 
   1031 bool CallAnalyzer::visitSub(BinaryOperator &I) {
   1032   // Try to handle a special case: we can fold computing the difference of two
   1033   // constant-related pointers.
   1034   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
   1035   Value *LHSBase, *RHSBase;
   1036   APInt LHSOffset, RHSOffset;
   1037   std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
   1038   if (LHSBase) {
   1039     std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
   1040     if (RHSBase && LHSBase == RHSBase) {
   1041       // We have common bases, fold the subtract to a constant based on the
   1042       // offsets.
   1043       Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
   1044       Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
   1045       if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
   1046         SimplifiedValues[&I] = C;
   1047         ++NumConstantPtrDiffs;
   1048         return true;
   1049       }
   1050     }
   1051   }
   1052 
   1053   // Otherwise, fall back to the generic logic for simplifying and handling
   1054   // instructions.
   1055   return Base::visitSub(I);
   1056 }
   1057 
   1058 bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
   1059   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
   1060   Constant *CLHS = dyn_cast<Constant>(LHS);
   1061   if (!CLHS)
   1062     CLHS = SimplifiedValues.lookup(LHS);
   1063   Constant *CRHS = dyn_cast<Constant>(RHS);
   1064   if (!CRHS)
   1065     CRHS = SimplifiedValues.lookup(RHS);
   1066 
   1067   Value *SimpleV = nullptr;
   1068   if (auto FI = dyn_cast<FPMathOperator>(&I))
   1069     SimpleV = SimplifyFPBinOp(I.getOpcode(), CLHS ? CLHS : LHS,
   1070                               CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL);
   1071   else
   1072     SimpleV =
   1073         SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);
   1074 
   1075   if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
   1076     SimplifiedValues[&I] = C;
   1077 
   1078   if (SimpleV)
   1079     return true;
   1080 
   1081   // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
   1082   disableSROA(LHS);
   1083   disableSROA(RHS);
   1084 
   1085   // If the instruction is floating point, and the target says this operation
   1086   // is expensive, this may eventually become a library call. Treat the cost
   1087   // as such.
   1088   if (I.getType()->isFloatingPointTy() &&
   1089       TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
   1090     Cost += InlineConstants::CallPenalty;
   1091 
   1092   return false;
   1093 }
   1094 
   1095 bool CallAnalyzer::visitLoad(LoadInst &I) {
   1096   Value *SROAArg;
   1097   DenseMap<Value *, int>::iterator CostIt;
   1098   if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
   1099     if (I.isSimple()) {
   1100       accumulateSROACost(CostIt, InlineConstants::InstrCost);
   1101       return true;
   1102     }
   1103 
   1104     disableSROA(CostIt);
   1105   }
   1106 
   1107   // If the data is already loaded from this address and hasn't been clobbered
   1108   // by any stores or calls, this load is likely to be redundant and can be
   1109   // eliminated.
   1110   if (EnableLoadElimination &&
   1111       !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {
   1112     LoadEliminationCost += InlineConstants::InstrCost;
   1113     return true;
   1114   }
   1115 
   1116   return false;
   1117 }
   1118 
   1119 bool CallAnalyzer::visitStore(StoreInst &I) {
   1120   Value *SROAArg;
   1121   DenseMap<Value *, int>::iterator CostIt;
   1122   if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
   1123     if (I.isSimple()) {
   1124       accumulateSROACost(CostIt, InlineConstants::InstrCost);
   1125       return true;
   1126     }
   1127 
   1128     disableSROA(CostIt);
   1129   }
   1130 
   1131   // The store can potentially clobber loads and prevent repeated loads from
   1132   // being eliminated.
   1133   // FIXME:
    1134   // 1. We can probably keep an initial set of eliminatable loads subtracted
   1135   // from the cost even when we finally see a store. We just need to disable
   1136   // *further* accumulation of elimination savings.
   1137   // 2. We should probably at some point thread MemorySSA for the callee into
   1138   // this and then use that to actually compute *really* precise savings.
   1139   disableLoadElimination();
   1140   return false;
   1141 }
   1142 
   1143 bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
   1144   // Constant folding for extract value is trivial.
   1145   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
   1146         return ConstantExpr::getExtractValue(COps[0], I.getIndices());
   1147       }))
   1148     return true;
   1149 
   1150   // SROA can look through these but give them a cost.
   1151   return false;
   1152 }
   1153 
   1154 bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
   1155   // Constant folding for insert value is trivial.
   1156   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
   1157         return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0],
   1158                                             /*InsertedValueOperand*/ COps[1],
   1159                                             I.getIndices());
   1160       }))
   1161     return true;
   1162 
   1163   // SROA can look through these but give them a cost.
   1164   return false;
   1165 }
   1166 
   1167 /// Try to simplify a call site.
   1168 ///
   1169 /// Takes a concrete function and callsite and tries to actually simplify it by
   1170 /// analyzing the arguments and call itself with instsimplify. Returns true if
   1171 /// it has simplified the callsite to some other entity (a constant), making it
   1172 /// free.
   1173 bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
   1174   // FIXME: Using the instsimplify logic directly for this is inefficient
   1175   // because we have to continually rebuild the argument list even when no
   1176   // simplifications can be performed. Until that is fixed with remapping
   1177   // inside of instsimplify, directly constant fold calls here.
   1178   if (!canConstantFoldCallTo(CS, F))
   1179     return false;
   1180 
   1181   // Try to re-map the arguments to constants.
   1182   SmallVector<Constant *, 4> ConstantArgs;
   1183   ConstantArgs.reserve(CS.arg_size());
   1184   for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E;
   1185        ++I) {
   1186     Constant *C = dyn_cast<Constant>(*I);
   1187     if (!C)
   1188       C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
   1189     if (!C)
   1190       return false; // This argument doesn't map to a constant.
   1191 
   1192     ConstantArgs.push_back(C);
   1193   }
   1194   if (Constant *C = ConstantFoldCall(CS, F, ConstantArgs)) {
   1195     SimplifiedValues[CS.getInstruction()] = C;
   1196     return true;
   1197   }
   1198 
   1199   return false;
   1200 }
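
// Illustrative fold (assumed IR): if an argument has been simplified to a
// constant, a foldable callee collapses entirely. With %x mapped to 255,
//
//   %n = call i32 @llvm.ctpop.i32(i32 %x)
//
// constant-folds to 8, is recorded in SimplifiedValues, and the call site is
// then treated as free by the visitor.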
   1201 
   1202 bool CallAnalyzer::visitCallSite(CallSite CS) {
   1203   if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
   1204       !F.hasFnAttribute(Attribute::ReturnsTwice)) {
   1205     // This aborts the entire analysis.
   1206     ExposesReturnsTwice = true;
   1207     return false;
   1208   }
   1209   if (CS.isCall() && cast<CallInst>(CS.getInstruction())->cannotDuplicate())
   1210     ContainsNoDuplicateCall = true;
   1211 
   1212   if (Function *F = CS.getCalledFunction()) {
   1213     // When we have a concrete function, first try to simplify it directly.
   1214     if (simplifyCallSite(F, CS))
   1215       return true;
   1216 
   1217     // Next check if it is an intrinsic we know about.
   1218     // FIXME: Lift this into part of the InstVisitor.
   1219     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
   1220       switch (II->getIntrinsicID()) {
   1221       default:
   1222         if (!CS.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
   1223           disableLoadElimination();
   1224         return Base::visitCallSite(CS);
   1225 
   1226       case Intrinsic::load_relative:
   1227         // This is normally lowered to 4 LLVM instructions.
   1228         Cost += 3 * InlineConstants::InstrCost;
   1229         return false;
   1230 
   1231       case Intrinsic::memset:
   1232       case Intrinsic::memcpy:
   1233       case Intrinsic::memmove:
   1234         disableLoadElimination();
   1235         // SROA can usually chew through these intrinsics, but they aren't free.
   1236         return false;
   1237       case Intrinsic::icall_branch_funnel:
   1238       case Intrinsic::localescape:
   1239         HasUninlineableIntrinsic = true;
   1240         return false;
   1241       case Intrinsic::vastart:
   1242       case Intrinsic::vaend:
   1243         UsesVarArgs = true;
   1244         return false;
   1245       }
   1246     }
   1247 
   1248     if (F == CS.getInstruction()->getFunction()) {
   1249       // This flag will fully abort the analysis, so don't bother with anything
   1250       // else.
   1251       IsRecursiveCall = true;
   1252       return false;
   1253     }
   1254 
   1255     if (TTI.isLoweredToCall(F)) {
      // We account for an average of one instruction per call argument setup
      // here.
   1258       Cost += CS.arg_size() * InlineConstants::InstrCost;
   1259 
   1260       // Everything other than inline ASM will also have a significant cost
   1261       // merely from making the call.
   1262       if (!isa<InlineAsm>(CS.getCalledValue()))
   1263         Cost += InlineConstants::CallPenalty;
   1264     }
   1265 
   1266     if (!CS.onlyReadsMemory())
   1267       disableLoadElimination();
   1268     return Base::visitCallSite(CS);
   1269   }
   1270 
   1271   // Otherwise we're in a very special case -- an indirect function call. See
   1272   // if we can be particularly clever about this.
   1273   Value *Callee = CS.getCalledValue();
   1274 
  // First, pay the price of the argument setup. We account for an average of
  // one instruction per call argument setup here.
   1277   Cost += CS.arg_size() * InlineConstants::InstrCost;
   1278 
   1279   // Next, check if this happens to be an indirect function call to a known
   1280   // function in this inline context. If not, we've done all we can.
   1281   Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
   1282   if (!F) {
   1283     if (!CS.onlyReadsMemory())
   1284       disableLoadElimination();
   1285     return Base::visitCallSite(CS);
   1286   }
   1287 
   1288   // If we have a constant that we are calling as a function, we can peer
   1289   // through it and see the function target. This happens not infrequently
   1290   // during devirtualization and so we want to give it a hefty bonus for
   1291   // inlining, but cap that bonus in the event that inlining wouldn't pan
   1292   // out. Pretend to inline the function, with a custom threshold.
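  // For example, if the nested analysis below finds the target inlinable with
  // a cost below its threshold, the remaining slack (threshold minus cost) is
  // subtracted from this call's Cost as a devirtualization bonus.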
   1293   auto IndirectCallParams = Params;
   1294   IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
   1295   CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, CS,
   1296                   IndirectCallParams);
   1297   if (CA.analyzeCall(CS)) {
   1298     // We were able to inline the indirect call! Subtract the cost from the
   1299     // threshold to get the bonus we want to apply, but don't go below zero.
   1300     Cost -= std::max(0, CA.getThreshold() - CA.getCost());
   1301   }
   1302 
   1303   if (!F->onlyReadsMemory())
   1304     disableLoadElimination();
   1305   return Base::visitCallSite(CS);
   1306 }
   1307 
   1308 bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
   1309   // At least one return instruction will be free after inlining.
   1310   bool Free = !HasReturn;
   1311   HasReturn = true;
   1312   return Free;
   1313 }
   1314 
   1315 bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
   1316   // We model unconditional branches as essentially free -- they really
   1317   // shouldn't exist at all, but handling them makes the behavior of the
   1318   // inliner more regular and predictable. Interestingly, conditional branches
   1319   // which will fold away are also free.
   1320   return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
   1321          dyn_cast_or_null<ConstantInt>(
   1322              SimplifiedValues.lookup(BI.getCondition()));
   1323 }
   1324 
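// Handle select instructions, folding them when the condition simplifies to a
// constant at this call site. For example (illustrative IR), if the condition
// of `select i1 %c, i32 7, i32 %b` has been simplified to true because %c came
// from a constant argument, the select folds to the constant 7 and is free.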
   1325 bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
   1326   bool CheckSROA = SI.getType()->isPointerTy();
   1327   Value *TrueVal = SI.getTrueValue();
   1328   Value *FalseVal = SI.getFalseValue();
   1329 
   1330   Constant *TrueC = dyn_cast<Constant>(TrueVal);
   1331   if (!TrueC)
   1332     TrueC = SimplifiedValues.lookup(TrueVal);
   1333   Constant *FalseC = dyn_cast<Constant>(FalseVal);
   1334   if (!FalseC)
   1335     FalseC = SimplifiedValues.lookup(FalseVal);
   1336   Constant *CondC =
   1337       dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition()));
   1338 
   1339   if (!CondC) {
   1340     // Select C, X, X => X
   1341     if (TrueC == FalseC && TrueC) {
   1342       SimplifiedValues[&SI] = TrueC;
   1343       return true;
   1344     }
   1345 
   1346     if (!CheckSROA)
   1347       return Base::visitSelectInst(SI);
   1348 
   1349     std::pair<Value *, APInt> TrueBaseAndOffset =
   1350         ConstantOffsetPtrs.lookup(TrueVal);
   1351     std::pair<Value *, APInt> FalseBaseAndOffset =
   1352         ConstantOffsetPtrs.lookup(FalseVal);
   1353     if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
   1354       ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;
   1355 
   1356       Value *SROAArg;
   1357       DenseMap<Value *, int>::iterator CostIt;
   1358       if (lookupSROAArgAndCost(TrueVal, SROAArg, CostIt))
   1359         SROAArgValues[&SI] = SROAArg;
   1360       return true;
   1361     }
   1362 
   1363     return Base::visitSelectInst(SI);
   1364   }
   1365 
   1366   // Select condition is a constant.
   1367   Value *SelectedV = CondC->isAllOnesValue()
   1368                          ? TrueVal
   1369                          : (CondC->isNullValue()) ? FalseVal : nullptr;
   1370   if (!SelectedV) {
   1371     // Condition is a vector constant that is not all 1s or all 0s.  If all
    // operands are constants, ConstantExpr::getSelect() can handle cases such
    // as vector selects.
   1374     if (TrueC && FalseC) {
   1375       if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) {
   1376         SimplifiedValues[&SI] = C;
   1377         return true;
   1378       }
   1379     }
   1380     return Base::visitSelectInst(SI);
   1381   }
   1382 
   1383   // Condition is either all 1s or all 0s. SI can be simplified.
   1384   if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
   1385     SimplifiedValues[&SI] = SelectedC;
   1386     return true;
   1387   }
   1388 
   1389   if (!CheckSROA)
   1390     return true;
   1391 
   1392   std::pair<Value *, APInt> BaseAndOffset =
   1393       ConstantOffsetPtrs.lookup(SelectedV);
   1394   if (BaseAndOffset.first) {
   1395     ConstantOffsetPtrs[&SI] = BaseAndOffset;
   1396 
   1397     Value *SROAArg;
   1398     DenseMap<Value *, int>::iterator CostIt;
   1399     if (lookupSROAArgAndCost(SelectedV, SROAArg, CostIt))
   1400       SROAArgValues[&SI] = SROAArg;
   1401   }
   1402 
   1403   return true;
   1404 }
   1405 
   1406 bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
   1407   // We model unconditional switches as free, see the comments on handling
   1408   // branches.
   1409   if (isa<ConstantInt>(SI.getCondition()))
   1410     return true;
   1411   if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
   1412     if (isa<ConstantInt>(V))
   1413       return true;
   1414 
   1415   // Assume the most general case where the switch is lowered into
   1416   // either a jump table, bit test, or a balanced binary tree consisting of
   1417   // case clusters without merging adjacent clusters with the same
  // destination. We do not consider switches that are lowered with a mix
  // of jump table/bit test/binary search tree. The cost of the switch is
  // proportional to the size of the tree or the size of the jump table range.
   1421   //
   1422   // NB: We convert large switches which are just used to initialize large phi
   1423   // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
   1424   // inlining those. It will prevent inlining in cases where the optimization
   1425   // does not (yet) fire.
   1426 
  // Maximum valid value to which Cost may be increased in this function.
   1428   int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;
   1429 
   1430   // Exit early for a large switch, assuming one case needs at least one
   1431   // instruction.
   1432   // FIXME: This is not true for a bit test, but ignore such case for now to
   1433   // save compile-time.
   1434   int64_t CostLowerBound =
   1435       std::min((int64_t)CostUpperBound,
   1436                (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
   1437 
   1438   if (CostLowerBound > Threshold && !ComputeFullInlineCost) {
   1439     Cost = CostLowerBound;
   1440     return false;
   1441   }
   1442 
   1443   unsigned JumpTableSize = 0;
   1444   unsigned NumCaseCluster =
   1445       TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize);
   1446 
   1447   // If suitable for a jump table, consider the cost for the table size and
   1448   // branch to destination.
   1449   if (JumpTableSize) {
   1450     int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
   1451                      4 * InlineConstants::InstrCost;
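    // For example (illustrative numbers), a switch that lowers to a 16-entry
    // jump table is charged (16 + 4) * InstrCost here, in addition to the
    // base cost of the switch instruction itself.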
   1452 
   1453     Cost = std::min((int64_t)CostUpperBound, JTCost + Cost);
   1454     return false;
   1455   }
   1456 
  // When forming a binary search tree, we should find the number of nodes,
  // which is the same as the number of comparisons when lowered. For a given
  // number of clusters, n, we can define a recursive function, f(n), to find
  // the number of nodes in the tree. The recursion is:
  //   f(n) = 1 + f(n/2) + f(n - n/2), when n > 3,
  //   f(n) = n,                       when n <= 3.
  // This leads to a binary tree where each leaf is either f(2) or f(3) when
  // n > 3. So the number of comparisons from leaves should be n, while the
  // number of non-leaf nodes should be:
  //   2^(log2(n) - 1) - 1
  //   = 2^log2(n) * 2^-1 - 1
  //   = n / 2 - 1.
  // Combining the comparisons from leaf and non-leaf nodes, we can estimate
  // the number of comparisons in a simple closed form:
  //   n + n / 2 - 1 = n * 3 / 2 - 1
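  // For example, with n = 8 clusters the tree has 8 leaf comparisons plus
  // 8 / 2 - 1 = 3 non-leaf comparisons, i.e. 3 * 8 / 2 - 1 = 11 in total.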
   1472   if (NumCaseCluster <= 3) {
   1473     // Suppose a comparison includes one compare and one conditional branch.
   1474     Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
   1475     return false;
   1476   }
   1477 
   1478   int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1;
   1479   int64_t SwitchCost =
   1480       ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
   1481 
   1482   Cost = std::min((int64_t)CostUpperBound, SwitchCost + Cost);
   1483   return false;
   1484 }
   1485 
   1486 bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
   1487   // We never want to inline functions that contain an indirectbr.  This is
  // incorrect because all the blockaddresses (in static global initializers,
  // for example) would still refer to the original function, and this
  // indirect jump would jump from the inlined copy of the function into the
  // original function, which is extremely undefined behavior.
   1492   // FIXME: This logic isn't really right; we can safely inline functions with
   1493   // indirectbr's as long as no other function or global references the
   1494   // blockaddress of a block within the current function.
   1495   HasIndirectBr = true;
   1496   return false;
   1497 }
   1498 
   1499 bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
   1500   // FIXME: It's not clear that a single instruction is an accurate model for
   1501   // the inline cost of a resume instruction.
   1502   return false;
   1503 }
   1504 
   1505 bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
   1506   // FIXME: It's not clear that a single instruction is an accurate model for
   1507   // the inline cost of a cleanupret instruction.
   1508   return false;
   1509 }
   1510 
   1511 bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
   1512   // FIXME: It's not clear that a single instruction is an accurate model for
   1513   // the inline cost of a catchret instruction.
   1514   return false;
   1515 }
   1516 
   1517 bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
  // FIXME: It might be reasonable to discount the cost of instructions leading
   1519   // to unreachable as they have the lowest possible impact on both runtime and
   1520   // code size.
   1521   return true; // No actual code is needed for unreachable.
   1522 }
   1523 
   1524 bool CallAnalyzer::visitInstruction(Instruction &I) {
   1525   // Some instructions are free. All of the free intrinsics can also be
   1526   // handled by SROA, etc.
   1527   if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
   1528     return true;
   1529 
   1530   // We found something we don't understand or can't handle. Mark any SROA-able
   1531   // values in the operand list as no longer viable.
   1532   for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
   1533     disableSROA(*OI);
   1534 
   1535   return false;
   1536 }
   1537 
   1538 /// Analyze a basic block for its contribution to the inline cost.
   1539 ///
   1540 /// This method walks the analyzer over every instruction in the given basic
   1541 /// block and accounts for their cost during inlining at this callsite. It
   1542 /// aborts early if the threshold has been exceeded or an impossible to inline
   1543 /// construct has been detected. It returns false if inlining is no longer
   1544 /// viable, and true if inlining remains viable.
   1545 bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
   1546                                 SmallPtrSetImpl<const Value *> &EphValues) {
   1547   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
    // FIXME: Currently, the number of instructions in a function, regardless
    // of our ability to simplify them to constants or dead code during
    // inlining, is used by the vector bonus heuristic. As long as that's true,
   1551     // we have to special case debug intrinsics here to prevent differences in
   1552     // inlining due to debug symbols. Eventually, the number of unsimplified
   1553     // instructions shouldn't factor into the cost computation, but until then,
   1554     // hack around it here.
   1555     if (isa<DbgInfoIntrinsic>(I))
   1556       continue;
   1557 
   1558     // Skip ephemeral values.
   1559     if (EphValues.count(&*I))
   1560       continue;
   1561 
   1562     ++NumInstructions;
   1563     if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
   1564       ++NumVectorInstructions;
   1565 
   1566     // If the instruction simplified to a constant, there is no cost to this
   1567     // instruction. Visit the instructions using our InstVisitor to account for
   1568     // all of the per-instruction logic. The visit tree returns true if we
   1569     // consumed the instruction in any way, and false if the instruction's base
   1570     // cost should count against inlining.
   1571     if (Base::visit(&*I))
   1572       ++NumInstructionsSimplified;
   1573     else
   1574       Cost += InlineConstants::InstrCost;
   1575 
   1576     using namespace ore;
    // If visiting this instruction detected an uninlinable pattern, abort.
   1578     if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
   1579         HasIndirectBr || HasUninlineableIntrinsic || UsesVarArgs) {
   1580       if (ORE)
   1581         ORE->emit([&]() {
   1582           return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
   1583                                           CandidateCS.getInstruction())
   1584                  << NV("Callee", &F)
   1585                  << " has uninlinable pattern and cost is not fully computed";
   1586         });
   1587       return false;
   1588     }
   1589 
   1590     // If the caller is a recursive function then we don't want to inline
   1591     // functions which allocate a lot of stack space because it would increase
   1592     // the caller stack usage dramatically.
   1593     if (IsCallerRecursive &&
   1594         AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
   1595       if (ORE)
   1596         ORE->emit([&]() {
   1597           return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
   1598                                           CandidateCS.getInstruction())
   1599                  << NV("Callee", &F)
   1600                  << " is recursive and allocates too much stack space. Cost is "
   1601                     "not fully computed";
   1602         });
   1603       return false;
   1604     }
   1605 
    // Check if we've passed the maximum possible threshold so we don't spin in
   1607     // huge basic blocks that will never inline.
   1608     if (Cost >= Threshold && !ComputeFullInlineCost)
   1609       return false;
   1610   }
   1611 
   1612   return true;
   1613 }
   1614 
   1615 /// Compute the base pointer and cumulative constant offsets for V.
   1616 ///
   1617 /// This strips all constant offsets off of V, leaving it the base pointer, and
   1618 /// accumulates the total constant offset applied in the returned constant. It
/// returns null if V is not a pointer, and returns the constant '0' if there
/// are no constant offsets applied.
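///
/// For example (illustrative IR), given
///   %p = getelementptr inbounds [4 x i32], [4 x i32]* %a, i64 0, i64 2
/// V is rewritten to %a and the returned constant is 8, the byte offset of
/// element 2.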
   1621 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
   1622   if (!V->getType()->isPointerTy())
   1623     return nullptr;
   1624 
   1625   unsigned AS = V->getType()->getPointerAddressSpace();
   1626   unsigned IntPtrWidth = DL.getIndexSizeInBits(AS);
   1627   APInt Offset = APInt::getNullValue(IntPtrWidth);
   1628 
   1629   // Even though we don't look through PHI nodes, we could be called on an
   1630   // instruction in an unreachable block, which may be on a cycle.
   1631   SmallPtrSet<Value *, 4> Visited;
   1632   Visited.insert(V);
   1633   do {
   1634     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
   1635       if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
   1636         return nullptr;
   1637       V = GEP->getPointerOperand();
   1638     } else if (Operator::getOpcode(V) == Instruction::BitCast) {
   1639       V = cast<Operator>(V)->getOperand(0);
   1640     } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
   1641       if (GA->isInterposable())
   1642         break;
   1643       V = GA->getAliasee();
   1644     } else {
   1645       break;
   1646     }
   1647     assert(V->getType()->isPointerTy() && "Unexpected operand type!");
   1648   } while (Visited.insert(V).second);
   1649 
   1650   Type *IntPtrTy = DL.getIntPtrType(V->getContext(), AS);
   1651   return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
   1652 }
   1653 
   1654 /// Find dead blocks due to deleted CFG edges during inlining.
   1655 ///
   1656 /// If we know the successor of the current block, \p CurrBB, has to be \p
   1657 /// NextBB, the other successors of \p CurrBB are dead if these successors have
   1658 /// no live incoming CFG edges.  If one block is found to be dead, we can
   1659 /// continue growing the dead block list by checking the successors of the dead
   1660 /// blocks to see if all their incoming edges are dead or not.
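///
/// For example (illustrative), if CurrBB ends in a conditional branch whose
/// condition has been simplified to a constant, NextBB is the taken successor
/// and the other successor is dead unless some other live block still branches
/// to it; blocks reachable only through that dead successor become dead too.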
   1661 void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
   1662   auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) {
    // A CFG edge is dead if the predecessor is dead or the predecessor has a
    // known successor which is not the one under examination.
   1665     return (DeadBlocks.count(Pred) ||
   1666             (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
   1667   };
   1668 
   1669   auto IsNewlyDead = [&](BasicBlock *BB) {
   1670     // If all the edges to a block are dead, the block is also dead.
   1671     return (!DeadBlocks.count(BB) &&
   1672             llvm::all_of(predecessors(BB),
   1673                          [&](BasicBlock *P) { return IsEdgeDead(P, BB); }));
   1674   };
   1675 
   1676   for (BasicBlock *Succ : successors(CurrBB)) {
   1677     if (Succ == NextBB || !IsNewlyDead(Succ))
   1678       continue;
   1679     SmallVector<BasicBlock *, 4> NewDead;
   1680     NewDead.push_back(Succ);
   1681     while (!NewDead.empty()) {
   1682       BasicBlock *Dead = NewDead.pop_back_val();
   1683       if (DeadBlocks.insert(Dead))
   1684         // Continue growing the dead block lists.
        // Continue growing the dead block list.
   1686           if (IsNewlyDead(S))
   1687             NewDead.push_back(S);
   1688     }
   1689   }
   1690 }
   1691 
   1692 /// Analyze a call site for potential inlining.
   1693 ///
   1694 /// Returns true if inlining this call is viable, and false if it is not
   1695 /// viable. It computes the cost and adjusts the threshold based on numerous
   1696 /// factors and heuristics. If this method returns false but the computed cost
   1697 /// is below the computed threshold, then inlining was forcibly disabled by
   1698 /// some artifact of the routine.
   1699 bool CallAnalyzer::analyzeCall(CallSite CS) {
   1700   ++NumCallsAnalyzed;
   1701 
   1702   // Perform some tweaks to the cost and threshold based on the direct
   1703   // callsite information.
   1704 
   1705   // We want to more aggressively inline vector-dense kernels, so up the
   1706   // threshold, and we'll lower it if the % of vector instructions gets too
  // low. Note that these bonuses are somewhat arbitrary and evolved over time
   1708   // by accident as much as because they are principled bonuses.
   1709   //
   1710   // FIXME: It would be nice to remove all such bonuses. At least it would be
   1711   // nice to base the bonus values on something more scientific.
   1712   assert(NumInstructions == 0);
   1713   assert(NumVectorInstructions == 0);
   1714 
   1715   // Update the threshold based on callsite properties
   1716   updateThreshold(CS, F);
   1717 
  // Speculatively apply all possible bonuses to Threshold. If Cost exceeds
  // this Threshold at any time, and Cost cannot decrease, we can stop
  // processing the rest of the function body.
   1721   Threshold += (SingleBBBonus + VectorBonus);
   1722 
   1723   // Give out bonuses for the callsite, as the instructions setting them up
   1724   // will be gone after inlining.
   1725   Cost -= getCallsiteCost(CS, DL);
   1726 
   1727   // If this function uses the coldcc calling convention, prefer not to inline
   1728   // it.
   1729   if (F.getCallingConv() == CallingConv::Cold)
   1730     Cost += InlineConstants::ColdccPenalty;
   1731 
   1732   // Check if we're done. This can happen due to bonuses and penalties.
   1733   if (Cost >= Threshold && !ComputeFullInlineCost)
   1734     return false;
   1735 
   1736   if (F.empty())
   1737     return true;
   1738 
   1739   Function *Caller = CS.getInstruction()->getFunction();
   1740   // Check if the caller function is recursive itself.
   1741   for (User *U : Caller->users()) {
   1742     CallSite Site(U);
   1743     if (!Site)
   1744       continue;
   1745     Instruction *I = Site.getInstruction();
   1746     if (I->getFunction() == Caller) {
   1747       IsCallerRecursive = true;
   1748       break;
   1749     }
   1750   }
   1751 
   1752   // Populate our simplified values by mapping from function arguments to call
   1753   // arguments with known important simplifications.
   1754   CallSite::arg_iterator CAI = CS.arg_begin();
   1755   for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
   1756        FAI != FAE; ++FAI, ++CAI) {
   1757     assert(CAI != CS.arg_end());
   1758     if (Constant *C = dyn_cast<Constant>(CAI))
   1759       SimplifiedValues[&*FAI] = C;
   1760 
   1761     Value *PtrArg = *CAI;
   1762     if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
   1763       ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue());
   1764 
   1765       // We can SROA any pointer arguments derived from alloca instructions.
   1766       if (isa<AllocaInst>(PtrArg)) {
   1767         SROAArgValues[&*FAI] = PtrArg;
   1768         SROAArgCosts[PtrArg] = 0;
   1769       }
   1770     }
   1771   }
   1772   NumConstantArgs = SimplifiedValues.size();
   1773   NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
   1774   NumAllocaArgs = SROAArgValues.size();
   1775 
   1776   // FIXME: If a caller has multiple calls to a callee, we end up recomputing
   1777   // the ephemeral values multiple times (and they're completely determined by
   1778   // the callee, so this is purely duplicate work).
   1779   SmallPtrSet<const Value *, 32> EphValues;
   1780   CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
   1781 
   1782   // The worklist of live basic blocks in the callee *after* inlining. We avoid
   1783   // adding basic blocks of the callee which can be proven to be dead for this
   1784   // particular call site in order to get more accurate cost estimates. This
   1785   // requires a somewhat heavyweight iteration pattern: we need to walk the
  // basic blocks in a breadth-first order as we insert live successors. To
  // accomplish this, and because small iteration counts are the common case
  // (we exit as soon as we cross our threshold), we use a small-size optimized
  // SetVector.
   1789   typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
   1790                     SmallPtrSet<BasicBlock *, 16>>
   1791       BBSetVector;
   1792   BBSetVector BBWorklist;
   1793   BBWorklist.insert(&F.getEntryBlock());
   1794   bool SingleBB = true;
   1795   // Note that we *must not* cache the size, this loop grows the worklist.
   1796   for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
   1797     // Bail out the moment we cross the threshold. This means we'll under-count
   1798     // the cost, but only when undercounting doesn't matter.
   1799     if (Cost >= Threshold && !ComputeFullInlineCost)
   1800       break;
   1801 
   1802     BasicBlock *BB = BBWorklist[Idx];
   1803     if (BB->empty())
   1804       continue;
   1805 
   1806     // Disallow inlining a blockaddress. A blockaddress only has defined
   1807     // behavior for an indirect branch in the same function, and we do not
   1808     // currently support inlining indirect branches. But, the inliner may not
   1809     // see an indirect branch that ends up being dead code at a particular call
   1810     // site. If the blockaddress escapes the function, e.g., via a global
   1811     // variable, inlining may lead to an invalid cross-function reference.
   1812     if (BB->hasAddressTaken())
   1813       return false;
   1814 
   1815     // Analyze the cost of this block. If we blow through the threshold, this
    // returns false, and we can bail out.
   1817     if (!analyzeBlock(BB, EphValues))
   1818       return false;
   1819 
   1820     TerminatorInst *TI = BB->getTerminator();
   1821 
    // Add in the live successors by first checking whether we have a
    // terminator that may be simplified based on the values simplified by
    // this call.
   1824     if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
   1825       if (BI->isConditional()) {
   1826         Value *Cond = BI->getCondition();
   1827         if (ConstantInt *SimpleCond =
   1828                 dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
   1829           BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0);
   1830           BBWorklist.insert(NextBB);
   1831           KnownSuccessors[BB] = NextBB;
   1832           findDeadBlocks(BB, NextBB);
   1833           continue;
   1834         }
   1835       }
   1836     } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
   1837       Value *Cond = SI->getCondition();
   1838       if (ConstantInt *SimpleCond =
   1839               dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
   1840         BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor();
   1841         BBWorklist.insert(NextBB);
   1842         KnownSuccessors[BB] = NextBB;
   1843         findDeadBlocks(BB, NextBB);
   1844         continue;
   1845       }
   1846     }
   1847 
   1848     // If we're unable to select a particular successor, just count all of
   1849     // them.
   1850     for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
   1851          ++TIdx)
   1852       BBWorklist.insert(TI->getSuccessor(TIdx));
   1853 
    // If we had any successors at this point, then post-inlining is likely to
   1855     // have them as well. Note that we assume any basic blocks which existed
   1856     // due to branches or switches which folded above will also fold after
   1857     // inlining.
   1858     if (SingleBB && TI->getNumSuccessors() > 1) {
   1859       // Take off the bonus we applied to the threshold.
   1860       Threshold -= SingleBBBonus;
   1861       SingleBB = false;
   1862     }
   1863   }
   1864 
   1865   bool OnlyOneCallAndLocalLinkage =
   1866       F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
   1867   // If this is a noduplicate call, we can still inline as long as
   1868   // inlining this would cause the removal of the caller (so the instruction
   1869   // is not actually duplicated, just moved).
   1870   if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
   1871     return false;
   1872 
   1873   // We applied the maximum possible vector bonus at the beginning. Now,
   1874   // subtract the excess bonus, if any, from the Threshold before
   1875   // comparing against Cost.
   1876   if (NumVectorInstructions <= NumInstructions / 10)
   1877     Threshold -= VectorBonus;
   1878   else if (NumVectorInstructions <= NumInstructions / 2)
   1879     Threshold -= VectorBonus/2;
   1880 
   1881   return Cost < std::max(1, Threshold);
   1882 }
   1883 
   1884 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   1885 /// Dump stats about this call's analysis.
   1886 LLVM_DUMP_METHOD void CallAnalyzer::dump() {
   1887 #define DEBUG_PRINT_STAT(x) dbgs() << "      " #x ": " << x << "\n"
   1888   DEBUG_PRINT_STAT(NumConstantArgs);
   1889   DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
   1890   DEBUG_PRINT_STAT(NumAllocaArgs);
   1891   DEBUG_PRINT_STAT(NumConstantPtrCmps);
   1892   DEBUG_PRINT_STAT(NumConstantPtrDiffs);
   1893   DEBUG_PRINT_STAT(NumInstructionsSimplified);
   1894   DEBUG_PRINT_STAT(NumInstructions);
   1895   DEBUG_PRINT_STAT(SROACostSavings);
   1896   DEBUG_PRINT_STAT(SROACostSavingsLost);
   1897   DEBUG_PRINT_STAT(LoadEliminationCost);
   1898   DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
   1899   DEBUG_PRINT_STAT(Cost);
   1900   DEBUG_PRINT_STAT(Threshold);
   1901 #undef DEBUG_PRINT_STAT
   1902 }
   1903 #endif
   1904 
/// Test that there are no attribute conflicts between Caller and Callee that
/// prevent inlining.
   1907 static bool functionsHaveCompatibleAttributes(Function *Caller,
   1908                                               Function *Callee,
   1909                                               TargetTransformInfo &TTI) {
   1910   return TTI.areInlineCompatible(Caller, Callee) &&
   1911          AttributeFuncs::areInlineCompatible(*Caller, *Callee);
   1912 }
   1913 
   1914 int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) {
   1915   int Cost = 0;
   1916   for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
   1917     if (CS.isByValArgument(I)) {
   1918       // We approximate the number of loads and stores needed by dividing the
   1919       // size of the byval type by the target's pointer size.
   1920       PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
   1921       unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
   1922       unsigned AS = PTy->getAddressSpace();
   1923       unsigned PointerSize = DL.getPointerSizeInBits(AS);
   1924       // Ceiling division.
   1925       unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
   1926 
   1927       // If it generates more than 8 stores it is likely to be expanded as an
   1928       // inline memcpy so we take that as an upper bound. Otherwise we assume
   1929       // one load and one store per word copied.
   1930       // FIXME: The maxStoresPerMemcpy setting from the target should be used
   1931       // here instead of a magic number of 8, but it's not available via
   1932       // DataLayout.
   1933       NumStores = std::min(NumStores, 8U);
   1934 
   1935       Cost += 2 * NumStores * InlineConstants::InstrCost;
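      // For example (illustrative), a 256-bit byval struct copied through
      // 64-bit pointers needs ceil(256 / 64) = 4 words, so it is charged
      // 2 * 4 * InstrCost for the loads and stores of the copy.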
   1936     } else {
   1937       // For non-byval arguments subtract off one instruction per call
   1938       // argument.
   1939       Cost += InlineConstants::InstrCost;
   1940     }
   1941   }
   1942   // The call instruction also disappears after inlining.
   1943   Cost += InlineConstants::InstrCost + InlineConstants::CallPenalty;
   1944   return Cost;
   1945 }
   1946 
   1947 InlineCost llvm::getInlineCost(
   1948     CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
   1949     std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
   1950     Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
   1951     ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
   1952   return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI,
   1953                        GetAssumptionCache, GetBFI, PSI, ORE);
   1954 }
   1955 
   1956 InlineCost llvm::getInlineCost(
   1957     CallSite CS, Function *Callee, const InlineParams &Params,
   1958     TargetTransformInfo &CalleeTTI,
   1959     std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
   1960     Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
   1961     ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
   1962 
   1963   // Cannot inline indirect calls.
   1964   if (!Callee)
   1965     return llvm::InlineCost::getNever();
   1966 
  // Never inline calls with byval arguments that do not have the alloca
  // address space. Since byval arguments can be replaced with a copy to an
  // alloca, the inlined code would need to be adjusted to handle the fact that
  // the argument is in the alloca address space (so it is a little bit
  // complicated to solve).
   1972   unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace();
   1973   for (unsigned I = 0, E = CS.arg_size(); I != E; ++I)
   1974     if (CS.isByValArgument(I)) {
   1975       PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
   1976       if (PTy->getAddressSpace() != AllocaAS)
   1977         return llvm::InlineCost::getNever();
   1978     }
   1979 
   1980   // Calls to functions with always-inline attributes should be inlined
   1981   // whenever possible.
   1982   if (CS.hasFnAttr(Attribute::AlwaysInline)) {
   1983     if (isInlineViable(*Callee))
   1984       return llvm::InlineCost::getAlways();
   1985     return llvm::InlineCost::getNever();
   1986   }
   1987 
   1988   // Never inline functions with conflicting attributes (unless callee has
   1989   // always-inline attribute).
   1990   Function *Caller = CS.getCaller();
   1991   if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI))
   1992     return llvm::InlineCost::getNever();
   1993 
   1994   // Don't inline this call if the caller has the optnone attribute.
   1995   if (Caller->hasFnAttribute(Attribute::OptimizeNone))
   1996     return llvm::InlineCost::getNever();
   1997 
   1998   // Don't inline a function that treats null pointer as valid into a caller
   1999   // that does not have this attribute.
   2000   if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
   2001     return llvm::InlineCost::getNever();
   2002 
   2003   // Don't inline functions which can be interposed at link-time.  Don't inline
   2004   // functions marked noinline or call sites marked noinline.
   2005   // Note: inlining non-exact non-interposable functions is fine, since we know
   2006   // we have *a* correct implementation of the source level function.
   2007   if (Callee->isInterposable() || Callee->hasFnAttribute(Attribute::NoInline) ||
   2008       CS.isNoInline())
   2009     return llvm::InlineCost::getNever();
   2010 
   2011   LLVM_DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName()
   2012                           << "... (caller:" << Caller->getName() << ")\n");
   2013 
   2014   CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, CS,
   2015                   Params);
   2016   bool ShouldInline = CA.analyzeCall(CS);
   2017 
   2018   LLVM_DEBUG(CA.dump());
   2019 
   2020   // Check if there was a reason to force inlining or no inlining.
   2021   if (!ShouldInline && CA.getCost() < CA.getThreshold())
   2022     return InlineCost::getNever();
   2023   if (ShouldInline && CA.getCost() >= CA.getThreshold())
   2024     return InlineCost::getAlways();
   2025 
   2026   return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
   2027 }
   2028 
   2029 bool llvm::isInlineViable(Function &F) {
   2030   bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
   2031   for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
   2032     // Disallow inlining of functions which contain indirect branches or
   2033     // blockaddresses.
   2034     if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
   2035       return false;
   2036 
   2037     for (auto &II : *BI) {
   2038       CallSite CS(&II);
   2039       if (!CS)
   2040         continue;
   2041 
   2042       // Disallow recursive calls.
   2043       if (&F == CS.getCalledFunction())
   2044         return false;
   2045 
   2046       // Disallow calls which expose returns-twice to a function not previously
   2047       // attributed as such.
   2048       if (!ReturnsTwice && CS.isCall() &&
   2049           cast<CallInst>(CS.getInstruction())->canReturnTwice())
   2050         return false;
   2051 
   2052       if (CS.getCalledFunction())
   2053         switch (CS.getCalledFunction()->getIntrinsicID()) {
   2054         default:
   2055           break;
        // Disallow inlining of @llvm.icall.branch.funnel because the current
   2057         // backend can't separate call targets from call arguments.
   2058         case llvm::Intrinsic::icall_branch_funnel:
   2059         // Disallow inlining functions that call @llvm.localescape. Doing this
   2060         // correctly would require major changes to the inliner.
   2061         case llvm::Intrinsic::localescape:
   2062         // Disallow inlining of functions that access VarArgs.
   2063         case llvm::Intrinsic::vastart:
   2064         case llvm::Intrinsic::vaend:
   2065           return false;
   2066         }
   2067     }
   2068   }
   2069 
   2070   return true;
   2071 }
   2072 
   2073 // APIs to create InlineParams based on command line flags and/or other
   2074 // parameters.
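//
// A minimal usage sketch (hypothetical caller): a pass built for -O2 without
// size optimization might do
//   InlineParams Params = llvm::getInlineParams(/*OptLevel=*/2,
//                                               /*SizeOptLevel=*/0);
// and then hand Params to llvm::getInlineCost for each candidate call site.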
   2075 
   2076 InlineParams llvm::getInlineParams(int Threshold) {
   2077   InlineParams Params;
   2078 
   2079   // This field is the threshold to use for a callee by default. This is
   2080   // derived from one or more of:
   2081   //  * optimization or size-optimization levels,
   2082   //  * a value passed to createFunctionInliningPass function, or
   2083   //  * the -inline-threshold flag.
   2084   //  If the -inline-threshold flag is explicitly specified, that is used
   2085   //  irrespective of anything else.
   2086   if (InlineThreshold.getNumOccurrences() > 0)
   2087     Params.DefaultThreshold = InlineThreshold;
   2088   else
   2089     Params.DefaultThreshold = Threshold;
   2090 
   2091   // Set the HintThreshold knob from the -inlinehint-threshold.
   2092   Params.HintThreshold = HintThreshold;
   2093 
   2094   // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
   2095   Params.HotCallSiteThreshold = HotCallSiteThreshold;
   2096 
   2097   // If the -locally-hot-callsite-threshold is explicitly specified, use it to
   2098   // populate LocallyHotCallSiteThreshold. Later, we populate
   2099   // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if
   2100   // we know that optimization level is O3 (in the getInlineParams variant that
   2101   // takes the opt and size levels).
   2102   // FIXME: Remove this check (and make the assignment unconditional) after
   2103   // addressing size regression issues at O2.
   2104   if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0)
   2105     Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
   2106 
   2107   // Set the ColdCallSiteThreshold knob from the -inline-cold-callsite-threshold.
   2108   Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
   2109 
   2110   // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
  // -inline-threshold commandline option is not explicitly given. If that
   2112   // option is present, then its value applies even for callees with size and
   2113   // minsize attributes.
   2114   // If the -inline-threshold is not specified, set the ColdThreshold from the
   2115   // -inlinecold-threshold even if it is not explicitly passed. If
   2116   // -inline-threshold is specified, then -inlinecold-threshold needs to be
   2117   // explicitly specified to set the ColdThreshold knob
   2118   if (InlineThreshold.getNumOccurrences() == 0) {
   2119     Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
   2120     Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
   2121     Params.ColdThreshold = ColdThreshold;
   2122   } else if (ColdThreshold.getNumOccurrences() > 0) {
   2123     Params.ColdThreshold = ColdThreshold;
   2124   }
   2125   return Params;
   2126 }
   2127 
   2128 InlineParams llvm::getInlineParams() {
   2129   return getInlineParams(InlineThreshold);
   2130 }
   2131 
   2132 // Compute the default threshold for inlining based on the opt level and the
   2133 // size opt level.
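// For example, an opt level of 3 yields InlineConstants::OptAggressiveThreshold,
// -Os (size level 1) yields InlineConstants::OptSizeThreshold, and -Oz (size
// level 2) yields InlineConstants::OptMinSizeThreshold.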
   2134 static int computeThresholdFromOptLevels(unsigned OptLevel,
   2135                                          unsigned SizeOptLevel) {
   2136   if (OptLevel > 2)
   2137     return InlineConstants::OptAggressiveThreshold;
   2138   if (SizeOptLevel == 1) // -Os
   2139     return InlineConstants::OptSizeThreshold;
   2140   if (SizeOptLevel == 2) // -Oz
   2141     return InlineConstants::OptMinSizeThreshold;
   2142   return InlineThreshold;
   2143 }
   2144 
   2145 InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
   2146   auto Params =
   2147       getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
   2148   // At O3, use the value of -locally-hot-callsite-threshold option to populate
   2149   // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only
   2150   // when it is specified explicitly.
   2151   if (OptLevel > 2)
   2152     Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
   2153   return Params;
   2154 }
   2155