Home | History | Annotate | Download | only in Instrumentation
      1 //===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This pass lowers instrprof_* intrinsics emitted by a frontend for profiling.
     11 // It also builds the data structures and initialization code needed for
     12 // updating execution counts and emitting the profile at runtime.
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
     17 #include "llvm/ADT/ArrayRef.h"
     18 #include "llvm/ADT/SmallVector.h"
     19 #include "llvm/ADT/StringRef.h"
     20 #include "llvm/ADT/Triple.h"
     21 #include "llvm/ADT/Twine.h"
     22 #include "llvm/Analysis/LoopInfo.h"
     23 #include "llvm/Analysis/TargetLibraryInfo.h"
     24 #include "llvm/IR/Attributes.h"
     25 #include "llvm/IR/BasicBlock.h"
     26 #include "llvm/IR/Constant.h"
     27 #include "llvm/IR/Constants.h"
     28 #include "llvm/IR/DerivedTypes.h"
     29 #include "llvm/IR/Dominators.h"
     30 #include "llvm/IR/Function.h"
     31 #include "llvm/IR/GlobalValue.h"
     32 #include "llvm/IR/GlobalVariable.h"
     33 #include "llvm/IR/IRBuilder.h"
     34 #include "llvm/IR/Instruction.h"
     35 #include "llvm/IR/Instructions.h"
     36 #include "llvm/IR/IntrinsicInst.h"
     37 #include "llvm/IR/Module.h"
     38 #include "llvm/IR/Type.h"
     39 #include "llvm/Pass.h"
     40 #include "llvm/ProfileData/InstrProf.h"
     41 #include "llvm/Support/Casting.h"
     42 #include "llvm/Support/CommandLine.h"
     43 #include "llvm/Support/Error.h"
     44 #include "llvm/Support/ErrorHandling.h"
     45 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
     46 #include "llvm/Transforms/Utils/ModuleUtils.h"
     47 #include "llvm/Transforms/Utils/SSAUpdater.h"
     48 #include <algorithm>
     49 #include <cassert>
     50 #include <cstddef>
     51 #include <cstdint>
     52 #include <string>
     53 
     54 using namespace llvm;
     55 
     56 #define DEBUG_TYPE "instrprof"
     57 
     58 // The start and end values of the precise value profile range for memory
     59 // intrinsic sizes. InstrProfiling::run parses this option into
        // MemOPSizeRangeStart / MemOPSizeRangeLast.
     60 cl::opt<std::string> MemOPSizeRange(
     61     "memop-size-range",
     62     cl::desc("Set the range of size in memory intrinsic calls to be profiled "
     63              "precisely, in a format of <start_val>:<end_val>"),
     64     cl::init(""));
     65 
     66 // The value that is considered to be a large value in memory intrinsic size
        // profiling. lowerValueProfileInst passes INT64_MIN in its place when this
        // option is 0.
     67 cl::opt<unsigned> MemOPSizeLarge(
     68     "memop-size-large",
     69     cl::desc("Set large value thresthold in memory intrinsic size profiling. "
     70              "Value of 0 disables the large value profiling."),
     71     cl::init(8192));
     72 
     73 namespace {
     74 
     75 cl::opt<bool> DoNameCompression("enable-name-compression",
     76                                 cl::desc("Enable name string compression"),
     77                                 cl::init(true));
     78 
        // Consulted by getVarName() when naming a function's profiling variables.
     79 cl::opt<bool> DoHashBasedCounterSplit(
     80     "hash-based-counter-split",
     81     cl::desc("Rename counter variable of a comdat function based on cfg hash"),
     82     cl::init(true));
     83 
        // Consulted by getOrCreateRegionCounters() before it emits the static
        // array of value profile node pointers.
     84 cl::opt<bool> ValueProfileStaticAlloc(
     85     "vp-static-alloc",
     86     cl::desc("Do static counter allocation for value profiler"),
     87     cl::init(true));
     88 
     89 cl::opt<double> NumCountersPerValueSite(
     90     "vp-counters-per-site",
     91     cl::desc("The average number of profile counters allocated "
     92              "per value profiling site."),
     93     // This is set to a very small value because in real programs, only
     94     // a very small percentage of value sites have non-zero targets, e.g, 1/30.
     95     // For those sites with non-zero profile, the average number of targets
     96     // is usually smaller than 2.
     97     cl::init(1.0));
     98 
        // Read by PGOCounterPromoterHelper: selects an atomic add in the exit
        // blocks instead of a plain load/add/store sequence.
     99 cl::opt<bool> AtomicCounterUpdatePromoted(
    100     "atomic-counter-update-promoted", cl::ZeroOrMore,
    101     cl::desc("Do counter update using atomic fetch add "
    102              " for promoted counters only"),
    103     cl::init(false));
    104 
    105 // If the option is not specified, whether counter promotion is done
    106 // depends on how the instrumentation lowering pipeline is set up
    107 // (see isCounterPromotionEnabled), i.e., the cl::init value below is
    108 // not what decides the default behavior. Explicitly setting this
    109 // option overrides the pipeline's choice.
    110 cl::opt<bool> DoCounterPromotion("do-counter-promotion", cl::ZeroOrMore,
    111                                  cl::desc("Do counter register promotion"),
    112                                  cl::init(false));
        // Per-loop cap returned by PGOCounterPromoter::getMaxNumOfPromotionsInLoop.
    113 cl::opt<unsigned> MaxNumOfPromotionsPerLoop(
    114     cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(20),
    115     cl::desc("Max number counter promotions per loop to avoid"
    116              " increasing register pressure too much"));
    117 
    118 // A debug option: -1 (the default) means no overall limit is enforced.
    119 cl::opt<int>
    120     MaxNumOfPromotions(cl::ZeroOrMore, "max-counter-promotions", cl::init(-1),
    121                        cl::desc("Max number of allowed counter promotions"));
    122 
        // Loops with more exiting blocks than this are not promoted speculatively.
    123 cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting(
    124     cl::ZeroOrMore, "speculative-counter-promotion-max-exiting", cl::init(3),
    125     cl::desc("The max number of exiting blocks of a loop to allow "
    126              " speculative counter promotion"));
    127 
    128 cl::opt<bool> SpeculativeCounterPromotionToLoop(
    129     cl::ZeroOrMore, "speculative-counter-promotion-to-loop", cl::init(false),
    130     cl::desc("When the option is false, if the target block is in a loop, "
    131              "the promotion will be disallowed unless the promoted counter "
    132              " update can be further/iteratively promoted into an acyclic "
    133              " region."));
    134 
        // When set, PGOCounterPromoterHelper re-registers the sunk load/store pair
        // as a candidate of the loop that contains the exit block.
    135 cl::opt<bool> IterativeCounterPromotion(
    136     cl::ZeroOrMore, "iterative-counter-promotion", cl::init(true),
    137     cl::desc("Allow counter promotion across the whole loop nest."));
    138 
    139 class InstrProfilingLegacyPass : public ModulePass {
          // Legacy pass manager wrapper: holds an InstrProfiling instance and
          // forwards runOnModule() to InstrProfiling::run().
    140   InstrProfiling InstrProf;
    141 
    142 public:
    143   static char ID;
    144 
          // The default constructor leaves InstrProf default-constructed; the
          // second form hands Options to it.
    145   InstrProfilingLegacyPass() : ModulePass(ID) {}
    146   InstrProfilingLegacyPass(const InstrProfOptions &Options)
    147       : ModulePass(ID), InstrProf(Options) {}
    148 
    149   StringRef getPassName() const override {
    150     return "Frontend instrumentation-based coverage lowering";
    151   }
    152 
          // Runs the lowering with the TargetLibraryInfo required below and
          // returns whatever InstrProfiling::run() reports.
    153   bool runOnModule(Module &M) override {
    154     return InstrProf.run(M, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI());
    155   }
    156 
          // Declares the CFG as preserved and TargetLibraryInfoWrapperPass as a
          // required analysis.
    157   void getAnalysisUsage(AnalysisUsage &AU) const override {
    158     AU.setPreservesCFG();
    159     AU.addRequired<TargetLibraryInfoWrapperPass>();
    160   }
    161 };
    162 
    163 ///
    164 /// A helper class to promote one counter RMW operation in the loop
    165 /// into register update.
    166 ///
    167 /// The RMW update for the counter will be sunk out of the loop after
    168 /// the transformation.
    169 ///
    170 class PGOCounterPromoterHelper : public LoadAndStorePromoter {
    171 public:
          // L and S are the counter load and store to promote (asserted to be a
          // LoadInst and a StoreInst). Init is registered with SSA as the value
          // available in the preheader PH. InsertPts[i] is the insertion point
          // used for ExitBlocks[i]; both are kept as ArrayRef views, not copies.
    172   PGOCounterPromoterHelper(
    173       Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init,
    174       BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks,
    175       ArrayRef<Instruction *> InsertPts,
    176       DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
    177       LoopInfo &LI)
    178       : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks),
    179         InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) {
    180     assert(isa<LoadInst>(L));
    181     assert(isa<StoreInst>(S));
    182     SSA.AddAvailableValue(PH, Init);
    183   }
    184 
          // Emits, at the recorded insertion point of every exit block, the update
          // that adds the value accumulated in the loop to the counter in memory.
    185   void doExtraRewritesBeforeFinalDeletion() const override {
    186     for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
    187       BasicBlock *ExitBlock = ExitBlocks[i];
    188       Instruction *InsertPos = InsertPts[i];
    189       // Get LiveIn value into the ExitBlock. If there are multiple
    190       // predecessors, the value is defined by a PHI node in this
    191       // block.
    192       Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
    193       Value *Addr = cast<StoreInst>(Store)->getPointerOperand();
    194       IRBuilder<> Builder(InsertPos);
    195       if (AtomicCounterUpdatePromoted)
    196         // atomic update currently can only be promoted across the current
    197         // loop, not the whole loop nest.
    198         Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
    199                                 AtomicOrdering::SequentiallyConsistent);
    200       else {
    201         LoadInst *OldVal = Builder.CreateLoad(Addr, "pgocount.promoted");
    202         auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue);
    203         auto *NewStore = Builder.CreateStore(NewVal, Addr);
    204 
    205         // Now update the parent loop's candidate list:
                // the new load/store pair is queued for the loop that contains
                // the exit block, if there is one.
    206         if (IterativeCounterPromotion) {
    207           auto *TargetLoop = LI.getLoopFor(ExitBlock);
    208           if (TargetLoop)
    209             LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore);
    210         }
    211       }
    212     }
    213   }
    214 
    215 private:
    216   Instruction *Store;
    217   ArrayRef<BasicBlock *> ExitBlocks;
    218   ArrayRef<Instruction *> InsertPts;
    219   DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
    220   LoopInfo &LI;
    221 };
    222 
    223 /// A helper class to do register promotion for all profile counter
    224 /// updates in a loop.
    225 ///
        /// The constructor records the loop's distinct exit blocks together with
        /// the first insertion point of each; run() then promotes the candidates
        /// queued for the loop, subject to the per-loop and the global limit.
    226 class PGOCounterPromoter {
    227 public:
    228   PGOCounterPromoter(
    229       DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
    230       Loop &CurLoop, LoopInfo &LI)
    231       : LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop),
    232         LI(LI) {
    233 
    234     SmallVector<BasicBlock *, 8> LoopExitBlocks;
    235     SmallPtrSet<BasicBlock *, 8> BlockSet;
    236     L.getExitBlocks(LoopExitBlocks);
    237 
            // Keep each exit block once, so that exactly one update is emitted
            // per exit block.
    238     for (BasicBlock *ExitBlock : LoopExitBlocks) {
    239       if (BlockSet.insert(ExitBlock).second) {
    240         ExitBlocks.push_back(ExitBlock);
    241         InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
    242       }
    243     }
    244   }
    245 
          /// Promote the candidates queued for this loop.
          ///
          /// \p NumPromoted is the running total of promotions across loops; it
          /// is incremented once for every counter promoted here and compared
          /// against -max-counter-promotions.
          /// \returns true if at least one counter was promoted.
    246   bool run(int64_t *NumPromoted) {
    247     // Skip 'infinite' loops:
    248     if (ExitBlocks.size() == 0)
    249       return false;
            // The global (debug) budget may already have been used up by loops
            // processed earlier; do not exceed it.
            if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions)
              return false;
    250     unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L);
    251     if (MaxProm == 0)
    252       return false;
    253 
            // Iterate over a snapshot. The helper appends to
            // LoopToCandidates[TargetLoop] while promoting; inserting a new key
            // may grow the DenseMap and relocate the vector stored for &L, which
            // would leave a by-reference iteration dangling. The appended entries
            // always belong to a loop other than L (the loop of an exit block),
            // so the snapshot never misses a candidate of L.
            const SmallVector<LoadStorePair, 8> Candidates = LoopToCandidates[&L];
    254     unsigned Promoted = 0;
    255     for (const auto &Cand : Candidates) {
    256 
    257       SmallVector<PHINode *, 4> NewPHIs;
    258       SSAUpdater SSA(&NewPHIs);
    259       Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);
    260 
    261       PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
    262                                         L.getLoopPreheader(), ExitBlocks,
    263                                         InsertPts, LoopToCandidates, LI);
    264       Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second}));
    265       Promoted++;
              // Count every promotion, including the one that reaches the
              // per-loop limit, so the global total stays accurate.
    266       (*NumPromoted)++;
    267       if (Promoted >= MaxProm)
    268         break;
    269 
    270       if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions)
    271         break;
    272     }
    273 
    274     LLVM_DEBUG(dbgs() << Promoted << " counters promoted for loop (depth="
    275                       << L.getLoopDepth() << ")\n");
    276     return Promoted != 0;
    277   }
    278 
    279 private:
          // Returns false only when LP has more exiting blocks than
          // -speculative-counter-promotion-max-exiting allows.
    280   bool allowSpeculativeCounterPromotion(Loop *LP) {
    281     SmallVector<BasicBlock *, 8> ExitingBlocks;
    282     LP->getExitingBlocks(ExitingBlocks);
    283     // Not considered speculative.
    284     if (ExitingBlocks.size() == 1)
    285       return true;
    286     if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
    287       return false;
    288     return true;
    289   }
    290 
    291   // Returns the max number of Counter Promotions for LP.
          // Zero means that nothing may be promoted out of LP.
    292   unsigned getMaxNumOfPromotionsInLoop(Loop *LP) {
    293     // We can't insert into a catchswitch.
    294     SmallVector<BasicBlock *, 8> LoopExitBlocks;
    295     LP->getExitBlocks(LoopExitBlocks);
    296     if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) {
    297           return isa<CatchSwitchInst>(Exit->getTerminator());
    298         }))
    299       return 0;
    300 
    301     if (!LP->hasDedicatedExits())
    302       return 0;
    303 
            // A preheader is required: it is where the initial value is made
            // available to the SSA updater.
    304     BasicBlock *PH = LP->getLoopPreheader();
    305     if (!PH)
    306       return 0;
    307 
    308     SmallVector<BasicBlock *, 8> ExitingBlocks;
    309     LP->getExitingBlocks(ExitingBlocks);
    310     // Not considered speculative.
    311     if (ExitingBlocks.size() == 1)
    312       return MaxNumOfPromotionsPerLoop;
    313 
    314     if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
    315       return 0;
    316 
    317     // Whether the target block is in a loop does not matter:
    318     if (SpeculativeCounterPromotionToLoop)
    319       return MaxNumOfPromotionsPerLoop;
    320 
    321     // Now check the target block:
    322     unsigned MaxProm = MaxNumOfPromotionsPerLoop;
    323     for (auto *TargetBlock : LoopExitBlocks) {
    324       auto *TargetLoop = LI.getLoopFor(TargetBlock);
    325       if (!TargetLoop)
    326         continue;
    327       unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop);
    328       unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size();
              // Remaining room in the target loop, clamped at zero: taking the
              // max first keeps the unsigned subtraction from wrapping.
    329       MaxProm =
    330           std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) -
    331                                 PendingCandsInTarget);
    332     }
    333     return MaxProm;
    334   }
    335 
    336   DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
    337   SmallVector<BasicBlock *, 8> ExitBlocks;
    338   SmallVector<Instruction *, 8> InsertPts;
    339   Loop &L;
    340   LoopInfo &LI;
    341 };
    342 
    343 } // end anonymous namespace
    344 
    345 PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) {
    346   // Lower with the module's TargetLibraryInfo. An untouched module keeps
    347   // all analyses; otherwise none is reported as preserved.
    348   const TargetLibraryInfo &LibInfo = AM.getResult<TargetLibraryAnalysis>(M);
    349   if (run(M, LibInfo))
    350     return PreservedAnalyses::none();
    351   return PreservedAnalyses::all();
        }
    352 
    353 char InstrProfilingLegacyPass::ID = 0;
        // Register the legacy pass under the argument name "instrprof" and declare
        // its dependency on TargetLibraryInfoWrapperPass.
    354 INITIALIZE_PASS_BEGIN(
    355     InstrProfilingLegacyPass, "instrprof",
    356     "Frontend instrumentation-based coverage lowering.", false, false)
    357 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
    358 INITIALIZE_PASS_END(
    359     InstrProfilingLegacyPass, "instrprof",
    360     "Frontend instrumentation-based coverage lowering.", false, false)
    361 
    362 ModulePass *
    363 llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options) {
          // Allocates a new legacy pass configured with Options and returns it as
          // a raw pointer.
    364   return new InstrProfilingLegacyPass(Options);
    365 }
    366 
    367 static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) {
    368   // Probe the step form first, then the plain form; null if Instr is neither.
    369   if (auto *StepInc = dyn_cast<InstrProfIncrementInstStep>(Instr))
    370     return StepInc;
    371   return dyn_cast<InstrProfIncrementInst>(Instr);
    372 }
    373 
    374 bool InstrProfiling::lowerIntrinsics(Function *F) {
          // Lowers every counter increment and value profile intrinsic in F, then
          // attempts counter promotion. Returns true if anything was lowered.
    375   bool MadeChange = false;
          // Promotion candidates are collected per function by lowerIncrement().
    376   PromotionCandidates.clear();
    377   for (BasicBlock &BB : *F) {
    378     for (auto I = BB.begin(), E = BB.end(); I != E;) {
              // Advance first: the lowering routines erase the intrinsic.
    379       auto Instr = I++;
    380       InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr);
    381       if (Inc) {
    382         lowerIncrement(Inc);
    383         MadeChange = true;
    384       } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) {
    385         lowerValueProfileInst(Ind);
    386         MadeChange = true;
    387       }
    388     }
    389   }
    390 
    391   if (!MadeChange)
    392     return false;
    393 
    394   promoteCounterLoadStores(F);
    395   return true;
    396 }
    397 
    398 bool InstrProfiling::isCounterPromotionEnabled() const {
    399   // A -do-counter-promotion flag on the command line overrides the options.
    400   if (DoCounterPromotion.getNumOccurrences() <= 0)
    401     return Options.DoCounterPromotion;
    402   return DoCounterPromotion;
    403 }
    404 
    405 void InstrProfiling::promoteCounterLoadStores(Function *F) {
          // Promotes the counter load/store pairs collected in PromotionCandidates
          // out of the loops of F; does nothing when promotion is disabled.
    406   if (!isCounterPromotionEnabled())
    407     return;
    408 
          // Dominator tree and loop info are computed locally for this function.
    409   DominatorTree DT(*F);
    410   LoopInfo LI(DT);
    411   DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;
    412 
          // Group the candidates by the loop that contains the counter load;
          // updates outside of any loop are left alone.
    413   for (const auto &LoadStore : PromotionCandidates) {
    414     auto *CounterLoad = LoadStore.first;
    415     auto *CounterStore = LoadStore.second;
    416     BasicBlock *BB = CounterLoad->getParent();
    417     Loop *ParentLoop = LI.getLoopFor(BB);
    418     if (!ParentLoop)
    419       continue;
    420     LoopPromotionCandidates[ParentLoop].emplace_back(CounterLoad, CounterStore);
    421   }
    422 
    423   SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder();
    424 
    425   // Do a post-order traversal of the loops so that counter updates can be
    426   // iteratively hoisted outside the loop nest. Walking the preorder list in
          // reverse handles inner loops before the loops that enclose them.
    427   for (auto *Loop : llvm::reverse(Loops)) {
    428     PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI);
    429     Promoter.run(&TotalCountersPromoted);
    430   }
    431 }
    432 
    433 /// Report whether any of the profiling intrinsics lowered by this pass is
    434 /// still used somewhere in the module.
    435 static bool containsProfilingIntrinsics(Module &M) {
    436   // The declarations are probed in this fixed order; a declaration that
    437   // exists but has no remaining uses does not count.
    438   const Intrinsic::ID ProfilingIntrinsics[] = {
    439       llvm::Intrinsic::instrprof_increment,
    440       llvm::Intrinsic::instrprof_increment_step,
    441       llvm::Intrinsic::instrprof_value_profile};
    442   for (Intrinsic::ID IID : ProfilingIntrinsics) {
    443     Function *Decl = M.getFunction(Intrinsic::getName(IID));
    444     if (Decl && !Decl->use_empty())
    445       return true;
    446   }
    447   return false;
    448 }
    449 
    450 bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) {
          // Lowers all profiling intrinsics in M and emits the supporting data.
          // Returns true if the module was changed.
          //
          // Reset the per-module state before doing any work.
    451   this->M = &M;
    452   this->TLI = &TLI;
    453   NamesVar = nullptr;
    454   NamesSize = 0;
    455   ProfileDataMap.clear();
    456   UsedVars.clear();
    457   getMemOPSizeRangeFromOption(MemOPSizeRange, MemOPSizeRangeStart,
    458                               MemOPSizeRangeLast);
    459   TT = Triple(M.getTargetTriple());
    460 
    461   // Emit the runtime hook even if no counters are present.
    462   bool MadeChange = emitRuntimeHook();
    463 
    464   // Improve compile time by avoiding linear scans when there is no work.
    465   GlobalVariable *CoverageNamesVar =
    466       M.getNamedGlobal(getCoverageUnusedNamesVarName());
    467   if (!containsProfilingIntrinsics(M) && !CoverageNamesVar)
    468     return MadeChange;
    469 
    470   // We did not know how many value sites there would be inside
    471   // the instrumented function. This is counting the number of instrumented
    472   // target value sites to enter it as field in the profile data variable.
    473   for (Function &F : M) {
    474     InstrProfIncrementInst *FirstProfIncInst = nullptr;
    475     for (BasicBlock &BB : F)
    476       for (auto I = BB.begin(), E = BB.end(); I != E; I++)
    477         if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I))
    478           computeNumValueSiteCounts(Ind);
    479         else if (FirstProfIncInst == nullptr)
                  // Stays null until the first increment intrinsic is reached.
    480           FirstProfIncInst = dyn_cast<InstrProfIncrementInst>(I);
    481 
    482     // Value profiling intrinsic lowering requires per-function profile data
    483     // variable to be created first.
    484     if (FirstProfIncInst != nullptr)
    485       static_cast<void>(getOrCreateRegionCounters(FirstProfIncInst));
    486   }
    487 
    488   for (Function &F : M)
    489     MadeChange |= lowerIntrinsics(&F);
    490 
    491   if (CoverageNamesVar) {
    492     lowerCoverageData(CoverageNamesVar);
    493     MadeChange = true;
    494   }
    495 
    496   if (!MadeChange)
    497     return false;
    498 
          // Emit the module-level data and code, in this order.
    499   emitVNodes();
    500   emitNameData();
    501   emitRegistration();
    502   emitUses();
    503   emitInitialization();
    504   return true;
    505 }
    506 
    507 static Constant *getOrInsertValueProfilingCall(Module &M,
    508                                                const TargetLibraryInfo &TLI,
    509                                                bool IsRange = false) {
          // Returns the declaration of the value profiling runtime function in M,
          // inserting it if needed. IsRange selects the range variant.
          // NOTE(review): Ctx is not named in the visible code below; presumably
          // the ParamLLVMType expansions from InstrProfData.inc refer to it.
    510   LLVMContext &Ctx = M.getContext();
    511   auto *ReturnTy = Type::getVoidTy(M.getContext());
    512 
    513   Constant *Res;
    514   if (!IsRange) {
            // The parameter type list is generated from InstrProfData.inc.
    515     Type *ParamTypes[] = {
    516 #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
    517 #include "llvm/ProfileData/InstrProfData.inc"
    518     };
    519     auto *ValueProfilingCallTy =
    520         FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false);
    521     Res = M.getOrInsertFunction(getInstrProfValueProfFuncName(),
    522                                 ValueProfilingCallTy);
    523   } else {
            // Same scheme, with VALUE_RANGE_PROF defined around the include.
    524     Type *RangeParamTypes[] = {
    525 #define VALUE_RANGE_PROF 1
    526 #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
    527 #include "llvm/ProfileData/InstrProfData.inc"
    528 #undef VALUE_RANGE_PROF
    529     };
    530     auto *ValueRangeProfilingCallTy =
    531         FunctionType::get(ReturnTy, makeArrayRef(RangeParamTypes), false);
    532     Res = M.getOrInsertFunction(getInstrProfValueRangeProfFuncName(),
    533                                 ValueRangeProfilingCallTy);
    534   }
    535 
          // If the result is a Function and the target reports an extension
          // attribute for i32 parameters, attach it to the parameter at index 2.
    536   if (Function *FunRes = dyn_cast<Function>(Res)) {
    537     if (auto AK = TLI.getExtAttrForI32Param(false))
    538       FunRes->addParamAttr(2, AK);
    539   }
    540   return Res;
    541 }
    542 
    543 void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
    544   const uint64_t Kind = Ind->getValueKind()->getZExtValue();
    545   const uint64_t SiteIdx = Ind->getIndex()->getZExtValue();
    546   auto Entry = ProfileDataMap.find(Ind->getName());
    547   if (Entry != ProfileDataMap.end()) {
    548     // Known function: the per-kind site count only ever grows.
    549     if (Entry->second.NumValueSites[Kind] <= SiteIdx)
    550       Entry->second.NumValueSites[Kind] = SiteIdx + 1;
    551     return;
    552   }
    553   PerFunctionProfileData Fresh;
    554   Fresh.NumValueSites[Kind] = SiteIdx + 1;
          ProfileDataMap[Ind->getName()] = Fresh;
        }
    555 
    556 void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
          // Replaces the value profile intrinsic Ind with a call to the value
          // profiling runtime function and erases the intrinsic.
    557   GlobalVariable *Name = Ind->getName();
    558   auto It = ProfileDataMap.find(Name);
    559   assert(It != ProfileDataMap.end() && It->second.DataVar &&
    560          "value profiling detected in function with no counter incerement");
    561 
    562   GlobalVariable *DataVar = It->second.DataVar;
    563   uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
    564   uint64_t Index = Ind->getIndex()->getZExtValue();
          // Turn the per-kind site index into an index across all kinds by adding
          // the site counts of every kind that precedes ValueKind.
    565   for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind)
    566     Index += It->second.NumValueSites[Kind];
    567 
    568   IRBuilder<> Builder(Ind);
          // Memory-op size profiling uses the range variant of the runtime call.
    569   bool IsRange = (Ind->getValueKind()->getZExtValue() ==
    570                   llvm::InstrProfValueKind::IPVK_MemOPSize);
    571   CallInst *Call = nullptr;
    572   if (!IsRange) {
    573     Value *Args[3] = {Ind->getTargetValue(),
    574                       Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
    575                       Builder.getInt32(Index)};
    576     Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI), Args);
    577   } else {
            // Extra arguments: the precise range bounds and the large-value
            // threshold; INT64_MIN is passed when the threshold option is 0.
    578     Value *Args[6] = {
    579         Ind->getTargetValue(),
    580         Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
    581         Builder.getInt32(Index),
    582         Builder.getInt64(MemOPSizeRangeStart),
    583         Builder.getInt64(MemOPSizeRangeLast),
    584         Builder.getInt64(MemOPSizeLarge == 0 ? INT64_MIN : MemOPSizeLarge)};
    585     Call =
    586         Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI, true), Args);
    587   }
          // Mirror on the call site the i32 extension attribute that
          // getOrInsertValueProfilingCall puts on the declaration (index 2).
    588   if (auto AK = TLI->getExtAttrForI32Param(false))
    589     Call->addParamAttr(2, AK);
    590   Ind->replaceAllUsesWith(Call);
    591   Ind->eraseFromParent();
    592 }
    593 
    594 void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
          // Replaces the increment intrinsic Inc with a load/add/store on its slot
          // in the function's counter array and erases the intrinsic.
    595   GlobalVariable *Counters = getOrCreateRegionCounters(Inc);
    596 
    597   IRBuilder<> Builder(Inc);
    598   uint64_t Index = Inc->getIndex()->getZExtValue();
    599   Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index);
    600   Value *Load = Builder.CreateLoad(Addr, "pgocount");
    601   auto *Count = Builder.CreateAdd(Load, Inc->getStep());
    602   auto *Store = Builder.CreateStore(Count, Addr);
    603   Inc->replaceAllUsesWith(Store);
          // Remember the pair so promoteCounterLoadStores() can consider it.
    604   if (isCounterPromotionEnabled())
    605     PromotionCandidates.emplace_back(cast<Instruction>(Load), Store);
    606   Inc->eraseFromParent();
    607 }
    608 
    609 void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
          // Collects the function name variables referenced by CoverageNamesVar
          // into ReferencedNames, then deletes CoverageNamesVar itself. The
          // initializer is expected to be a ConstantArray (cast<> asserts it).
    610   ConstantArray *Names =
    611       cast<ConstantArray>(CoverageNamesVar->getInitializer());
    612   for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) {
    613     Constant *NC = Names->getOperand(I);
    614     Value *V = NC->stripPointerCasts();
    615     assert(isa<GlobalVariable>(V) && "Missing reference to function name");
    616     GlobalVariable *Name = cast<GlobalVariable>(V);
    617 
            // Each referenced name variable is given private linkage.
    618     Name->setLinkage(GlobalValue::PrivateLinkage);
    619     ReferencedNames.push_back(Name);
    620     NC->dropAllReferences();
    621   }
    622   CoverageNamesVar->eraseFromParent();
    623 }
    624 
    625 /// Build the name of a profiling variable for the function \p Inc belongs to.
    626 static std::string getVarName(InstrProfIncrementInst *Inc, StringRef Prefix) {
    627   const size_t NamePrefixLen = getInstrProfNameVarPrefix().size();
    628   StringRef FuncName = Inc->getName()->getName().substr(NamePrefixLen);
    629   Function *Fn = Inc->getParent()->getParent();
    630   if (DoHashBasedCounterSplit && isIRPGOFlagSet(Fn->getParent()) &&
    631       canRenameComdatFunc(*Fn)) {
    632     // Append ".<hash>" unless the name already ends with it.
    633     uint64_t FuncHash = Inc->getHash()->getZExtValue();
    634     SmallVector<char, 24> Scratch;
    635     if (!FuncName.endswith(
    636             (Twine(".") + Twine(FuncHash)).toStringRef(Scratch)))
    637       return (Prefix + FuncName + "." + Twine(FuncHash)).str();
    638   }
    639   return (Prefix + FuncName).str();
        }
    640 
    641 static inline bool shouldRecordFunctionAddr(Function *F) {
    642   // Classify the linkage once up front.
    643   const bool IsLinkOnce = F->hasLinkOnceLinkage();
    644   const bool IsLocal = F->hasLocalLinkage();
    645   const bool IsAvailableExternally = F->hasAvailableExternallyLinkage();
    646 
    647   // Any other linkage: the address is always recorded.
    648   if (!(IsLinkOnce || IsLocal || IsAvailableExternally))
    649     return true;
    650 
    651   // An 'alwaysinline' function with available_externally linkage can't have
    652   // its address taken: that would create an undefined external reference to
    653   // the function, which would fail to link.
    654   if (IsAvailableExternally && F->hasFnAttribute(Attribute::AlwaysInline))
    655     return false;
    656 
    657   // A function that is both internal and COMDAT is excluded, so the profile
    658   // data variable never references internal symbols in a COMDAT.
    659   if (IsLocal && F->hasComdat())
    660     return false;
    661 
    662   // Otherwise look for uses other than direct calls or invokes. Inline
    663   // virtual functions have linkonce_odr linkage; when a key method exists,
    664   // the vtable is only emitted in the TU defining it, so elsewhere the
    665   // function is not 'addresstaken'. Not recording its address here could let
    666   // the linker pick profile data that lacks the address, losing indirect
    667   // call target info; hence linkonce functions are always recorded.
    668   return IsLinkOnce || F->hasAddressTaken();
    669 }
    670 
    671 static inline Comdat *getOrCreateProfileComdat(Module &M, Function &F,
    672                                                InstrProfIncrementInst *Inc) {
    673   if (!needsComdatForCounter(F, M))
    674     return nullptr;
    675 
    676   // COFF requires a COMDAT section to have a key symbol of the same name,
    677   // and its linker requires the COMDAT a section is associated to to precede
    678   // the associating section. The counter variable's name therefore doubles
    679   // as the COMDAT name on COFF; elsewhere the COMDAT prefix is used.
    680   const bool IsCOFF = Triple(M.getTargetTriple()).isOSBinFormatCOFF();
    681   StringRef ComdatPrefix =
    682       IsCOFF ? getInstrProfCountersVarPrefix() : getInstrProfComdatPrefix();
    683   return M.getOrInsertComdat(StringRef(getVarName(Inc, ComdatPrefix)));
    684 }
    685 
    686 static bool needsRuntimeRegistrationOfSectionRange(const Module &M) {
    687   const Triple TargetTriple(M.getTargetTriple());
    688 
    689   // Not needed on Darwin: compiler-rt uses linker magic there.
    690   if (TargetTriple.isOSDarwin())
    691     return false;
    692 
    693   // These targets use linker script magic to get data/cnts/name start/end.
    694   const bool HasSectionBounds =
    695       TargetTriple.isOSLinux() || TargetTriple.isOSFreeBSD() ||
    696       TargetTriple.isOSFuchsia() || TargetTriple.isPS4CPU();
    697 
    698   return !HasSectionBounds;
    699 }
    700 
    701 GlobalVariable *
    702 InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
    703   GlobalVariable *NamePtr = Inc->getName();
    704   auto It = ProfileDataMap.find(NamePtr);
    705   PerFunctionProfileData PD;
    706   if (It != ProfileDataMap.end()) {
    707     if (It->second.RegionCounters)
    708       return It->second.RegionCounters;
    709     PD = It->second;
    710   }
    711 
    712   // Move the name variable to the right section. Place them in a COMDAT group
    713   // if the associated function is a COMDAT. This will make sure that
    714   // only one copy of counters of the COMDAT function will be emitted after
    715   // linking.
    716   Function *Fn = Inc->getParent()->getParent();
    717   Comdat *ProfileVarsComdat = nullptr;
    718   ProfileVarsComdat = getOrCreateProfileComdat(*M, *Fn, Inc);
    719 
    720   uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
    721   LLVMContext &Ctx = M->getContext();
    722   ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
    723 
    724   // Create the counters variable.
    725   auto *CounterPtr =
    726       new GlobalVariable(*M, CounterTy, false, NamePtr->getLinkage(),
    727                          Constant::getNullValue(CounterTy),
    728                          getVarName(Inc, getInstrProfCountersVarPrefix()));
    729   CounterPtr->setVisibility(NamePtr->getVisibility());
    730   CounterPtr->setSection(
    731       getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat()));
    732   CounterPtr->setAlignment(8);
    733   CounterPtr->setComdat(ProfileVarsComdat);
    734 
    735   auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
    736   // Allocate statically the array of pointers to value profile nodes for
    737   // the current function.
    738   Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy);
    739   if (ValueProfileStaticAlloc && !needsRuntimeRegistrationOfSectionRange(*M)) {
    740     uint64_t NS = 0;
    741     for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
    742       NS += PD.NumValueSites[Kind];
    743     if (NS) {
    744       ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);
    745 
    746       auto *ValuesVar =
    747           new GlobalVariable(*M, ValuesTy, false, NamePtr->getLinkage(),
    748                              Constant::getNullValue(ValuesTy),
    749                              getVarName(Inc, getInstrProfValuesVarPrefix()));
    750       ValuesVar->setVisibility(NamePtr->getVisibility());
    751       ValuesVar->setSection(
    752           getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
    753       ValuesVar->setAlignment(8);
    754       ValuesVar->setComdat(ProfileVarsComdat);
    755       ValuesPtrExpr =
    756           ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx));
    757     }
    758   }
    759 
    760   // Create data variable.
    761   auto *Int16Ty = Type::getInt16Ty(Ctx);
    762   auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1);
    763   Type *DataTypes[] = {
    764 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,
    765 #include "llvm/ProfileData/InstrProfData.inc"
    766   };
    767   auto *DataTy = StructType::get(Ctx, makeArrayRef(DataTypes));
    768 
    769   Constant *FunctionAddr = shouldRecordFunctionAddr(Fn)
    770                                ? ConstantExpr::getBitCast(Fn, Int8PtrTy)
    771                                : ConstantPointerNull::get(Int8PtrTy);
    772 
    773   Constant *Int16ArrayVals[IPVK_Last + 1];
    774   for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
    775     Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);
    776 
    777   Constant *DataVals[] = {
    778 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
    779 #include "llvm/ProfileData/InstrProfData.inc"
    780   };
    781   auto *Data = new GlobalVariable(*M, DataTy, false, NamePtr->getLinkage(),
    782                                   ConstantStruct::get(DataTy, DataVals),
    783                                   getVarName(Inc, getInstrProfDataVarPrefix()));
    784   Data->setVisibility(NamePtr->getVisibility());
    785   Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat()));
    786   Data->setAlignment(INSTR_PROF_DATA_ALIGNMENT);
    787   Data->setComdat(ProfileVarsComdat);
    788 
    789   PD.RegionCounters = CounterPtr;
    790   PD.DataVar = Data;
    791   ProfileDataMap[NamePtr] = PD;
    792 
    793   // Mark the data variable as used so that it isn't stripped out.
    794   UsedVars.push_back(Data);
    795   // Now that the linkage set by the FE has been passed to the data and counter
    796   // variables, reset Name variable's linkage and visibility to private so that
    797   // it can be removed later by the compiler.
    798   NamePtr->setLinkage(GlobalValue::PrivateLinkage);
    799   // Collect the referenced names to be used by emitNameData.
    800   ReferencedNames.push_back(NamePtr);
    801 
    802   return CounterPtr;
    803 }
    804 
    805 void InstrProfiling::emitVNodes() {
    806   if (!ValueProfileStaticAlloc)
    807     return;
    808 
    809   // For now only support this on platforms that do
    810   // not require runtime registration to discover
    811   // named section start/end.
    812   if (needsRuntimeRegistrationOfSectionRange(*M))
    813     return;
    814 
    815   size_t TotalNS = 0;
    816   for (auto &PD : ProfileDataMap) {
    817     for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
    818       TotalNS += PD.second.NumValueSites[Kind];
    819   }
    820 
    821   if (!TotalNS)
    822     return;
    823 
    824   uint64_t NumCounters = TotalNS * NumCountersPerValueSite;
    825 // Heuristic for small programs with very few total value sites.
    826 // The default value of vp-counters-per-site is chosen based on
    827 // the observation that large apps usually have a low percentage
    828 // of value sites that actually have any profile data, and thus
    829 // the average number of counters per site is low. For small
    830 // apps with very few sites, this may not be true. Bump up the
    831 // number of counters in this case.
    832 #define INSTR_PROF_MIN_VAL_COUNTS 10
    833   if (NumCounters < INSTR_PROF_MIN_VAL_COUNTS)
    834     NumCounters = std::max(INSTR_PROF_MIN_VAL_COUNTS, (int)NumCounters * 2);
    835 
    836   auto &Ctx = M->getContext();
    837   Type *VNodeTypes[] = {
    838 #define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType,
    839 #include "llvm/ProfileData/InstrProfData.inc"
    840   };
    841   auto *VNodeTy = StructType::get(Ctx, makeArrayRef(VNodeTypes));
    842 
    843   ArrayType *VNodesTy = ArrayType::get(VNodeTy, NumCounters);
    844   auto *VNodesVar = new GlobalVariable(
    845       *M, VNodesTy, false, GlobalValue::PrivateLinkage,
    846       Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName());
    847   VNodesVar->setSection(
    848       getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat()));
    849   UsedVars.push_back(VNodesVar);
    850 }
    851 
    852 void InstrProfiling::emitNameData() {
    853   std::string UncompressedData;
    854 
    855   if (ReferencedNames.empty())
    856     return;
    857 
    858   std::string CompressedNameStr;
    859   if (Error E = collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr,
    860                                           DoNameCompression)) {
    861     report_fatal_error(toString(std::move(E)), false);
    862   }
    863 
    864   auto &Ctx = M->getContext();
    865   auto *NamesVal = ConstantDataArray::getString(
    866       Ctx, StringRef(CompressedNameStr), false);
    867   NamesVar = new GlobalVariable(*M, NamesVal->getType(), true,
    868                                 GlobalValue::PrivateLinkage, NamesVal,
    869                                 getInstrProfNamesVarName());
    870   NamesSize = CompressedNameStr.size();
    871   NamesVar->setSection(
    872       getInstrProfSectionName(IPSK_name, TT.getObjectFormat()));
    873   UsedVars.push_back(NamesVar);
    874 
    875   for (auto *NamePtr : ReferencedNames)
    876     NamePtr->eraseFromParent();
    877 }
    878 
    879 void InstrProfiling::emitRegistration() {
    880   if (!needsRuntimeRegistrationOfSectionRange(*M))
    881     return;
    882 
    883   // Construct the function.
    884   auto *VoidTy = Type::getVoidTy(M->getContext());
    885   auto *VoidPtrTy = Type::getInt8PtrTy(M->getContext());
    886   auto *Int64Ty = Type::getInt64Ty(M->getContext());
    887   auto *RegisterFTy = FunctionType::get(VoidTy, false);
    888   auto *RegisterF = Function::Create(RegisterFTy, GlobalValue::InternalLinkage,
    889                                      getInstrProfRegFuncsName(), M);
    890   RegisterF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
    891   if (Options.NoRedZone)
    892     RegisterF->addFnAttr(Attribute::NoRedZone);
    893 
    894   auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false);
    895   auto *RuntimeRegisterF =
    896       Function::Create(RuntimeRegisterTy, GlobalVariable::ExternalLinkage,
    897                        getInstrProfRegFuncName(), M);
    898 
    899   IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF));
    900   for (Value *Data : UsedVars)
    901     if (Data != NamesVar && !isa<Function>(Data))
    902       IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));
    903 
    904   if (NamesVar) {
    905     Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
    906     auto *NamesRegisterTy =
    907         FunctionType::get(VoidTy, makeArrayRef(ParamTypes), false);
    908     auto *NamesRegisterF =
    909         Function::Create(NamesRegisterTy, GlobalVariable::ExternalLinkage,
    910                          getInstrProfNamesRegFuncName(), M);
    911     IRB.CreateCall(NamesRegisterF, {IRB.CreateBitCast(NamesVar, VoidPtrTy),
    912                                     IRB.getInt64(NamesSize)});
    913   }
    914 
    915   IRB.CreateRetVoid();
    916 }
    917 
    918 bool InstrProfiling::emitRuntimeHook() {
    919   // We expect the linker to be invoked with -u<hook_var> flag for linux,
    920   // for which case there is no need to emit the user function.
    921   if (Triple(M->getTargetTriple()).isOSLinux())
    922     return false;
    923 
    924   // If the module's provided its own runtime, we don't need to do anything.
    925   if (M->getGlobalVariable(getInstrProfRuntimeHookVarName()))
    926     return false;
    927 
    928   // Declare an external variable that will pull in the runtime initialization.
    929   auto *Int32Ty = Type::getInt32Ty(M->getContext());
    930   auto *Var =
    931       new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage,
    932                          nullptr, getInstrProfRuntimeHookVarName());
    933 
    934   // Make a function that uses it.
    935   auto *User = Function::Create(FunctionType::get(Int32Ty, false),
    936                                 GlobalValue::LinkOnceODRLinkage,
    937                                 getInstrProfRuntimeHookVarUseFuncName(), M);
    938   User->addFnAttr(Attribute::NoInline);
    939   if (Options.NoRedZone)
    940     User->addFnAttr(Attribute::NoRedZone);
    941   User->setVisibility(GlobalValue::HiddenVisibility);
    942   if (Triple(M->getTargetTriple()).supportsCOMDAT())
    943     User->setComdat(M->getOrInsertComdat(User->getName()));
    944 
    945   IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User));
    946   auto *Load = IRB.CreateLoad(Var);
    947   IRB.CreateRet(Load);
    948 
    949   // Mark the user variable as used so that it isn't stripped out.
    950   UsedVars.push_back(User);
    951   return true;
    952 }
    953 
    954 void InstrProfiling::emitUses() {
    955   if (!UsedVars.empty())
    956     appendToUsed(*M, UsedVars);
    957 }
    958 
    959 void InstrProfiling::emitInitialization() {
    960   StringRef InstrProfileOutput = Options.InstrProfileOutput;
    961 
    962   if (!InstrProfileOutput.empty()) {
    963     // Create variable for profile name.
    964     Constant *ProfileNameConst =
    965         ConstantDataArray::getString(M->getContext(), InstrProfileOutput, true);
    966     GlobalVariable *ProfileNameVar = new GlobalVariable(
    967         *M, ProfileNameConst->getType(), true, GlobalValue::WeakAnyLinkage,
    968         ProfileNameConst, INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR));
    969     if (TT.supportsCOMDAT()) {
    970       ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage);
    971       ProfileNameVar->setComdat(M->getOrInsertComdat(
    972           StringRef(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR))));
    973     }
    974   }
    975 
    976   Constant *RegisterF = M->getFunction(getInstrProfRegFuncsName());
    977   if (!RegisterF)
    978     return;
    979 
    980   // Create the initialization function.
    981   auto *VoidTy = Type::getVoidTy(M->getContext());
    982   auto *F = Function::Create(FunctionType::get(VoidTy, false),
    983                              GlobalValue::InternalLinkage,
    984                              getInstrProfInitFuncName(), M);
    985   F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
    986   F->addFnAttr(Attribute::NoInline);
    987   if (Options.NoRedZone)
    988     F->addFnAttr(Attribute::NoRedZone);
    989 
    990   // Add the basic block and the necessary calls.
    991   IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", F));
    992   if (RegisterF)
    993     IRB.CreateCall(RegisterF, {});
    994   IRB.CreateRetVoid();
    995 
    996   appendToGlobalCtors(*M, F, 0);
    997 }
    998