Home | History | Annotate | Download | only in Instrumentation
      1 //===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===//
      2 //
      3 //                      The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements the transformation that promotes indirect calls to
     11 // conditional direct calls when the indirect-call value profile metadata is
     12 // available.
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 #include "llvm/ADT/ArrayRef.h"
     17 #include "llvm/ADT/STLExtras.h"
     18 #include "llvm/ADT/SmallVector.h"
     19 #include "llvm/ADT/Statistic.h"
     20 #include "llvm/ADT/StringRef.h"
     21 #include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
     22 #include "llvm/Analysis/IndirectCallSiteVisitor.h"
     23 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
     24 #include "llvm/Analysis/ProfileSummaryInfo.h"
     25 #include "llvm/IR/Attributes.h"
     26 #include "llvm/IR/BasicBlock.h"
     27 #include "llvm/IR/CallSite.h"
     28 #include "llvm/IR/DerivedTypes.h"
     29 #include "llvm/IR/DiagnosticInfo.h"
     30 #include "llvm/IR/Function.h"
     31 #include "llvm/IR/IRBuilder.h"
     32 #include "llvm/IR/InstrTypes.h"
     33 #include "llvm/IR/Instruction.h"
     34 #include "llvm/IR/Instructions.h"
     35 #include "llvm/IR/LLVMContext.h"
     36 #include "llvm/IR/MDBuilder.h"
     37 #include "llvm/IR/PassManager.h"
     38 #include "llvm/IR/Type.h"
     39 #include "llvm/IR/Value.h"
     40 #include "llvm/Pass.h"
     41 #include "llvm/ProfileData/InstrProf.h"
     42 #include "llvm/Support/Casting.h"
     43 #include "llvm/Support/CommandLine.h"
     44 #include "llvm/Support/Error.h"
     45 #include "llvm/Support/Debug.h"
     46 #include "llvm/Support/raw_ostream.h"
     47 #include "llvm/Transforms/Instrumentation.h"
     48 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
     49 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
     50 #include "llvm/Transforms/Utils/CallPromotionUtils.h"
     51 #include <cassert>
     52 #include <cstdint>
     53 #include <memory>
     54 #include <string>
     55 #include <utility>
     56 #include <vector>
     57 
     58 using namespace llvm;
     59 
     60 #define DEBUG_TYPE "pgo-icall-prom"
     61 
     62 STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
     63 STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
     64 
     65 // Command line option to disable indirect-call promotion with the default as
     66 // false. This is for debug purpose.
     67 static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
     68                                 cl::desc("Disable indirect call promotion"));
     69 
     70 // Set the cutoff value for the promotion. If the value is other than 0, we
     71 // stop the transformation once the total number of promotions equals the cutoff
     72 // value.
     73 // For debug use only.
     74 static cl::opt<unsigned>
     75     ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::ZeroOrMore,
     76               cl::desc("Max number of promotions for this compilation"));
     77 
     78 // If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped.
     79 // For debug use only.
     80 static cl::opt<unsigned>
     81     ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::ZeroOrMore,
     82               cl::desc("Skip Callsite up to this number for this compilation"));
     83 
     84 // Set if the pass is called in LTO optimization. The difference for LTO mode
     85 // is the pass won't prefix the source module name to the internal linkage
     86 // symbols.
     87 static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden,
     88                                 cl::desc("Run indirect-call promotion in LTO "
     89                                          "mode"));
     90 
     91 // Set if the pass is called in SamplePGO mode. The difference for SamplePGO
     92 // mode is it will add prof metadatato the created direct call.
     93 static cl::opt<bool>
     94     ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden,
     95                      cl::desc("Run indirect-call promotion in SamplePGO mode"));
     96 
     97 // If the option is set to true, only call instructions will be considered for
     98 // transformation -- invoke instructions will be ignored.
     99 static cl::opt<bool>
    100     ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden,
    101                 cl::desc("Run indirect-call promotion for call instructions "
    102                          "only"));
    103 
    104 // If the option is set to true, only invoke instructions will be considered for
    105 // transformation -- call instructions will be ignored.
    106 static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(false),
    107                                    cl::Hidden,
    108                                    cl::desc("Run indirect-call promotion for "
    109                                             "invoke instruction only"));
    110 
    111 // Dump the function level IR if the transformation happened in this
    112 // function. For debug use only.
    113 static cl::opt<bool>
    114     ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
    115                  cl::desc("Dump IR after transformation happens"));
    116 
    117 namespace {
    118 
    119 class PGOIndirectCallPromotionLegacyPass : public ModulePass {
    120 public:
    121   static char ID;
    122 
    123   PGOIndirectCallPromotionLegacyPass(bool InLTO = false, bool SamplePGO = false)
    124       : ModulePass(ID), InLTO(InLTO), SamplePGO(SamplePGO) {
    125     initializePGOIndirectCallPromotionLegacyPassPass(
    126         *PassRegistry::getPassRegistry());
    127   }
    128 
    129   void getAnalysisUsage(AnalysisUsage &AU) const override {
    130     AU.addRequired<ProfileSummaryInfoWrapperPass>();
    131   }
    132 
    133   StringRef getPassName() const override { return "PGOIndirectCallPromotion"; }
    134 
    135 private:
    136   bool runOnModule(Module &M) override;
    137 
    138   // If this pass is called in LTO. We need to special handling the PGOFuncName
    139   // for the static variables due to LTO's internalization.
    140   bool InLTO;
    141 
    142   // If this pass is called in SamplePGO. We need to add the prof metadata to
    143   // the promoted direct call.
    144   bool SamplePGO;
    145 };
    146 
    147 } // end anonymous namespace
    148 
    149 char PGOIndirectCallPromotionLegacyPass::ID = 0;
    150 
    151 INITIALIZE_PASS_BEGIN(PGOIndirectCallPromotionLegacyPass, "pgo-icall-prom",
    152                       "Use PGO instrumentation profile to promote indirect "
    153                       "calls to direct calls.",
    154                       false, false)
    155 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
    156 INITIALIZE_PASS_END(PGOIndirectCallPromotionLegacyPass, "pgo-icall-prom",
    157                     "Use PGO instrumentation profile to promote indirect "
    158                     "calls to direct calls.",
    159                     false, false)
    160 
    161 ModulePass *llvm::createPGOIndirectCallPromotionLegacyPass(bool InLTO,
    162                                                            bool SamplePGO) {
    163   return new PGOIndirectCallPromotionLegacyPass(InLTO, SamplePGO);
    164 }
    165 
    166 namespace {
    167 
    168 // The class for main data structure to promote indirect calls to conditional
    169 // direct calls.
    170 class ICallPromotionFunc {
    171 private:
    172   Function &F;
    173   Module *M;
    174 
    175   // Symtab that maps indirect call profile values to function names and
    176   // defines.
    177   InstrProfSymtab *Symtab;
    178 
    179   bool SamplePGO;
    180 
    181   OptimizationRemarkEmitter &ORE;
    182 
    183   // A struct that records the direct target and it's call count.
    184   struct PromotionCandidate {
    185     Function *TargetFunction;
    186     uint64_t Count;
    187 
    188     PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
    189   };
    190 
    191   // Check if the indirect-call call site should be promoted. Return the number
    192   // of promotions. Inst is the candidate indirect call, ValueDataRef
    193   // contains the array of value profile data for profiled targets,
    194   // TotalCount is the total profiled count of call executions, and
    195   // NumCandidates is the number of candidate entries in ValueDataRef.
    196   std::vector<PromotionCandidate> getPromotionCandidatesForCallSite(
    197       Instruction *Inst, const ArrayRef<InstrProfValueData> &ValueDataRef,
    198       uint64_t TotalCount, uint32_t NumCandidates);
    199 
    200   // Promote a list of targets for one indirect-call callsite. Return
    201   // the number of promotions.
    202   uint32_t tryToPromote(Instruction *Inst,
    203                         const std::vector<PromotionCandidate> &Candidates,
    204                         uint64_t &TotalCount);
    205 
    206 public:
    207   ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab,
    208                      bool SamplePGO, OptimizationRemarkEmitter &ORE)
    209       : F(Func), M(Modu), Symtab(Symtab), SamplePGO(SamplePGO), ORE(ORE) {}
    210   ICallPromotionFunc(const ICallPromotionFunc &) = delete;
    211   ICallPromotionFunc &operator=(const ICallPromotionFunc &) = delete;
    212 
    213   bool processFunction(ProfileSummaryInfo *PSI);
    214 };
    215 
    216 } // end anonymous namespace
    217 
    218 // Indirect-call promotion heuristic. The direct targets are sorted based on
    219 // the count. Stop at the first target that is not promoted.
    220 std::vector<ICallPromotionFunc::PromotionCandidate>
    221 ICallPromotionFunc::getPromotionCandidatesForCallSite(
    222     Instruction *Inst, const ArrayRef<InstrProfValueData> &ValueDataRef,
    223     uint64_t TotalCount, uint32_t NumCandidates) {
    224   std::vector<PromotionCandidate> Ret;
    225 
    226   LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << *Inst
    227                     << " Num_targets: " << ValueDataRef.size()
    228                     << " Num_candidates: " << NumCandidates << "\n");
    229   NumOfPGOICallsites++;
    230   if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) {
    231     LLVM_DEBUG(dbgs() << " Skip: User options.\n");
    232     return Ret;
    233   }
    234 
    235   for (uint32_t I = 0; I < NumCandidates; I++) {
    236     uint64_t Count = ValueDataRef[I].Count;
    237     assert(Count <= TotalCount);
    238     uint64_t Target = ValueDataRef[I].Value;
    239     LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
    240                       << "  Target_func: " << Target << "\n");
    241 
    242     if (ICPInvokeOnly && dyn_cast<CallInst>(Inst)) {
    243       LLVM_DEBUG(dbgs() << " Not promote: User options.\n");
    244       ORE.emit([&]() {
    245         return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", Inst)
    246                << " Not promote: User options";
    247       });
    248       break;
    249     }
    250     if (ICPCallOnly && dyn_cast<InvokeInst>(Inst)) {
    251       LLVM_DEBUG(dbgs() << " Not promote: User option.\n");
    252       ORE.emit([&]() {
    253         return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", Inst)
    254                << " Not promote: User options";
    255       });
    256       break;
    257     }
    258     if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
    259       LLVM_DEBUG(dbgs() << " Not promote: Cutoff reached.\n");
    260       ORE.emit([&]() {
    261         return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", Inst)
    262                << " Not promote: Cutoff reached";
    263       });
    264       break;
    265     }
    266 
    267     Function *TargetFunction = Symtab->getFunction(Target);
    268     if (TargetFunction == nullptr) {
    269       LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
    270       ORE.emit([&]() {
    271         return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", Inst)
    272                << "Cannot promote indirect call: target not found";
    273       });
    274       break;
    275     }
    276 
    277     const char *Reason = nullptr;
    278     if (!isLegalToPromote(CallSite(Inst), TargetFunction, &Reason)) {
    279       using namespace ore;
    280 
    281       ORE.emit([&]() {
    282         return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", Inst)
    283                << "Cannot promote indirect call to "
    284                << NV("TargetFunction", TargetFunction) << " with count of "
    285                << NV("Count", Count) << ": " << Reason;
    286       });
    287       break;
    288     }
    289 
    290     Ret.push_back(PromotionCandidate(TargetFunction, Count));
    291     TotalCount -= Count;
    292   }
    293   return Ret;
    294 }
    295 
    296 Instruction *llvm::pgo::promoteIndirectCall(Instruction *Inst,
    297                                             Function *DirectCallee,
    298                                             uint64_t Count, uint64_t TotalCount,
    299                                             bool AttachProfToDirectCall,
    300                                             OptimizationRemarkEmitter *ORE) {
    301 
    302   uint64_t ElseCount = TotalCount - Count;
    303   uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount);
    304   uint64_t Scale = calculateCountScale(MaxCount);
    305   MDBuilder MDB(Inst->getContext());
    306   MDNode *BranchWeights = MDB.createBranchWeights(
    307       scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale));
    308 
    309   Instruction *NewInst =
    310       promoteCallWithIfThenElse(CallSite(Inst), DirectCallee, BranchWeights);
    311 
    312   if (AttachProfToDirectCall) {
    313     SmallVector<uint32_t, 1> Weights;
    314     Weights.push_back(Count);
    315     MDBuilder MDB(NewInst->getContext());
    316     NewInst->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
    317   }
    318 
    319   using namespace ore;
    320 
    321   if (ORE)
    322     ORE->emit([&]() {
    323       return OptimizationRemark(DEBUG_TYPE, "Promoted", Inst)
    324              << "Promote indirect call to " << NV("DirectCallee", DirectCallee)
    325              << " with count " << NV("Count", Count) << " out of "
    326              << NV("TotalCount", TotalCount);
    327     });
    328   return NewInst;
    329 }
    330 
    331 // Promote indirect-call to conditional direct-call for one callsite.
    332 uint32_t ICallPromotionFunc::tryToPromote(
    333     Instruction *Inst, const std::vector<PromotionCandidate> &Candidates,
    334     uint64_t &TotalCount) {
    335   uint32_t NumPromoted = 0;
    336 
    337   for (auto &C : Candidates) {
    338     uint64_t Count = C.Count;
    339     pgo::promoteIndirectCall(Inst, C.TargetFunction, Count, TotalCount,
    340                              SamplePGO, &ORE);
    341     assert(TotalCount >= Count);
    342     TotalCount -= Count;
    343     NumOfPGOICallPromotion++;
    344     NumPromoted++;
    345   }
    346   return NumPromoted;
    347 }
    348 
    349 // Traverse all the indirect-call callsite and get the value profile
    350 // annotation to perform indirect-call promotion.
    351 bool ICallPromotionFunc::processFunction(ProfileSummaryInfo *PSI) {
    352   bool Changed = false;
    353   ICallPromotionAnalysis ICallAnalysis;
    354   for (auto &I : findIndirectCallSites(F)) {
    355     uint32_t NumVals, NumCandidates;
    356     uint64_t TotalCount;
    357     auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
    358         I, NumVals, TotalCount, NumCandidates);
    359     if (!NumCandidates ||
    360         (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount)))
    361       continue;
    362     auto PromotionCandidates = getPromotionCandidatesForCallSite(
    363         I, ICallProfDataRef, TotalCount, NumCandidates);
    364     uint32_t NumPromoted = tryToPromote(I, PromotionCandidates, TotalCount);
    365     if (NumPromoted == 0)
    366       continue;
    367 
    368     Changed = true;
    369     // Adjust the MD.prof metadata. First delete the old one.
    370     I->setMetadata(LLVMContext::MD_prof, nullptr);
    371     // If all promoted, we don't need the MD.prof metadata.
    372     if (TotalCount == 0 || NumPromoted == NumVals)
    373       continue;
    374     // Otherwise we need update with the un-promoted records back.
    375     annotateValueSite(*M, *I, ICallProfDataRef.slice(NumPromoted), TotalCount,
    376                       IPVK_IndirectCallTarget, NumCandidates);
    377   }
    378   return Changed;
    379 }
    380 
    381 // A wrapper function that does the actual work.
    382 static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI,
    383                                  bool InLTO, bool SamplePGO,
    384                                  ModuleAnalysisManager *AM = nullptr) {
    385   if (DisableICP)
    386     return false;
    387   InstrProfSymtab Symtab;
    388   if (Error E = Symtab.create(M, InLTO)) {
    389     std::string SymtabFailure = toString(std::move(E));
    390     LLVM_DEBUG(dbgs() << "Failed to create symtab: " << SymtabFailure << "\n");
    391     (void)SymtabFailure;
    392     return false;
    393   }
    394   bool Changed = false;
    395   for (auto &F : M) {
    396     if (F.isDeclaration())
    397       continue;
    398     if (F.hasFnAttribute(Attribute::OptimizeNone))
    399       continue;
    400 
    401     std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
    402     OptimizationRemarkEmitter *ORE;
    403     if (AM) {
    404       auto &FAM =
    405           AM->getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
    406       ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
    407     } else {
    408       OwnedORE = llvm::make_unique<OptimizationRemarkEmitter>(&F);
    409       ORE = OwnedORE.get();
    410     }
    411 
    412     ICallPromotionFunc ICallPromotion(F, &M, &Symtab, SamplePGO, *ORE);
    413     bool FuncChanged = ICallPromotion.processFunction(PSI);
    414     if (ICPDUMPAFTER && FuncChanged) {
    415       LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
    416       LLVM_DEBUG(dbgs() << "\n");
    417     }
    418     Changed |= FuncChanged;
    419     if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
    420       LLVM_DEBUG(dbgs() << " Stop: Cutoff reached.\n");
    421       break;
    422     }
    423   }
    424   return Changed;
    425 }
    426 
    427 bool PGOIndirectCallPromotionLegacyPass::runOnModule(Module &M) {
    428   ProfileSummaryInfo *PSI =
    429       getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
    430 
    431   // Command-line option has the priority for InLTO.
    432   return promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode,
    433                               SamplePGO | ICPSamplePGOMode);
    434 }
    435 
    436 PreservedAnalyses PGOIndirectCallPromotion::run(Module &M,
    437                                                 ModuleAnalysisManager &AM) {
    438   ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
    439 
    440   if (!promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode,
    441                             SamplePGO | ICPSamplePGOMode, &AM))
    442     return PreservedAnalyses::all();
    443 
    444   return PreservedAnalyses::none();
    445 }
    446