Home | History | Annotate | Download | only in Instrumentation
      1 //===-- IndirectCallPromotion.cpp - Promote indirect calls to direct calls ===//
      2 //
      3 //                      The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements the transformation that promotes indirect calls to
     11 // conditional direct calls when the indirect-call value profile metadata is
     12 // available.
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 #include "llvm/ADT/STLExtras.h"
     17 #include "llvm/ADT/Statistic.h"
     18 #include "llvm/ADT/Triple.h"
     19 #include "llvm/Analysis/CFG.h"
     20 #include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
     21 #include "llvm/Analysis/IndirectCallSiteVisitor.h"
     22 #include "llvm/IR/CallSite.h"
     23 #include "llvm/IR/DiagnosticInfo.h"
     24 #include "llvm/IR/IRBuilder.h"
     25 #include "llvm/IR/InstIterator.h"
     26 #include "llvm/IR/InstVisitor.h"
     27 #include "llvm/IR/Instructions.h"
     28 #include "llvm/IR/IntrinsicInst.h"
     29 #include "llvm/IR/MDBuilder.h"
     30 #include "llvm/IR/Module.h"
     31 #include "llvm/Pass.h"
     32 #include "llvm/ProfileData/InstrProfReader.h"
     33 #include "llvm/Support/Debug.h"
     34 #include "llvm/Transforms/Instrumentation.h"
     35 #include "llvm/Transforms/PGOInstrumentation.h"
     36 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
     37 #include <string>
     38 #include <utility>
     39 #include <vector>
     40 
     41 using namespace llvm;
     42 
     43 #define DEBUG_TYPE "pgo-icall-prom"
     44 
     45 STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
     46 STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
     47 
     48 // Command line option to disable indirect-call promotion with the default as
     49 // false. This is for debug purpose.
     50 static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
     51                                 cl::desc("Disable indirect call promotion"));
     52 
     53 // Set the cutoff value for the promotion. If the value is other than 0, we
     54 // stop the transformation once the total number of promotions equals the cutoff
     55 // value.
     56 // For debug use only.
     57 static cl::opt<unsigned>
     58     ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::ZeroOrMore,
     59               cl::desc("Max number of promotions for this compilaiton"));
     60 
     61 // If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped.
     62 // For debug use only.
     63 static cl::opt<unsigned>
     64     ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::ZeroOrMore,
     65               cl::desc("Skip Callsite up to this number for this compilaiton"));
     66 
     67 // Set if the pass is called in LTO optimization. The difference for LTO mode
     68 // is the pass won't prefix the source module name to the internal linkage
     69 // symbols.
     70 static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden,
     71                                 cl::desc("Run indirect-call promotion in LTO "
     72                                          "mode"));
     73 
     74 // If the option is set to true, only call instructions will be considered for
     75 // transformation -- invoke instructions will be ignored.
     76 static cl::opt<bool>
     77     ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden,
     78                 cl::desc("Run indirect-call promotion for call instructions "
     79                          "only"));
     80 
     81 // If the option is set to true, only invoke instructions will be considered for
     82 // transformation -- call instructions will be ignored.
     83 static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(false),
     84                                    cl::Hidden,
     85                                    cl::desc("Run indirect-call promotion for "
     86                                             "invoke instruction only"));
     87 
     88 // Dump the function level IR if the transformation happened in this
     89 // function. For debug use only.
     90 static cl::opt<bool>
     91     ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
     92                  cl::desc("Dump IR after transformation happens"));
     93 
     94 namespace {
     95 class PGOIndirectCallPromotionLegacyPass : public ModulePass {
     96 public:
     97   static char ID;
     98 
     99   PGOIndirectCallPromotionLegacyPass(bool InLTO = false)
    100       : ModulePass(ID), InLTO(InLTO) {
    101     initializePGOIndirectCallPromotionLegacyPassPass(
    102         *PassRegistry::getPassRegistry());
    103   }
    104 
    105   const char *getPassName() const override {
    106     return "PGOIndirectCallPromotion";
    107   }
    108 
    109 private:
    110   bool runOnModule(Module &M) override;
    111 
    112   // If this pass is called in LTO. We need to special handling the PGOFuncName
    113   // for the static variables due to LTO's internalization.
    114   bool InLTO;
    115 };
    116 } // end anonymous namespace
    117 
    118 char PGOIndirectCallPromotionLegacyPass::ID = 0;
    119 INITIALIZE_PASS(PGOIndirectCallPromotionLegacyPass, "pgo-icall-prom",
    120                 "Use PGO instrumentation profile to promote indirect calls to "
    121                 "direct calls.",
    122                 false, false)
    123 
    124 ModulePass *llvm::createPGOIndirectCallPromotionLegacyPass(bool InLTO) {
    125   return new PGOIndirectCallPromotionLegacyPass(InLTO);
    126 }
    127 
    128 namespace {
    129 // The class for main data structure to promote indirect calls to conditional
    130 // direct calls.
    131 class ICallPromotionFunc {
    132 private:
    133   Function &F;
    134   Module *M;
    135 
    136   // Symtab that maps indirect call profile values to function names and
    137   // defines.
    138   InstrProfSymtab *Symtab;
    139 
    140   enum TargetStatus {
    141     OK,                   // Should be able to promote.
    142     NotAvailableInModule, // Cannot find the target in current module.
    143     ReturnTypeMismatch,   // Return type mismatch b/w target and indirect-call.
    144     NumArgsMismatch,      // Number of arguments does not match.
    145     ArgTypeMismatch       // Type mismatch in the arguments (cannot bitcast).
    146   };
    147 
    148   // Test if we can legally promote this direct-call of Target.
    149   TargetStatus isPromotionLegal(Instruction *Inst, uint64_t Target,
    150                                 Function *&F);
    151 
    152   // A struct that records the direct target and it's call count.
    153   struct PromotionCandidate {
    154     Function *TargetFunction;
    155     uint64_t Count;
    156     PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
    157   };
    158 
    159   // Check if the indirect-call call site should be promoted. Return the number
    160   // of promotions. Inst is the candidate indirect call, ValueDataRef
    161   // contains the array of value profile data for profiled targets,
    162   // TotalCount is the total profiled count of call executions, and
    163   // NumCandidates is the number of candidate entries in ValueDataRef.
    164   std::vector<PromotionCandidate> getPromotionCandidatesForCallSite(
    165       Instruction *Inst, const ArrayRef<InstrProfValueData> &ValueDataRef,
    166       uint64_t TotalCount, uint32_t NumCandidates);
    167 
    168   // Main function that transforms Inst (either a indirect-call instruction, or
    169   // an invoke instruction , to a conditional call to F. This is like:
    170   //     if (Inst.CalledValue == F)
    171   //        F(...);
    172   //     else
    173   //        Inst(...);
    174   //     end
    175   // TotalCount is the profile count value that the instruction executes.
    176   // Count is the profile count value that F is the target function.
    177   // These two values are being used to update the branch weight.
    178   void promote(Instruction *Inst, Function *F, uint64_t Count,
    179                uint64_t TotalCount);
    180 
    181   // Promote a list of targets for one indirect-call callsite. Return
    182   // the number of promotions.
    183   uint32_t tryToPromote(Instruction *Inst,
    184                         const std::vector<PromotionCandidate> &Candidates,
    185                         uint64_t &TotalCount);
    186 
    187   static const char *StatusToString(const TargetStatus S) {
    188     switch (S) {
    189     case OK:
    190       return "OK to promote";
    191     case NotAvailableInModule:
    192       return "Cannot find the target";
    193     case ReturnTypeMismatch:
    194       return "Return type mismatch";
    195     case NumArgsMismatch:
    196       return "The number of arguments mismatch";
    197     case ArgTypeMismatch:
    198       return "Argument Type mismatch";
    199     }
    200     llvm_unreachable("Should not reach here");
    201   }
    202 
    203   // Noncopyable
    204   ICallPromotionFunc(const ICallPromotionFunc &other) = delete;
    205   ICallPromotionFunc &operator=(const ICallPromotionFunc &other) = delete;
    206 
    207 public:
    208   ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab)
    209       : F(Func), M(Modu), Symtab(Symtab) {
    210   }
    211   bool processFunction();
    212 };
    213 } // end anonymous namespace
    214 
    215 ICallPromotionFunc::TargetStatus
    216 ICallPromotionFunc::isPromotionLegal(Instruction *Inst, uint64_t Target,
    217                                      Function *&TargetFunction) {
    218   Function *DirectCallee = Symtab->getFunction(Target);
    219   if (DirectCallee == nullptr)
    220     return NotAvailableInModule;
    221   // Check the return type.
    222   Type *CallRetType = Inst->getType();
    223   if (!CallRetType->isVoidTy()) {
    224     Type *FuncRetType = DirectCallee->getReturnType();
    225     if (FuncRetType != CallRetType &&
    226         !CastInst::isBitCastable(FuncRetType, CallRetType))
    227       return ReturnTypeMismatch;
    228   }
    229 
    230   // Check if the arguments are compatible with the parameters
    231   FunctionType *DirectCalleeType = DirectCallee->getFunctionType();
    232   unsigned ParamNum = DirectCalleeType->getFunctionNumParams();
    233   CallSite CS(Inst);
    234   unsigned ArgNum = CS.arg_size();
    235 
    236   if (ParamNum != ArgNum && !DirectCalleeType->isVarArg())
    237     return NumArgsMismatch;
    238 
    239   for (unsigned I = 0; I < ParamNum; ++I) {
    240     Type *PTy = DirectCalleeType->getFunctionParamType(I);
    241     Type *ATy = CS.getArgument(I)->getType();
    242     if (PTy == ATy)
    243       continue;
    244     if (!CastInst::castIsValid(Instruction::BitCast, CS.getArgument(I), PTy))
    245       return ArgTypeMismatch;
    246   }
    247 
    248   DEBUG(dbgs() << " #" << NumOfPGOICallPromotion << " Promote the icall to "
    249                << Symtab->getFuncName(Target) << "\n");
    250   TargetFunction = DirectCallee;
    251   return OK;
    252 }
    253 
    254 // Indirect-call promotion heuristic. The direct targets are sorted based on
    255 // the count. Stop at the first target that is not promoted.
    256 std::vector<ICallPromotionFunc::PromotionCandidate>
    257 ICallPromotionFunc::getPromotionCandidatesForCallSite(
    258     Instruction *Inst, const ArrayRef<InstrProfValueData> &ValueDataRef,
    259     uint64_t TotalCount, uint32_t NumCandidates) {
    260   std::vector<PromotionCandidate> Ret;
    261 
    262   DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << *Inst
    263                << " Num_targets: " << ValueDataRef.size()
    264                << " Num_candidates: " << NumCandidates << "\n");
    265   NumOfPGOICallsites++;
    266   if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) {
    267     DEBUG(dbgs() << " Skip: User options.\n");
    268     return Ret;
    269   }
    270 
    271   for (uint32_t I = 0; I < NumCandidates; I++) {
    272     uint64_t Count = ValueDataRef[I].Count;
    273     assert(Count <= TotalCount);
    274     uint64_t Target = ValueDataRef[I].Value;
    275     DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
    276                  << "  Target_func: " << Target << "\n");
    277 
    278     if (ICPInvokeOnly && dyn_cast<CallInst>(Inst)) {
    279       DEBUG(dbgs() << " Not promote: User options.\n");
    280       break;
    281     }
    282     if (ICPCallOnly && dyn_cast<InvokeInst>(Inst)) {
    283       DEBUG(dbgs() << " Not promote: User option.\n");
    284       break;
    285     }
    286     if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
    287       DEBUG(dbgs() << " Not promote: Cutoff reached.\n");
    288       break;
    289     }
    290     Function *TargetFunction = nullptr;
    291     TargetStatus Status = isPromotionLegal(Inst, Target, TargetFunction);
    292     if (Status != OK) {
    293       StringRef TargetFuncName = Symtab->getFuncName(Target);
    294       const char *Reason = StatusToString(Status);
    295       DEBUG(dbgs() << " Not promote: " << Reason << "\n");
    296       emitOptimizationRemarkMissed(
    297           F.getContext(), "pgo-icall-prom", F, Inst->getDebugLoc(),
    298           Twine("Cannot promote indirect call to ") +
    299               (TargetFuncName.empty() ? Twine(Target) : Twine(TargetFuncName)) +
    300               Twine(" with count of ") + Twine(Count) + ": " + Reason);
    301       break;
    302     }
    303     Ret.push_back(PromotionCandidate(TargetFunction, Count));
    304     TotalCount -= Count;
    305   }
    306   return Ret;
    307 }
    308 
    309 // Create a diamond structure for If_Then_Else. Also update the profile
    310 // count. Do the fix-up for the invoke instruction.
    311 static void createIfThenElse(Instruction *Inst, Function *DirectCallee,
    312                              uint64_t Count, uint64_t TotalCount,
    313                              BasicBlock **DirectCallBB,
    314                              BasicBlock **IndirectCallBB,
    315                              BasicBlock **MergeBB) {
    316   CallSite CS(Inst);
    317   Value *OrigCallee = CS.getCalledValue();
    318 
    319   IRBuilder<> BBBuilder(Inst);
    320   LLVMContext &Ctx = Inst->getContext();
    321   Value *BCI1 =
    322       BBBuilder.CreateBitCast(OrigCallee, Type::getInt8PtrTy(Ctx), "");
    323   Value *BCI2 =
    324       BBBuilder.CreateBitCast(DirectCallee, Type::getInt8PtrTy(Ctx), "");
    325   Value *PtrCmp = BBBuilder.CreateICmpEQ(BCI1, BCI2, "");
    326 
    327   uint64_t ElseCount = TotalCount - Count;
    328   uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount);
    329   uint64_t Scale = calculateCountScale(MaxCount);
    330   MDBuilder MDB(Inst->getContext());
    331   MDNode *BranchWeights = MDB.createBranchWeights(
    332       scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale));
    333   TerminatorInst *ThenTerm, *ElseTerm;
    334   SplitBlockAndInsertIfThenElse(PtrCmp, Inst, &ThenTerm, &ElseTerm,
    335                                 BranchWeights);
    336   *DirectCallBB = ThenTerm->getParent();
    337   (*DirectCallBB)->setName("if.true.direct_targ");
    338   *IndirectCallBB = ElseTerm->getParent();
    339   (*IndirectCallBB)->setName("if.false.orig_indirect");
    340   *MergeBB = Inst->getParent();
    341   (*MergeBB)->setName("if.end.icp");
    342 
    343   // Special handing of Invoke instructions.
    344   InvokeInst *II = dyn_cast<InvokeInst>(Inst);
    345   if (!II)
    346     return;
    347 
    348   // We don't need branch instructions for invoke.
    349   ThenTerm->eraseFromParent();
    350   ElseTerm->eraseFromParent();
    351 
    352   // Add jump from Merge BB to the NormalDest. This is needed for the newly
    353   // created direct invoke stmt -- as its NormalDst will be fixed up to MergeBB.
    354   BranchInst::Create(II->getNormalDest(), *MergeBB);
    355 }
    356 
    357 // Find the PHI in BB that have the CallResult as the operand.
    358 static bool getCallRetPHINode(BasicBlock *BB, Instruction *Inst) {
    359   BasicBlock *From = Inst->getParent();
    360   for (auto &I : *BB) {
    361     PHINode *PHI = dyn_cast<PHINode>(&I);
    362     if (!PHI)
    363       continue;
    364     int IX = PHI->getBasicBlockIndex(From);
    365     if (IX == -1)
    366       continue;
    367     Value *V = PHI->getIncomingValue(IX);
    368     if (dyn_cast<Instruction>(V) == Inst)
    369       return true;
    370   }
    371   return false;
    372 }
    373 
    374 // This method fixes up PHI nodes in BB where BB is the UnwindDest of an
    375 // invoke instruction. In BB, there may be PHIs with incoming block being
    376 // OrigBB (the MergeBB after if-then-else splitting). After moving the invoke
    377 // instructions to its own BB, OrigBB is no longer the predecessor block of BB.
    378 // Instead two new predecessors are added: IndirectCallBB and DirectCallBB,
    379 // so the PHI node's incoming BBs need to be fixed up accordingly.
    380 static void fixupPHINodeForUnwind(Instruction *Inst, BasicBlock *BB,
    381                                   BasicBlock *OrigBB,
    382                                   BasicBlock *IndirectCallBB,
    383                                   BasicBlock *DirectCallBB) {
    384   for (auto &I : *BB) {
    385     PHINode *PHI = dyn_cast<PHINode>(&I);
    386     if (!PHI)
    387       continue;
    388     int IX = PHI->getBasicBlockIndex(OrigBB);
    389     if (IX == -1)
    390       continue;
    391     Value *V = PHI->getIncomingValue(IX);
    392     PHI->addIncoming(V, IndirectCallBB);
    393     PHI->setIncomingBlock(IX, DirectCallBB);
    394   }
    395 }
    396 
    397 // This method fixes up PHI nodes in BB where BB is the NormalDest of an
    398 // invoke instruction. In BB, there may be PHIs with incoming block being
    399 // OrigBB (the MergeBB after if-then-else splitting). After moving the invoke
    400 // instructions to its own BB, a new incoming edge will be added to the original
    401 // NormalDstBB from the IndirectCallBB.
    402 static void fixupPHINodeForNormalDest(Instruction *Inst, BasicBlock *BB,
    403                                       BasicBlock *OrigBB,
    404                                       BasicBlock *IndirectCallBB,
    405                                       Instruction *NewInst) {
    406   for (auto &I : *BB) {
    407     PHINode *PHI = dyn_cast<PHINode>(&I);
    408     if (!PHI)
    409       continue;
    410     int IX = PHI->getBasicBlockIndex(OrigBB);
    411     if (IX == -1)
    412       continue;
    413     Value *V = PHI->getIncomingValue(IX);
    414     if (dyn_cast<Instruction>(V) == Inst) {
    415       PHI->setIncomingBlock(IX, IndirectCallBB);
    416       PHI->addIncoming(NewInst, OrigBB);
    417       continue;
    418     }
    419     PHI->addIncoming(V, IndirectCallBB);
    420   }
    421 }
    422 
    423 // Add a bitcast instruction to the direct-call return value if needed.
    424 static Instruction *insertCallRetCast(const Instruction *Inst,
    425                                       Instruction *DirectCallInst,
    426                                       Function *DirectCallee) {
    427   if (Inst->getType()->isVoidTy())
    428     return DirectCallInst;
    429 
    430   Type *CallRetType = Inst->getType();
    431   Type *FuncRetType = DirectCallee->getReturnType();
    432   if (FuncRetType == CallRetType)
    433     return DirectCallInst;
    434 
    435   BasicBlock *InsertionBB;
    436   if (CallInst *CI = dyn_cast<CallInst>(DirectCallInst))
    437     InsertionBB = CI->getParent();
    438   else
    439     InsertionBB = (dyn_cast<InvokeInst>(DirectCallInst))->getNormalDest();
    440 
    441   return (new BitCastInst(DirectCallInst, CallRetType, "",
    442                           InsertionBB->getTerminator()));
    443 }
    444 
    445 // Create a DirectCall instruction in the DirectCallBB.
    446 // Parameter Inst is the indirect-call (invoke) instruction.
    447 // DirectCallee is the decl of the direct-call (invoke) target.
    448 // DirecallBB is the BB that the direct-call (invoke) instruction is inserted.
    449 // MergeBB is the bottom BB of the if-then-else-diamond after the
    450 // transformation. For invoke instruction, the edges from DirectCallBB and
    451 // IndirectCallBB to MergeBB are removed before this call (during
    452 // createIfThenElse).
    453 static Instruction *createDirectCallInst(const Instruction *Inst,
    454                                          Function *DirectCallee,
    455                                          BasicBlock *DirectCallBB,
    456                                          BasicBlock *MergeBB) {
    457   Instruction *NewInst = Inst->clone();
    458   if (CallInst *CI = dyn_cast<CallInst>(NewInst)) {
    459     CI->setCalledFunction(DirectCallee);
    460     CI->mutateFunctionType(DirectCallee->getFunctionType());
    461   } else {
    462     // Must be an invoke instruction. Direct invoke's normal destination is
    463     // fixed up to MergeBB. MergeBB is the place where return cast is inserted.
    464     // Also since IndirectCallBB does not have an edge to MergeBB, there is no
    465     // need to insert new PHIs into MergeBB.
    466     InvokeInst *II = dyn_cast<InvokeInst>(NewInst);
    467     assert(II);
    468     II->setCalledFunction(DirectCallee);
    469     II->mutateFunctionType(DirectCallee->getFunctionType());
    470     II->setNormalDest(MergeBB);
    471   }
    472 
    473   DirectCallBB->getInstList().insert(DirectCallBB->getFirstInsertionPt(),
    474                                      NewInst);
    475 
    476   // Clear the value profile data.
    477   NewInst->setMetadata(LLVMContext::MD_prof, 0);
    478   CallSite NewCS(NewInst);
    479   FunctionType *DirectCalleeType = DirectCallee->getFunctionType();
    480   unsigned ParamNum = DirectCalleeType->getFunctionNumParams();
    481   for (unsigned I = 0; I < ParamNum; ++I) {
    482     Type *ATy = NewCS.getArgument(I)->getType();
    483     Type *PTy = DirectCalleeType->getParamType(I);
    484     if (ATy != PTy) {
    485       BitCastInst *BI = new BitCastInst(NewCS.getArgument(I), PTy, "", NewInst);
    486       NewCS.setArgument(I, BI);
    487     }
    488   }
    489 
    490   return insertCallRetCast(Inst, NewInst, DirectCallee);
    491 }
    492 
    493 // Create a PHI to unify the return values of calls.
    494 static void insertCallRetPHI(Instruction *Inst, Instruction *CallResult,
    495                              Function *DirectCallee) {
    496   if (Inst->getType()->isVoidTy())
    497     return;
    498 
    499   BasicBlock *RetValBB = CallResult->getParent();
    500 
    501   BasicBlock *PHIBB;
    502   if (InvokeInst *II = dyn_cast<InvokeInst>(CallResult))
    503     RetValBB = II->getNormalDest();
    504 
    505   PHIBB = RetValBB->getSingleSuccessor();
    506   if (getCallRetPHINode(PHIBB, Inst))
    507     return;
    508 
    509   PHINode *CallRetPHI = PHINode::Create(Inst->getType(), 0);
    510   PHIBB->getInstList().push_front(CallRetPHI);
    511   Inst->replaceAllUsesWith(CallRetPHI);
    512   CallRetPHI->addIncoming(Inst, Inst->getParent());
    513   CallRetPHI->addIncoming(CallResult, RetValBB);
    514 }
    515 
    516 // This function does the actual indirect-call promotion transformation:
    517 // For an indirect-call like:
    518 //     Ret = (*Foo)(Args);
    519 // It transforms to:
    520 //     if (Foo == DirectCallee)
    521 //        Ret1 = DirectCallee(Args);
    522 //     else
    523 //        Ret2 = (*Foo)(Args);
    524 //     Ret = phi(Ret1, Ret2);
    525 // It adds type casts for the args do not match the parameters and the return
    526 // value. Branch weights metadata also updated.
    527 void ICallPromotionFunc::promote(Instruction *Inst, Function *DirectCallee,
    528                                  uint64_t Count, uint64_t TotalCount) {
    529   assert(DirectCallee != nullptr);
    530   BasicBlock *BB = Inst->getParent();
    531   // Just to suppress the non-debug build warning.
    532   (void)BB;
    533   DEBUG(dbgs() << "\n\n== Basic Block Before ==\n");
    534   DEBUG(dbgs() << *BB << "\n");
    535 
    536   BasicBlock *DirectCallBB, *IndirectCallBB, *MergeBB;
    537   createIfThenElse(Inst, DirectCallee, Count, TotalCount, &DirectCallBB,
    538                    &IndirectCallBB, &MergeBB);
    539 
    540   Instruction *NewInst =
    541       createDirectCallInst(Inst, DirectCallee, DirectCallBB, MergeBB);
    542 
    543   // Move Inst from MergeBB to IndirectCallBB.
    544   Inst->removeFromParent();
    545   IndirectCallBB->getInstList().insert(IndirectCallBB->getFirstInsertionPt(),
    546                                        Inst);
    547 
    548   if (InvokeInst *II = dyn_cast<InvokeInst>(Inst)) {
    549     // At this point, the original indirect invoke instruction has the original
    550     // UnwindDest and NormalDest. For the direct invoke instruction, the
    551     // NormalDest points to MergeBB, and MergeBB jumps to the original
    552     // NormalDest. MergeBB might have a new bitcast instruction for the return
    553     // value. The PHIs are with the original NormalDest. Since we now have two
    554     // incoming edges to NormalDest and UnwindDest, we have to do some fixups.
    555     //
    556     // UnwindDest will not use the return value. So pass nullptr here.
    557     fixupPHINodeForUnwind(Inst, II->getUnwindDest(), MergeBB, IndirectCallBB,
    558                           DirectCallBB);
    559     // We don't need to update the operand from NormalDest for DirectCallBB.
    560     // Pass nullptr here.
    561     fixupPHINodeForNormalDest(Inst, II->getNormalDest(), MergeBB,
    562                               IndirectCallBB, NewInst);
    563   }
    564 
    565   insertCallRetPHI(Inst, NewInst, DirectCallee);
    566 
    567   DEBUG(dbgs() << "\n== Basic Blocks After ==\n");
    568   DEBUG(dbgs() << *BB << *DirectCallBB << *IndirectCallBB << *MergeBB << "\n");
    569 
    570   emitOptimizationRemark(
    571       F.getContext(), "pgo-icall-prom", F, Inst->getDebugLoc(),
    572       Twine("Promote indirect call to ") + DirectCallee->getName() +
    573           " with count " + Twine(Count) + " out of " + Twine(TotalCount));
    574 }
    575 
    576 // Promote indirect-call to conditional direct-call for one callsite.
    577 uint32_t ICallPromotionFunc::tryToPromote(
    578     Instruction *Inst, const std::vector<PromotionCandidate> &Candidates,
    579     uint64_t &TotalCount) {
    580   uint32_t NumPromoted = 0;
    581 
    582   for (auto &C : Candidates) {
    583     uint64_t Count = C.Count;
    584     promote(Inst, C.TargetFunction, Count, TotalCount);
    585     assert(TotalCount >= Count);
    586     TotalCount -= Count;
    587     NumOfPGOICallPromotion++;
    588     NumPromoted++;
    589   }
    590   return NumPromoted;
    591 }
    592 
    593 // Traverse all the indirect-call callsite and get the value profile
    594 // annotation to perform indirect-call promotion.
    595 bool ICallPromotionFunc::processFunction() {
    596   bool Changed = false;
    597   ICallPromotionAnalysis ICallAnalysis;
    598   for (auto &I : findIndirectCallSites(F)) {
    599     uint32_t NumVals, NumCandidates;
    600     uint64_t TotalCount;
    601     auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
    602         I, NumVals, TotalCount, NumCandidates);
    603     if (!NumCandidates)
    604       continue;
    605     auto PromotionCandidates = getPromotionCandidatesForCallSite(
    606         I, ICallProfDataRef, TotalCount, NumCandidates);
    607     uint32_t NumPromoted = tryToPromote(I, PromotionCandidates, TotalCount);
    608     if (NumPromoted == 0)
    609       continue;
    610 
    611     Changed = true;
    612     // Adjust the MD.prof metadata. First delete the old one.
    613     I->setMetadata(LLVMContext::MD_prof, 0);
    614     // If all promoted, we don't need the MD.prof metadata.
    615     if (TotalCount == 0 || NumPromoted == NumVals)
    616       continue;
    617     // Otherwise we need update with the un-promoted records back.
    618     annotateValueSite(*M, *I, ICallProfDataRef.slice(NumPromoted), TotalCount,
    619                       IPVK_IndirectCallTarget, NumCandidates);
    620   }
    621   return Changed;
    622 }
    623 
    624 // A wrapper function that does the actual work.
    625 static bool promoteIndirectCalls(Module &M, bool InLTO) {
    626   if (DisableICP)
    627     return false;
    628   InstrProfSymtab Symtab;
    629   Symtab.create(M, InLTO);
    630   bool Changed = false;
    631   for (auto &F : M) {
    632     if (F.isDeclaration())
    633       continue;
    634     if (F.hasFnAttribute(Attribute::OptimizeNone))
    635       continue;
    636     ICallPromotionFunc ICallPromotion(F, &M, &Symtab);
    637     bool FuncChanged = ICallPromotion.processFunction();
    638     if (ICPDUMPAFTER && FuncChanged) {
    639       DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
    640       DEBUG(dbgs() << "\n");
    641     }
    642     Changed |= FuncChanged;
    643     if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
    644       DEBUG(dbgs() << " Stop: Cutoff reached.\n");
    645       break;
    646     }
    647   }
    648   return Changed;
    649 }
    650 
    651 bool PGOIndirectCallPromotionLegacyPass::runOnModule(Module &M) {
    652   // Command-line option has the priority for InLTO.
    653   return promoteIndirectCalls(M, InLTO | ICPLTOMode);
    654 }
    655 
    656 PreservedAnalyses PGOIndirectCallPromotion::run(Module &M, AnalysisManager<Module> &AM) {
    657   if (!promoteIndirectCalls(M, InLTO | ICPLTOMode))
    658     return PreservedAnalyses::all();
    659 
    660   return PreservedAnalyses::none();
    661 }
    662