//===- InlineFunction.cpp - Code to perform function inlining -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements inlining of a function into a call site, resolving
// parameters and the return value as appropriate.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI,
                          bool InsertLifetime) {
  return InlineFunction(CallSite(CI), IFI, InsertLifetime);
}
bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
                          bool InsertLifetime) {
  return InlineFunction(CallSite(II), IFI, InsertLifetime);
}

namespace {
  /// A class for recording information about inlining through an invoke.
  class InvokeInliningInfo {
    BasicBlock *OuterResumeDest; ///< Destination of the invoke's unwind.
    BasicBlock *InnerResumeDest; ///< Destination for the callee's resume.
    LandingPadInst *CallerLPad;  ///< LandingPadInst associated with the invoke.
    PHINode *InnerEHValuesPHI;   ///< PHI for EH values from landingpad insts.
    SmallVector<Value*, 8> UnwindDestPHIValues;

  public:
    InvokeInliningInfo(InvokeInst *II)
      : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(nullptr),
        CallerLPad(nullptr), InnerEHValuesPHI(nullptr) {
      // If there are PHI nodes in the unwind destination block, we need to keep
      // track of which values came into them from the invoke before removing
      // the edge from this block.
      llvm::BasicBlock *InvokeBB = II->getParent();
      BasicBlock::iterator I = OuterResumeDest->begin();
      for (; isa<PHINode>(I); ++I) {
        // Save the value to use for this edge.
        PHINode *PHI = cast<PHINode>(I);
        UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
      }

      CallerLPad = cast<LandingPadInst>(I);
    }

    /// getOuterResumeDest - The outer unwind destination is the target of
    /// unwind edges introduced for calls within the inlined function.
    BasicBlock *getOuterResumeDest() const {
      return OuterResumeDest;
    }

    BasicBlock *getInnerResumeDest();

    LandingPadInst *getLandingPadInst() const { return CallerLPad; }

    /// forwardResume - Forward the 'resume' instruction to the caller's landing
    /// pad block. When the landing pad block has only one predecessor, this is
    /// a simple branch. When there is more than one predecessor, we need to
    /// split the landing pad block after the landingpad instruction and jump
    /// to there.
    void forwardResume(ResumeInst *RI,
                       SmallPtrSet<LandingPadInst*, 16> &InlinedLPads);

    /// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind
    /// destination block for the given basic block, using the values for the
    /// original invoke's source block.
    void addIncomingPHIValuesFor(BasicBlock *BB) const {
      addIncomingPHIValuesForInto(BB, OuterResumeDest);
    }

    void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const {
      BasicBlock::iterator I = dest->begin();
      for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
        PHINode *phi = cast<PHINode>(I);
        phi->addIncoming(UnwindDestPHIValues[i], src);
      }
    }
  };
}

/// getInnerResumeDest - Get or create a target for the branch from ResumeInsts.
BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
  if (InnerResumeDest) return InnerResumeDest;

  // Split the landing pad.
  BasicBlock::iterator SplitPoint = CallerLPad; ++SplitPoint;
  InnerResumeDest =
    OuterResumeDest->splitBasicBlock(SplitPoint,
                                     OuterResumeDest->getName() + ".body");

  // The number of incoming edges we expect to the inner landing pad.
  const unsigned PHICapacity = 2;

  // Create corresponding new PHIs for all the PHIs in the outer landing pad.
  BasicBlock::iterator InsertPoint = InnerResumeDest->begin();
  BasicBlock::iterator I = OuterResumeDest->begin();
  for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
    PHINode *OuterPHI = cast<PHINode>(I);
    PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity,
                                        OuterPHI->getName() + ".lpad-body",
                                        InsertPoint);
    OuterPHI->replaceAllUsesWith(InnerPHI);
    InnerPHI->addIncoming(OuterPHI, OuterResumeDest);
  }

  // Create a PHI for the exception values.
  InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity,
                                     "eh.lpad-body", InsertPoint);
  CallerLPad->replaceAllUsesWith(InnerEHValuesPHI);
  InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest);

  // All done.
  return InnerResumeDest;
}

/// forwardResume - Forward the 'resume' instruction to the caller's landing pad
/// block. When the landing pad block has only one predecessor, this is a simple
/// branch. When there is more than one predecessor, we need to split the
/// landing pad block after the landingpad instruction and jump to there.
void InvokeInliningInfo::forwardResume(ResumeInst *RI,
                               SmallPtrSet<LandingPadInst*, 16> &InlinedLPads) {
  BasicBlock *Dest = getInnerResumeDest();
  BasicBlock *Src = RI->getParent();

  BranchInst::Create(Dest, Src);

  // Update the PHIs in the destination. They were inserted in an order which
  // makes this work.
  addIncomingPHIValuesForInto(Src, Dest);

  InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src);
  RI->eraseFromParent();
}

/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
/// an invoke, we have to turn all of the calls that can throw into
/// invokes.  This function analyzes BB to see if there are any calls, and if
/// so, it rewrites them to be invokes that jump to the caller's unwind
/// destination and fills in the PHI nodes of that destination accordingly.
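///
/// As an illustrative sketch only (the value and label names below are
/// invented, not taken from real code), a throwing call in an inlined block
/// such as:
///
///   %r = call i32 @callee(i32 %a)
///   ; ...rest of the block...
///
/// becomes, after the block is split at the call:
///
///   %r = invoke i32 @callee(i32 %a)
///           to label %r.noexc unwind label %caller.lpad
///
/// where %r.noexc holds the remainder of the original block and %caller.lpad
/// is the outer resume destination recorded in Invoke.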
static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
                                                   InvokeInliningInfo &Invoke) {
  for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
    Instruction *I = BBI++;

    // We only need to check for function calls: inlined invoke
    // instructions require no special handling.
    CallInst *CI = dyn_cast<CallInst>(I);

    // If this call cannot unwind, don't convert it to an invoke.
    // Inline asm calls cannot throw.
    if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue()))
      continue;

    // Convert this function call into an invoke instruction.  First, split the
    // basic block.
    BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc");

    // Delete the unconditional branch inserted by splitBasicBlock
    BB->getInstList().pop_back();

    // Create the new invoke instruction.
    ImmutableCallSite CS(CI);
    SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end());
    InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split,
                                        Invoke.getOuterResumeDest(),
                                        InvokeArgs, CI->getName(), BB);
    II->setDebugLoc(CI->getDebugLoc());
    II->setCallingConv(CI->getCallingConv());
    II->setAttributes(CI->getAttributes());

    // Make sure that anything using the call now uses the invoke!  This also
    // updates the CallGraph if present, because it uses a WeakVH.
    CI->replaceAllUsesWith(II);

    // Delete the original call
    Split->getInstList().pop_front();

    // Update any PHI nodes in the exceptional block to indicate that there is
    // now a new entry in them.
    Invoke.addIncomingPHIValuesFor(BB);
    return;
  }
}

/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls
/// in the body of the inlined function into invokes.
///
/// II is the invoke instruction being inlined.  FirstNewBlock is the first
/// block of the inlined code (the last block is the end of the function),
/// and InlinedCodeInfo is information about the code that got inlined.
static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
                                ClonedCodeInfo &InlinedCodeInfo) {
  BasicBlock *InvokeDest = II->getUnwindDest();

  Function *Caller = FirstNewBlock->getParent();

  // The inlined code is currently at the end of the function; scan from the
  // start of the inlined code to its end, checking for anything we need to
  // rewrite.
  InvokeInliningInfo Invoke(II);

  // Get all of the inlined landing pad instructions.
  SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
  for (Function::iterator I = FirstNewBlock, E = Caller->end(); I != E; ++I)
    if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator()))
      InlinedLPads.insert(II->getLandingPadInst());

  // Append the clauses from the outer landing pad instruction into the inlined
  // landing pad instructions.
  LandingPadInst *OuterLPad = Invoke.getLandingPadInst();
  for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(),
         E = InlinedLPads.end(); I != E; ++I) {
    LandingPadInst *InlinedLPad = *I;
    unsigned OuterNum = OuterLPad->getNumClauses();
    InlinedLPad->reserveClauses(OuterNum);
    for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx)
      InlinedLPad->addClause(OuterLPad->getClause(OuterIdx));
    if (OuterLPad->isCleanup())
      InlinedLPad->setCleanup(true);
  }

  for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){
    if (InlinedCodeInfo.ContainsCalls)
      HandleCallsInBlockInlinedThroughInvoke(BB, Invoke);

    // Forward any resumes that are remaining here.
    if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
      Invoke.forwardResume(RI, InlinedLPads);
  }

  // Now that everything is happy, we have one final detail.  The PHI nodes in
  // the exception destination block still have entries due to the original
  // invoke instruction. Eliminate these entries (which might even delete the
  // PHI node) now.
  InvokeDest->removePredecessor(II->getParent());
}

/// UpdateCallGraphAfterInlining - Once we have cloned code over from a callee
/// into the caller, update the specified callgraph to reflect the changes we
/// made.  Note that it's possible that not all code was copied over, so only
/// some edges of the callgraph may remain.
static void UpdateCallGraphAfterInlining(CallSite CS,
                                         Function::iterator FirstNewBlock,
                                         ValueToValueMapTy &VMap,
                                         InlineFunctionInfo &IFI) {
  CallGraph &CG = *IFI.CG;
  const Function *Caller = CS.getInstruction()->getParent()->getParent();
  const Function *Callee = CS.getCalledFunction();
  CallGraphNode *CalleeNode = CG[Callee];
  CallGraphNode *CallerNode = CG[Caller];

  // Since we inlined some uninlined call sites in the callee into the caller,
  // add edges from the caller to all of the callees of the callee.
  CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end();

  // Consider the case where CalleeNode == CallerNode.
  CallGraphNode::CalledFunctionsVector CallCache;
  if (CalleeNode == CallerNode) {
    CallCache.assign(I, E);
    I = CallCache.begin();
    E = CallCache.end();
  }

  for (; I != E; ++I) {
    const Value *OrigCall = I->first;

    ValueToValueMapTy::iterator VMI = VMap.find(OrigCall);
    // Only copy the edge if the call was inlined!
    if (VMI == VMap.end() || VMI->second == nullptr)
      continue;

    // If the call was inlined, but then constant folded, there is no edge to
    // add.  Check for this case.
    Instruction *NewCall = dyn_cast<Instruction>(VMI->second);
    if (!NewCall) continue;

    // Remember that this call site got inlined for the client of
    // InlineFunction.
    IFI.InlinedCalls.push_back(NewCall);

    // It's possible that inlining the callsite will cause it to go from an
    // indirect to a direct call by resolving a function pointer.  If this
    // happens, set the callee of the new call site to a more precise
    // destination.  This can also happen if the call graph node of the caller
    // was just unnecessarily imprecise.
    if (!I->second->getFunction())
      if (Function *F = CallSite(NewCall).getCalledFunction()) {
        // Indirect call site resolved to direct call.
        CallerNode->addCalledFunction(CallSite(NewCall), CG[F]);

        continue;
      }

    CallerNode->addCalledFunction(CallSite(NewCall), I->second);
  }

  // Update the call graph by deleting the edge from Callee to Caller.  We must
  // do this after the loop above in case Caller and Callee are the same.
  CallerNode->removeCallEdgeFor(CS);
}

static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
                                    BasicBlock *InsertBlock,
                                    InlineFunctionInfo &IFI) {
  LLVMContext &Context = Src->getContext();
  Type *VoidPtrTy = Type::getInt8PtrTy(Context);
  Type *AggTy = cast<PointerType>(Src->getType())->getElementType();
  Type *Tys[3] = { VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context) };
  Function *MemCpyFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
  IRBuilder<> builder(InsertBlock->begin());
  Value *DstCast = builder.CreateBitCast(Dst, VoidPtrTy, "tmp");
  Value *SrcCast = builder.CreateBitCast(Src, VoidPtrTy, "tmp");

  Value *Size;
  if (IFI.DL == nullptr)
    Size = ConstantExpr::getSizeOf(AggTy);
  else
    Size = ConstantInt::get(Type::getInt64Ty(Context),
                            IFI.DL->getTypeStoreSize(AggTy));

  // Always generate a memcpy of alignment 1 here because we don't know
  // the alignment of the src pointer.  Other optimizations can infer
  // better alignment.
  Value *CallArgs[] = {
    DstCast, SrcCast, Size,
    ConstantInt::get(Type::getInt32Ty(Context), 1),
    ConstantInt::getFalse(Context) // isVolatile
  };
  builder.CreateCall(MemCpyFn, CallArgs);
}

/// HandleByValArgument - When inlining a call site that has a byval argument,
/// we have to make the implicit memcpy explicit by adding it.
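///
/// A rough sketch of the effect (all names here are illustrative, not the
/// actual values the code produces): for a call site such as
///
///   call void @callee(%struct.S* byval %p)
///
/// the inlined body gets a fresh alloca in the caller plus the copy that
/// byval used to imply, conceptually
///
///   %p.copy = alloca %struct.S
///   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size,
///                                        i32 1, i1 false)
///
/// and uses of the argument inside the inlined code are rewritten to use
/// %p.copy (the memcpy itself is emitted by HandleByValArgumentInit above).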
static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
                                  const Function *CalledFunc,
                                  InlineFunctionInfo &IFI,
                                  unsigned ByValAlignment) {
  PointerType *ArgTy = cast<PointerType>(Arg->getType());
  Type *AggTy = ArgTy->getElementType();

  // If the called function is readonly, then it could not mutate the caller's
  // copy of the byval'd memory.  In this case, it is safe to elide the copy and
  // temporary.
  if (CalledFunc->onlyReadsMemory()) {
    // If the byval argument has a specified alignment that is greater than the
    // passed in pointer, then we either have to round up the input pointer or
    // give up on this transformation.
    if (ByValAlignment <= 1)  // 0 = unspecified, 1 = no particular alignment.
      return Arg;

    // If the pointer is already known to be sufficiently aligned, or if we can
    // round it up to a larger alignment, then we don't need a temporary.
    if (getOrEnforceKnownAlignment(Arg, ByValAlignment,
                                   IFI.DL) >= ByValAlignment)
      return Arg;

    // Otherwise, we have to make a memcpy to get a safe alignment.  This is bad
    // for code quality, but rarely happens and is required for correctness.
  }

  // Create the alloca.  If we have DataLayout, use nice alignment.
  unsigned Align = 1;
  if (IFI.DL)
    Align = IFI.DL->getPrefTypeAlignment(AggTy);

  // If the byval had an alignment specified, we *must* use at least that
  // alignment, as it is required by the byval argument (and uses of the
  // pointer inside the callee).
  Align = std::max(Align, ByValAlignment);

  Function *Caller = TheCall->getParent()->getParent();

  Value *NewAlloca = new AllocaInst(AggTy, nullptr, Align, Arg->getName(),
                                    &*Caller->begin()->begin());
  IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca));

  // Uses of the argument in the function should use our new alloca
  // instead.
  return NewAlloca;
}

// isUsedByLifetimeMarker - Check whether this Value is used by a lifetime
// intrinsic.
static bool isUsedByLifetimeMarker(Value *V) {
  for (User *U : V->users()) {
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
      switch (II->getIntrinsicID()) {
      default: break;
      case Intrinsic::lifetime_start:
      case Intrinsic::lifetime_end:
        return true;
      }
    }
  }
  return false;
}

// hasLifetimeMarkers - Check whether the given alloca already has
// lifetime.start or lifetime.end intrinsics.
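//
// For reference (names purely illustrative), the markers this is looking for
// take roughly the form
//
//   %p = bitcast i32* %buf to i8*
//   call void @llvm.lifetime.start(i64 4, i8* %p)
//   ...
//   call void @llvm.lifetime.end(i64 4, i8* %p)
//
// either directly on the alloca (when it is already i8*) or on an i8* cast
// of it, which is why the scan below also inspects pointer casts of AI.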
static bool hasLifetimeMarkers(AllocaInst *AI) {
  Type *Ty = AI->getType();
  Type *Int8PtrTy = Type::getInt8PtrTy(Ty->getContext(),
                                       Ty->getPointerAddressSpace());
  if (Ty == Int8PtrTy)
    return isUsedByLifetimeMarker(AI);

  // Do a scan to find all the casts to i8*.
  for (User *U : AI->users()) {
    if (U->getType() != Int8PtrTy) continue;
    if (U->stripPointerCasts() != AI) continue;
    if (isUsedByLifetimeMarker(U))
      return true;
  }
  return false;
}

/// updateInlinedAtInfo - Helper function used by fixupLineNumbers to
/// recursively update InlinedAtEntry of a DebugLoc.
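///
/// For example (line numbers invented for illustration): if DL describes
/// "line 10, inlined at line 20" and InlinedAtDL is the location of the call
/// site we are inlining through at line 30, the result describes
/// "line 10, inlined at (line 20, inlined at line 30)"; the new call site is
/// appended as the outermost entry of the existing inlined-at chain.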
static DebugLoc updateInlinedAtInfo(const DebugLoc &DL,
                                    const DebugLoc &InlinedAtDL,
                                    LLVMContext &Ctx) {
  if (MDNode *IA = DL.getInlinedAt(Ctx)) {
    DebugLoc NewInlinedAtDL
      = updateInlinedAtInfo(DebugLoc::getFromDILocation(IA), InlinedAtDL, Ctx);
    return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
                         NewInlinedAtDL.getAsMDNode(Ctx));
  }

  return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
                       InlinedAtDL.getAsMDNode(Ctx));
}

/// fixupLineNumbers - Update inlined instructions' line numbers to
/// encode the location where these instructions are inlined.
static void fixupLineNumbers(Function *Fn, Function::iterator FI,
                             Instruction *TheCall) {
  DebugLoc TheCallDL = TheCall->getDebugLoc();
  if (TheCallDL.isUnknown())
    return;

  for (; FI != Fn->end(); ++FI) {
    for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
         BI != BE; ++BI) {
      DebugLoc DL = BI->getDebugLoc();
      if (DL.isUnknown()) {
        // If the inlined instruction has no line number, make it look as if it
        // originates from the call location. This is important for
        // ((__always_inline__, __nodebug__)) functions which must use caller
        // location for all instructions in their function body.
        BI->setDebugLoc(TheCallDL);
      } else {
        BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext()));
        if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) {
          LLVMContext &Ctx = BI->getContext();
          MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx);
          DVI->setOperand(2, createInlinedVariable(DVI->getVariable(),
                                                   InlinedAt, Ctx));
        }
      }
    }
  }
}

/// Returns a musttail call instruction if one immediately precedes the given
/// return instruction with an optional bitcast instruction between them.
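///
/// In other words, this recognizes returns that directly follow a musttail
/// call, e.g. (illustrative IR, names made up):
///
///   %v = musttail call i32 @f(i32 %x)
///   ret i32 %v
///
///   %v = musttail call i8* @g()
///   %c = bitcast i8* %v to i32*
///   ret i32* %c
///
/// and returns the musttail call in either case.  A 'ret void' immediately
/// after a musttail call qualifies as well; anything else yields null.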
static CallInst *getPrecedingMustTailCall(ReturnInst *RI) {
  Instruction *Prev = RI->getPrevNode();
  if (!Prev)
    return nullptr;

  if (Value *RV = RI->getReturnValue()) {
    if (RV != Prev)
      return nullptr;

    // Look through the optional bitcast.
    if (auto *BI = dyn_cast<BitCastInst>(Prev)) {
      RV = BI->getOperand(0);
      Prev = BI->getPrevNode();
      if (!Prev || RV != Prev)
        return nullptr;
    }
  }

  if (auto *CI = dyn_cast<CallInst>(Prev)) {
    if (CI->isMustTailCall())
      return CI;
  }
  return nullptr;
}

/// InlineFunction - This function inlines the called function into the basic
/// block of the caller.  This returns false if it is not possible to inline
/// this call.  The program is still in a well defined state if this occurs
/// though.
///
/// Note that this only does one level of inlining.  For example, if the
/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
/// exists in the instruction stream.  Similarly this will inline a recursive
/// function by one level.
bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
                          bool InsertLifetime) {
  Instruction *TheCall = CS.getInstruction();
  assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
         "Instruction not in function!");

  // If IFI has any state in it, zap it before we fill it in.
  IFI.reset();

  const Function *CalledFunc = CS.getCalledFunction();
  if (!CalledFunc ||              // Can't inline external function or indirect
      CalledFunc->isDeclaration() || // call, or call to a vararg function!
      CalledFunc->getFunctionType()->isVarArg()) return false;

  // If the call to the callee cannot throw, set the 'nounwind' flag on any
  // calls that we inline.
  bool MarkNoUnwind = CS.doesNotThrow();

  BasicBlock *OrigBB = TheCall->getParent();
  Function *Caller = OrigBB->getParent();

  // GC poses two hazards to inlining, which only occur when the callee has GC:
  //  1. If the caller has no GC, then the callee's GC must be propagated to the
  //     caller.
  //  2. If the caller has a differing GC, it is invalid to inline.
  if (CalledFunc->hasGC()) {
    if (!Caller->hasGC())
      Caller->setGC(CalledFunc->getGC());
    else if (CalledFunc->getGC() != Caller->getGC())
      return false;
  }

  // Get the personality function from the callee if it contains a landing pad.
  Value *CalleePersonality = nullptr;
  for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end();
       I != E; ++I)
    if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
      const BasicBlock *BB = II->getUnwindDest();
      const LandingPadInst *LP = BB->getLandingPadInst();
      CalleePersonality = LP->getPersonalityFn();
      break;
    }

  // Find the personality function used by the landing pads of the caller. If it
  // exists, then check to see that it matches the personality function used in
  // the callee.
  if (CalleePersonality) {
    for (Function::const_iterator I = Caller->begin(), E = Caller->end();
         I != E; ++I)
      if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
        const BasicBlock *BB = II->getUnwindDest();
        const LandingPadInst *LP = BB->getLandingPadInst();

        // If the personality functions match, then we can perform the
        // inlining. Otherwise, we can't inline.
        // TODO: This isn't 100% true. Some personality functions are proper
        //       supersets of others and can be used in place of the other.
        if (LP->getPersonalityFn() != CalleePersonality)
          return false;

        break;
      }
  }

  // Get an iterator to the last basic block in the function, which will have
  // the new function inlined after it.
  Function::iterator LastBlock = &Caller->back();

  // Make sure to capture all of the return instructions from the cloned
  // function.
  SmallVector<ReturnInst*, 8> Returns;
  ClonedCodeInfo InlinedFunctionInfo;
  Function::iterator FirstNewBlock;

  { // Scope to destroy VMap after cloning.
    ValueToValueMapTy VMap;
    // Keep a list of pair (dst, src) to emit byval initializations.
    SmallVector<std::pair<Value*, Value*>, 4> ByValInit;

    assert(CalledFunc->arg_size() == CS.arg_size() &&
           "No varargs calls can be inlined!");

    // Calculate the vector of arguments to pass into the function cloner, which
    // matches up the formal to the actual argument values.
    CallSite::arg_iterator AI = CS.arg_begin();
    unsigned ArgNo = 0;
    for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
         E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
      Value *ActualArg = *AI;

      // When byval arguments are actually inlined, we need to make the copy
      // implied by them explicit.  However, we don't do this if the callee is
      // readonly or readnone, because the copy would be unneeded: the callee
      // doesn't modify the struct.
      if (CS.isByValArgument(ArgNo)) {
        ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
                                        CalledFunc->getParamAlignment(ArgNo+1));
        if (ActualArg != *AI)
          ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI));
      }

      VMap[I] = ActualArg;
    }

    // We want the inliner to prune the code as it copies.  We would LOVE to
    // have no dead or constant instructions leftover after inlining occurs
    // (which can happen, e.g., because an argument was constant), but we'll be
    // happy with whatever the cloner can do.
    CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
                              /*ModuleLevelChanges=*/false, Returns, ".i",
                              &InlinedFunctionInfo, IFI.DL, TheCall);

    // Remember the first block that is newly cloned over.
    FirstNewBlock = LastBlock; ++FirstNewBlock;

    // Inject byval arguments initialization.
    for (std::pair<Value*, Value*> &Init : ByValInit)
      HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(),
                              FirstNewBlock, IFI);

    // Update the callgraph if requested.
    if (IFI.CG)
      UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);

    // Update inlined instructions' line number information.
    fixupLineNumbers(Caller, FirstNewBlock, TheCall);
  }

  // If there are any alloca instructions in the block that used to be the entry
  // block for the callee, move them to the entry block of the caller.  First
  // calculate which instruction they should be inserted before.  We insert the
  // instructions at the end of the current alloca list.
  {
    BasicBlock::iterator InsertPoint = Caller->begin()->begin();
    for (BasicBlock::iterator I = FirstNewBlock->begin(),
         E = FirstNewBlock->end(); I != E; ) {
      AllocaInst *AI = dyn_cast<AllocaInst>(I++);
      if (!AI) continue;

      // If the alloca is now dead, remove it.  This often occurs due to code
      // specialization.
      if (AI->use_empty()) {
        AI->eraseFromParent();
        continue;
      }

      if (!isa<Constant>(AI->getArraySize()))
        continue;

      // Keep track of the static allocas that we inline into the caller.
      IFI.StaticAllocas.push_back(AI);

      // Scan for the block of allocas that we can move over, and move them
      // all at once.
      while (isa<AllocaInst>(I) &&
             isa<Constant>(cast<AllocaInst>(I)->getArraySize())) {
        IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
        ++I;
      }

      // Transfer all of the allocas over in a block.  Using splice means
      // that the instructions aren't removed from the symbol table, then
      // reinserted.
      Caller->getEntryBlock().getInstList().splice(InsertPoint,
                                                   FirstNewBlock->getInstList(),
                                                   AI, I);
    }
  }

  bool InlinedMustTailCalls = false;
  if (InlinedFunctionInfo.ContainsCalls) {
    CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;
    if (CallInst *CI = dyn_cast<CallInst>(TheCall))
      CallSiteTailKind = CI->getTailCallKind();

    for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E;
         ++BB) {
      for (Instruction &I : *BB) {
        CallInst *CI = dyn_cast<CallInst>(&I);
        if (!CI)
          continue;

        // We need to reduce the strength of any inlined tail calls.  For
        // musttail, we have to avoid introducing potential unbounded stack
        // growth.  For example, if functions 'f' and 'g' are mutually recursive
        // with musttail, we can inline 'g' into 'f' so long as we preserve
        // musttail on the cloned call to 'f'.  If either the inlined call site
        // or the cloned call site is *not* musttail, the program already has
        // one frame of stack growth, so it's safe to remove musttail.  Here is
        // a table of example transformations:
        //
        //    f -> musttail g -> musttail f  ==>  f -> musttail f
        //    f -> musttail g ->     tail f  ==>  f ->     tail f
        //    f ->          g -> musttail f  ==>  f ->          f
        //    f ->          g ->     tail f  ==>  f ->          f
        CallInst::TailCallKind ChildTCK = CI->getTailCallKind();
        ChildTCK = std::min(CallSiteTailKind, ChildTCK);
        CI->setTailCallKind(ChildTCK);
        InlinedMustTailCalls |= CI->isMustTailCall();

        // Calls inlined through a 'nounwind' call site should be marked
        // 'nounwind'.
        if (MarkNoUnwind)
          CI->setDoesNotThrow();
      }
    }
  }

  // Leave lifetime markers for the static allocas, scoping them to the
  // function we just inlined.
  if (InsertLifetime && !IFI.StaticAllocas.empty()) {
    IRBuilder<> builder(FirstNewBlock->begin());
    for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
      AllocaInst *AI = IFI.StaticAllocas[ai];

      // If the alloca is already scoped to something smaller than the whole
      // function then there's no need to add redundant, less accurate markers.
      if (hasLifetimeMarkers(AI))
        continue;

      // Try to determine the size of the allocation.
      ConstantInt *AllocaSize = nullptr;
      if (ConstantInt *AIArraySize =
          dyn_cast<ConstantInt>(AI->getArraySize())) {
        if (IFI.DL) {
          Type *AllocaType = AI->getAllocatedType();
          uint64_t AllocaTypeSize = IFI.DL->getTypeAllocSize(AllocaType);
          uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
          assert(AllocaArraySize > 0 && "array size of AllocaInst is zero");
          // Check that array size doesn't saturate uint64_t and doesn't
          // overflow when it's multiplied by type size.
          if (AllocaArraySize != ~0ULL &&
              UINT64_MAX / AllocaArraySize >= AllocaTypeSize) {
            AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
                                          AllocaArraySize * AllocaTypeSize);
          }
        }
      }

      builder.CreateLifetimeStart(AI, AllocaSize);
      for (ReturnInst *RI : Returns) {
        // Don't insert llvm.lifetime.end calls between a musttail call and a
        // return.  The return kills all local allocas.
        if (InlinedMustTailCalls && getPrecedingMustTailCall(RI))
          continue;
        IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize);
      }
    }
  }

  // If the inlined code contained dynamic alloca instructions, wrap the inlined
  // code with llvm.stacksave/llvm.stackrestore intrinsics.
  if (InlinedFunctionInfo.ContainsDynamicAllocas) {
    Module *M = Caller->getParent();
    // Get the two intrinsics we care about.
    Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
    Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore);

    // Insert the llvm.stacksave.
    CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin())
      .CreateCall(StackSave, "savedstack");

    // Insert a call to llvm.stackrestore before any return instructions in the
    // inlined function.
    for (ReturnInst *RI : Returns) {
      // Don't insert llvm.stackrestore calls between a musttail call and a
      // return.  The return will restore the stack pointer.
      if (InlinedMustTailCalls && getPrecedingMustTailCall(RI))
        continue;
      IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr);
    }
  }

  // If we are inlining for an invoke instruction, we must make sure to rewrite
  // any call instructions into invoke instructions.
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
    HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);

  // Handle any inlined musttail call sites.  In order for a new call site to be
  // musttail, the source of the clone and the inlined call site must have been
  // musttail.  Therefore it's safe to return without merging control into the
  // phi below.
  if (InlinedMustTailCalls) {
    // Check if we need to bitcast the result of any musttail calls.
    Type *NewRetTy = Caller->getReturnType();
    bool NeedBitCast = !TheCall->use_empty() && TheCall->getType() != NewRetTy;

    // Handle the returns preceded by musttail calls separately.
    SmallVector<ReturnInst *, 8> NormalReturns;
    for (ReturnInst *RI : Returns) {
      CallInst *ReturnedMustTail = getPrecedingMustTailCall(RI);
      if (!ReturnedMustTail) {
        NormalReturns.push_back(RI);
        continue;
      }
      if (!NeedBitCast)
        continue;

      // Delete the old return and any preceding bitcast.
      BasicBlock *CurBB = RI->getParent();
      auto *OldCast = dyn_cast_or_null<BitCastInst>(RI->getReturnValue());
      RI->eraseFromParent();
      if (OldCast)
        OldCast->eraseFromParent();

      // Insert a new bitcast and return with the right type.
      IRBuilder<> Builder(CurBB);
      Builder.CreateRet(Builder.CreateBitCast(ReturnedMustTail, NewRetTy));
    }

    // Leave behind the normal returns so we can merge control flow.
    std::swap(Returns, NormalReturns);
  }

  // If we cloned in _exactly one_ basic block, and if that block ends in a
  // return instruction, we splice the body of the inlined callee directly into
  // the calling basic block.
  if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
    // Move all of the instructions right before the call.
    OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(),
                                 FirstNewBlock->begin(), FirstNewBlock->end());
    // Remove the cloned basic block.
    Caller->getBasicBlockList().pop_back();

    // If the call site was an invoke instruction, add a branch to the normal
    // destination.
    if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
      BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);
      NewBr->setDebugLoc(Returns[0]->getDebugLoc());
    }

    // If the return instruction returned a value, replace uses of the call with
    // uses of the returned value.
    if (!TheCall->use_empty()) {
      ReturnInst *R = Returns[0];
      if (TheCall == R->getReturnValue())
        TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
      else
        TheCall->replaceAllUsesWith(R->getReturnValue());
    }
    // Since we are now done with the Call/Invoke, we can delete it.
    TheCall->eraseFromParent();

    // Since we are now done with the return instruction, delete it also.
    Returns[0]->eraseFromParent();

    // We are now done with the inlining.
    return true;
  }

  // Otherwise, we have the normal case, of more than one block to inline or
  // multiple return sites.

  // We want to clone the entire callee function into the hole between the
  // "starter" and "ender" blocks.  How we accomplish this depends on whether
  // this is an invoke instruction or a call instruction.
  BasicBlock *AfterCallBB;
  BranchInst *CreatedBranchToNormalDest = nullptr;
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {

    // Add an unconditional branch to make this look like the CallInst case...
    CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall);

    // Split the basic block.  This guarantees that no PHI nodes will have to be
    // updated due to new incoming edges, and makes the invoke case more
    // symmetric to the call case.
    AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest,
                                          CalledFunc->getName()+".exit");

  } else {  // It's a call
    // If this is a call instruction, we need to split the basic block that
    // the call lives in.
    //
    AfterCallBB = OrigBB->splitBasicBlock(TheCall,
                                          CalledFunc->getName()+".exit");
  }

  // Change the branch that used to go to AfterCallBB to branch to the first
  // basic block of the inlined function.
  //
  TerminatorInst *Br = OrigBB->getTerminator();
  assert(Br && Br->getOpcode() == Instruction::Br &&
         "splitBasicBlock broken!");
  Br->setOperand(0, FirstNewBlock);


  // Now that the function is correct, make it a little bit nicer.  In
  // particular, move the basic blocks inserted from the end of the function
  // into the space made by splitting the source basic block.
  Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(),
                                     FirstNewBlock, Caller->end());

  // Handle all of the return instructions that we just cloned in, and eliminate
  // any users of the original call/invoke instruction.
  Type *RTy = CalledFunc->getReturnType();

  PHINode *PHI = nullptr;
  if (Returns.size() > 1) {
    // The PHI node should go at the front of the new basic block to merge all
    // possible incoming values.
    if (!TheCall->use_empty()) {
      PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(),
                            AfterCallBB->begin());
      // Anything that used the result of the function call should now use the
      // PHI node as their operand.
      TheCall->replaceAllUsesWith(PHI);
    }

    // Loop over all of the return instructions adding entries to the PHI node
    // as appropriate.
    if (PHI) {
      for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
        ReturnInst *RI = Returns[i];
        assert(RI->getReturnValue()->getType() == PHI->getType() &&
               "Ret value not consistent in function!");
        PHI->addIncoming(RI->getReturnValue(), RI->getParent());
      }
    }


    // Add a branch to the merge points and remove return instructions.
    DebugLoc Loc;
    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
      ReturnInst *RI = Returns[i];
      BranchInst* BI = BranchInst::Create(AfterCallBB, RI);
      Loc = RI->getDebugLoc();
      BI->setDebugLoc(Loc);
      RI->eraseFromParent();
    }
    // We need to set the debug location to *somewhere* inside the
    // inlined function. The line number may be nonsensical, but the
    // instruction will at least be associated with the right
    // function.
    if (CreatedBranchToNormalDest)
      CreatedBranchToNormalDest->setDebugLoc(Loc);
  } else if (!Returns.empty()) {
    // Otherwise, if there is exactly one return value, just replace anything
    // using the return value of the call with the computed value.
    if (!TheCall->use_empty()) {
      if (TheCall == Returns[0]->getReturnValue())
        TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
      else
        TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
    }

    // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
    BasicBlock *ReturnBB = Returns[0]->getParent();
    ReturnBB->replaceAllUsesWith(AfterCallBB);

    // Splice the code from the return block into the block that it will return
    // to, which contains the code that was after the call.
    AfterCallBB->getInstList().splice(AfterCallBB->begin(),
                                      ReturnBB->getInstList());

    if (CreatedBranchToNormalDest)
      CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc());

    // Delete the return instruction and the now-empty ReturnBB.
    Returns[0]->eraseFromParent();
    ReturnBB->eraseFromParent();
  } else if (!TheCall->use_empty()) {
    // No returns, but something is using the return value of the call.  Just
    // nuke the result.
    TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
  }

  // Since we are now done with the Call/Invoke, we can delete it.
  TheCall->eraseFromParent();

  // If we inlined any musttail calls and the original return is now
  // unreachable, delete it.  It can only contain a bitcast and ret.
  if (InlinedMustTailCalls && pred_begin(AfterCallBB) == pred_end(AfterCallBB))
    AfterCallBB->eraseFromParent();

  // We should always be able to fold the entry block of the function into the
  // single predecessor of the block...
  assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
  BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);

  // Splice the code entry block into calling block, right before the
  // unconditional branch.
  CalleeEntry->replaceAllUsesWith(OrigBB);  // Update PHI nodes
  OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());

  // Remove the unconditional branch.
  OrigBB->getInstList().erase(Br);

  // Now we can remove the CalleeEntry block, which is now empty.
  Caller->getBasicBlockList().erase(CalleeEntry);

  // If we inserted a phi node, check to see if it has a single value (e.g. all
  // the entries are the same or undef).  If so, remove the PHI so it doesn't
  // block other optimizations.
  if (PHI) {
    if (Value *V = SimplifyInstruction(PHI, IFI.DL)) {
      PHI->replaceAllUsesWith(V);
      PHI->eraseFromParent();
    }
  }

  return true;
}
   1027