Home | History | Annotate | Download | only in Utils
      1 //===- InlineFunction.cpp - Code to perform function inlining -------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements inlining of a function into a call site, resolving
     11 // parameters and the return value as appropriate.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "llvm/Transforms/Utils/Cloning.h"
     16 #include "llvm/ADT/SmallVector.h"
     17 #include "llvm/ADT/StringExtras.h"
     18 #include "llvm/Analysis/CallGraph.h"
     19 #include "llvm/Analysis/InstructionSimplify.h"
     20 #include "llvm/DebugInfo.h"
     21 #include "llvm/IR/Attributes.h"
     22 #include "llvm/IR/Constants.h"
     23 #include "llvm/IR/DataLayout.h"
     24 #include "llvm/IR/DerivedTypes.h"
     25 #include "llvm/IR/IRBuilder.h"
     26 #include "llvm/IR/Instructions.h"
     27 #include "llvm/IR/IntrinsicInst.h"
     28 #include "llvm/IR/Intrinsics.h"
     29 #include "llvm/IR/Module.h"
     30 #include "llvm/Support/CallSite.h"
     31 #include "llvm/Transforms/Utils/Local.h"
     32 using namespace llvm;
     33 
     34 bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI,
     35                           bool InsertLifetime) {
     36   return InlineFunction(CallSite(CI), IFI, InsertLifetime);
     37 }
     38 bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
     39                           bool InsertLifetime) {
     40   return InlineFunction(CallSite(II), IFI, InsertLifetime);
     41 }
     42 
     43 namespace {
     44   /// A class for recording information about inlining through an invoke.
     45   class InvokeInliningInfo {
     46     BasicBlock *OuterResumeDest; ///< Destination of the invoke's unwind.
     47     BasicBlock *InnerResumeDest; ///< Destination for the callee's resume.
     48     LandingPadInst *CallerLPad;  ///< LandingPadInst associated with the invoke.
     49     PHINode *InnerEHValuesPHI;   ///< PHI for EH values from landingpad insts.
     50     SmallVector<Value*, 8> UnwindDestPHIValues;
     51 
     52   public:
     53     InvokeInliningInfo(InvokeInst *II)
     54       : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(0),
     55         CallerLPad(0), InnerEHValuesPHI(0) {
     56       // If there are PHI nodes in the unwind destination block, we need to keep
     57       // track of which values came into them from the invoke before removing
     58       // the edge from this block.
     59       llvm::BasicBlock *InvokeBB = II->getParent();
     60       BasicBlock::iterator I = OuterResumeDest->begin();
     61       for (; isa<PHINode>(I); ++I) {
     62         // Save the value to use for this edge.
     63         PHINode *PHI = cast<PHINode>(I);
     64         UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
     65       }
     66 
     67       CallerLPad = cast<LandingPadInst>(I);
     68     }
     69 
     70     /// getOuterResumeDest - The outer unwind destination is the target of
     71     /// unwind edges introduced for calls within the inlined function.
     72     BasicBlock *getOuterResumeDest() const {
     73       return OuterResumeDest;
     74     }
     75 
     76     BasicBlock *getInnerResumeDest();
     77 
     78     LandingPadInst *getLandingPadInst() const { return CallerLPad; }
     79 
     80     /// forwardResume - Forward the 'resume' instruction to the caller's landing
     81     /// pad block. When the landing pad block has only one predecessor, this is
     82     /// a simple branch. When there is more than one predecessor, we need to
     83     /// split the landing pad block after the landingpad instruction and jump
     84     /// to there.
     85     void forwardResume(ResumeInst *RI,
     86                        SmallPtrSet<LandingPadInst*, 16> &InlinedLPads);
     87 
     88     /// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind
     89     /// destination block for the given basic block, using the values for the
     90     /// original invoke's source block.
     91     void addIncomingPHIValuesFor(BasicBlock *BB) const {
     92       addIncomingPHIValuesForInto(BB, OuterResumeDest);
     93     }
     94 
     95     void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const {
     96       BasicBlock::iterator I = dest->begin();
     97       for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
     98         PHINode *phi = cast<PHINode>(I);
     99         phi->addIncoming(UnwindDestPHIValues[i], src);
    100       }
    101     }
    102   };
    103 }
    104 
    105 /// getInnerResumeDest - Get or create a target for the branch from ResumeInsts.
    106 BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
    107   if (InnerResumeDest) return InnerResumeDest;
    108 
    109   // Split the landing pad.
    110   BasicBlock::iterator SplitPoint = CallerLPad; ++SplitPoint;
    111   InnerResumeDest =
    112     OuterResumeDest->splitBasicBlock(SplitPoint,
    113                                      OuterResumeDest->getName() + ".body");
    114 
    115   // The number of incoming edges we expect to the inner landing pad.
    116   const unsigned PHICapacity = 2;
    117 
    118   // Create corresponding new PHIs for all the PHIs in the outer landing pad.
    119   BasicBlock::iterator InsertPoint = InnerResumeDest->begin();
    120   BasicBlock::iterator I = OuterResumeDest->begin();
    121   for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
    122     PHINode *OuterPHI = cast<PHINode>(I);
    123     PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity,
    124                                         OuterPHI->getName() + ".lpad-body",
    125                                         InsertPoint);
    126     OuterPHI->replaceAllUsesWith(InnerPHI);
    127     InnerPHI->addIncoming(OuterPHI, OuterResumeDest);
    128   }
    129 
    130   // Create a PHI for the exception values.
    131   InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity,
    132                                      "eh.lpad-body", InsertPoint);
    133   CallerLPad->replaceAllUsesWith(InnerEHValuesPHI);
    134   InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest);
    135 
    136   // All done.
    137   return InnerResumeDest;
    138 }
    139 
    140 /// forwardResume - Forward the 'resume' instruction to the caller's landing pad
    141 /// block. When the landing pad block has only one predecessor, this is a simple
    142 /// branch. When there is more than one predecessor, we need to split the
    143 /// landing pad block after the landingpad instruction and jump to there.
    144 void InvokeInliningInfo::forwardResume(ResumeInst *RI,
    145                                SmallPtrSet<LandingPadInst*, 16> &InlinedLPads) {
    146   BasicBlock *Dest = getInnerResumeDest();
    147   LandingPadInst *OuterLPad = getLandingPadInst();
    148   BasicBlock *Src = RI->getParent();
    149 
    150   BranchInst::Create(Dest, Src);
    151 
    152   // Update the PHIs in the destination. They were inserted in an order which
    153   // makes this work.
    154   addIncomingPHIValuesForInto(Src, Dest);
    155 
    156   InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src);
    157   RI->eraseFromParent();
    158 
    159   // Append the clauses from the outer landing pad instruction into the inlined
    160   // landing pad instructions.
    161   for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(),
    162          E = InlinedLPads.end(); I != E; ++I) {
    163     LandingPadInst *InlinedLPad = *I;
    164     for (unsigned OuterIdx = 0, OuterNum = OuterLPad->getNumClauses();
    165          OuterIdx != OuterNum; ++OuterIdx)
    166       InlinedLPad->addClause(OuterLPad->getClause(OuterIdx));
    167   }
    168 }
    169 
    170 /// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
    171 /// an invoke, we have to turn all of the calls that can throw into
    172 /// invokes.  This function analyze BB to see if there are any calls, and if so,
    173 /// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
    174 /// nodes in that block with the values specified in InvokeDestPHIValues.
    175 ///
    176 /// Returns true to indicate that the next block should be skipped.
    177 static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
    178                                                    InvokeInliningInfo &Invoke) {
    179   LandingPadInst *LPI = Invoke.getLandingPadInst();
    180 
    181   for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
    182     Instruction *I = BBI++;
    183 
    184     if (LandingPadInst *L = dyn_cast<LandingPadInst>(I)) {
    185       unsigned NumClauses = LPI->getNumClauses();
    186       L->reserveClauses(NumClauses);
    187       for (unsigned i = 0; i != NumClauses; ++i)
    188         L->addClause(LPI->getClause(i));
    189     }
    190 
    191     // We only need to check for function calls: inlined invoke
    192     // instructions require no special handling.
    193     CallInst *CI = dyn_cast<CallInst>(I);
    194 
    195     // If this call cannot unwind, don't convert it to an invoke.
    196     if (!CI || CI->doesNotThrow())
    197       continue;
    198 
    199     // Convert this function call into an invoke instruction.  First, split the
    200     // basic block.
    201     BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc");
    202 
    203     // Delete the unconditional branch inserted by splitBasicBlock
    204     BB->getInstList().pop_back();
    205 
    206     // Create the new invoke instruction.
    207     ImmutableCallSite CS(CI);
    208     SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end());
    209     InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split,
    210                                         Invoke.getOuterResumeDest(),
    211                                         InvokeArgs, CI->getName(), BB);
    212     II->setCallingConv(CI->getCallingConv());
    213     II->setAttributes(CI->getAttributes());
    214 
    215     // Make sure that anything using the call now uses the invoke!  This also
    216     // updates the CallGraph if present, because it uses a WeakVH.
    217     CI->replaceAllUsesWith(II);
    218 
    219     // Delete the original call
    220     Split->getInstList().pop_front();
    221 
    222     // Update any PHI nodes in the exceptional block to indicate that there is
    223     // now a new entry in them.
    224     Invoke.addIncomingPHIValuesFor(BB);
    225     return false;
    226   }
    227 
    228   return false;
    229 }
    230 
    231 /// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls
    232 /// in the body of the inlined function into invokes.
    233 ///
    234 /// II is the invoke instruction being inlined.  FirstNewBlock is the first
    235 /// block of the inlined code (the last block is the end of the function),
    236 /// and InlineCodeInfo is information about the code that got inlined.
    237 static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
    238                                 ClonedCodeInfo &InlinedCodeInfo) {
    239   BasicBlock *InvokeDest = II->getUnwindDest();
    240 
    241   Function *Caller = FirstNewBlock->getParent();
    242 
    243   // The inlined code is currently at the end of the function, scan from the
    244   // start of the inlined code to its end, checking for stuff we need to
    245   // rewrite.
    246   InvokeInliningInfo Invoke(II);
    247 
    248   // Get all of the inlined landing pad instructions.
    249   SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
    250   for (Function::iterator I = FirstNewBlock, E = Caller->end(); I != E; ++I)
    251     if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator()))
    252       InlinedLPads.insert(II->getLandingPadInst());
    253 
    254   for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){
    255     if (InlinedCodeInfo.ContainsCalls)
    256       if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) {
    257         // Honor a request to skip the next block.
    258         ++BB;
    259         continue;
    260       }
    261 
    262     // Forward any resumes that are remaining here.
    263     if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
    264       Invoke.forwardResume(RI, InlinedLPads);
    265   }
    266 
    267   // Now that everything is happy, we have one final detail.  The PHI nodes in
    268   // the exception destination block still have entries due to the original
    269   // invoke instruction. Eliminate these entries (which might even delete the
    270   // PHI node) now.
    271   InvokeDest->removePredecessor(II->getParent());
    272 }
    273 
    274 /// UpdateCallGraphAfterInlining - Once we have cloned code over from a callee
    275 /// into the caller, update the specified callgraph to reflect the changes we
    276 /// made.  Note that it's possible that not all code was copied over, so only
    277 /// some edges of the callgraph may remain.
    278 static void UpdateCallGraphAfterInlining(CallSite CS,
    279                                          Function::iterator FirstNewBlock,
    280                                          ValueToValueMapTy &VMap,
    281                                          InlineFunctionInfo &IFI) {
    282   CallGraph &CG = *IFI.CG;
    283   const Function *Caller = CS.getInstruction()->getParent()->getParent();
    284   const Function *Callee = CS.getCalledFunction();
    285   CallGraphNode *CalleeNode = CG[Callee];
    286   CallGraphNode *CallerNode = CG[Caller];
    287 
    288   // Since we inlined some uninlined call sites in the callee into the caller,
    289   // add edges from the caller to all of the callees of the callee.
    290   CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end();
    291 
    292   // Consider the case where CalleeNode == CallerNode.
    293   CallGraphNode::CalledFunctionsVector CallCache;
    294   if (CalleeNode == CallerNode) {
    295     CallCache.assign(I, E);
    296     I = CallCache.begin();
    297     E = CallCache.end();
    298   }
    299 
    300   for (; I != E; ++I) {
    301     const Value *OrigCall = I->first;
    302 
    303     ValueToValueMapTy::iterator VMI = VMap.find(OrigCall);
    304     // Only copy the edge if the call was inlined!
    305     if (VMI == VMap.end() || VMI->second == 0)
    306       continue;
    307 
    308     // If the call was inlined, but then constant folded, there is no edge to
    309     // add.  Check for this case.
    310     Instruction *NewCall = dyn_cast<Instruction>(VMI->second);
    311     if (NewCall == 0) continue;
    312 
    313     // Remember that this call site got inlined for the client of
    314     // InlineFunction.
    315     IFI.InlinedCalls.push_back(NewCall);
    316 
    317     // It's possible that inlining the callsite will cause it to go from an
    318     // indirect to a direct call by resolving a function pointer.  If this
    319     // happens, set the callee of the new call site to a more precise
    320     // destination.  This can also happen if the call graph node of the caller
    321     // was just unnecessarily imprecise.
    322     if (I->second->getFunction() == 0)
    323       if (Function *F = CallSite(NewCall).getCalledFunction()) {
    324         // Indirect call site resolved to direct call.
    325         CallerNode->addCalledFunction(CallSite(NewCall), CG[F]);
    326 
    327         continue;
    328       }
    329 
    330     CallerNode->addCalledFunction(CallSite(NewCall), I->second);
    331   }
    332 
    333   // Update the call graph by deleting the edge from Callee to Caller.  We must
    334   // do this after the loop above in case Caller and Callee are the same.
    335   CallerNode->removeCallEdgeFor(CS);
    336 }
    337 
    338 /// HandleByValArgument - When inlining a call site that has a byval argument,
    339 /// we have to make the implicit memcpy explicit by adding it.
    340 static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
    341                                   const Function *CalledFunc,
    342                                   InlineFunctionInfo &IFI,
    343                                   unsigned ByValAlignment) {
    344   Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();
    345 
    346   // If the called function is readonly, then it could not mutate the caller's
    347   // copy of the byval'd memory.  In this case, it is safe to elide the copy and
    348   // temporary.
    349   if (CalledFunc->onlyReadsMemory()) {
    350     // If the byval argument has a specified alignment that is greater than the
    351     // passed in pointer, then we either have to round up the input pointer or
    352     // give up on this transformation.
    353     if (ByValAlignment <= 1)  // 0 = unspecified, 1 = no particular alignment.
    354       return Arg;
    355 
    356     // If the pointer is already known to be sufficiently aligned, or if we can
    357     // round it up to a larger alignment, then we don't need a temporary.
    358     if (getOrEnforceKnownAlignment(Arg, ByValAlignment,
    359                                    IFI.TD) >= ByValAlignment)
    360       return Arg;
    361 
    362     // Otherwise, we have to make a memcpy to get a safe alignment.  This is bad
    363     // for code quality, but rarely happens and is required for correctness.
    364   }
    365 
    366   LLVMContext &Context = Arg->getContext();
    367 
    368   Type *VoidPtrTy = Type::getInt8PtrTy(Context);
    369 
    370   // Create the alloca.  If we have DataLayout, use nice alignment.
    371   unsigned Align = 1;
    372   if (IFI.TD)
    373     Align = IFI.TD->getPrefTypeAlignment(AggTy);
    374 
    375   // If the byval had an alignment specified, we *must* use at least that
    376   // alignment, as it is required by the byval argument (and uses of the
    377   // pointer inside the callee).
    378   Align = std::max(Align, ByValAlignment);
    379 
    380   Function *Caller = TheCall->getParent()->getParent();
    381 
    382   Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(),
    383                                     &*Caller->begin()->begin());
    384   // Emit a memcpy.
    385   Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
    386   Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
    387                                                  Intrinsic::memcpy,
    388                                                  Tys);
    389   Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
    390   Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall);
    391 
    392   Value *Size;
    393   if (IFI.TD == 0)
    394     Size = ConstantExpr::getSizeOf(AggTy);
    395   else
    396     Size = ConstantInt::get(Type::getInt64Ty(Context),
    397                             IFI.TD->getTypeStoreSize(AggTy));
    398 
    399   // Always generate a memcpy of alignment 1 here because we don't know
    400   // the alignment of the src pointer.  Other optimizations can infer
    401   // better alignment.
    402   Value *CallArgs[] = {
    403     DestCast, SrcCast, Size,
    404     ConstantInt::get(Type::getInt32Ty(Context), 1),
    405     ConstantInt::getFalse(Context) // isVolatile
    406   };
    407   IRBuilder<>(TheCall).CreateCall(MemCpyFn, CallArgs);
    408 
    409   // Uses of the argument in the function should use our new alloca
    410   // instead.
    411   return NewAlloca;
    412 }
    413 
    414 // isUsedByLifetimeMarker - Check whether this Value is used by a lifetime
    415 // intrinsic.
    416 static bool isUsedByLifetimeMarker(Value *V) {
    417   for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE;
    418        ++UI) {
    419     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI)) {
    420       switch (II->getIntrinsicID()) {
    421       default: break;
    422       case Intrinsic::lifetime_start:
    423       case Intrinsic::lifetime_end:
    424         return true;
    425       }
    426     }
    427   }
    428   return false;
    429 }
    430 
    431 // hasLifetimeMarkers - Check whether the given alloca already has
    432 // lifetime.start or lifetime.end intrinsics.
    433 static bool hasLifetimeMarkers(AllocaInst *AI) {
    434   Type *Int8PtrTy = Type::getInt8PtrTy(AI->getType()->getContext());
    435   if (AI->getType() == Int8PtrTy)
    436     return isUsedByLifetimeMarker(AI);
    437 
    438   // Do a scan to find all the casts to i8*.
    439   for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); I != E;
    440        ++I) {
    441     if (I->getType() != Int8PtrTy) continue;
    442     if (I->stripPointerCasts() != AI) continue;
    443     if (isUsedByLifetimeMarker(*I))
    444       return true;
    445   }
    446   return false;
    447 }
    448 
    449 /// updateInlinedAtInfo - Helper function used by fixupLineNumbers to
    450 /// recursively update InlinedAtEntry of a DebugLoc.
    451 static DebugLoc updateInlinedAtInfo(const DebugLoc &DL,
    452                                     const DebugLoc &InlinedAtDL,
    453                                     LLVMContext &Ctx) {
    454   if (MDNode *IA = DL.getInlinedAt(Ctx)) {
    455     DebugLoc NewInlinedAtDL
    456       = updateInlinedAtInfo(DebugLoc::getFromDILocation(IA), InlinedAtDL, Ctx);
    457     return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
    458                          NewInlinedAtDL.getAsMDNode(Ctx));
    459   }
    460 
    461   return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
    462                        InlinedAtDL.getAsMDNode(Ctx));
    463 }
    464 
    465 /// fixupLineNumbers - Update inlined instructions' line numbers to
    466 /// to encode location where these instructions are inlined.
    467 static void fixupLineNumbers(Function *Fn, Function::iterator FI,
    468                              Instruction *TheCall) {
    469   DebugLoc TheCallDL = TheCall->getDebugLoc();
    470   if (TheCallDL.isUnknown())
    471     return;
    472 
    473   for (; FI != Fn->end(); ++FI) {
    474     for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
    475          BI != BE; ++BI) {
    476       DebugLoc DL = BI->getDebugLoc();
    477       if (!DL.isUnknown()) {
    478         BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext()));
    479         if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) {
    480           LLVMContext &Ctx = BI->getContext();
    481           MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx);
    482           DVI->setOperand(2, createInlinedVariable(DVI->getVariable(),
    483                                                    InlinedAt, Ctx));
    484         }
    485       }
    486     }
    487   }
    488 }
    489 
    490 /// InlineFunction - This function inlines the called function into the basic
    491 /// block of the caller.  This returns false if it is not possible to inline
    492 /// this call.  The program is still in a well defined state if this occurs
    493 /// though.
    494 ///
    495 /// Note that this only does one level of inlining.  For example, if the
    496 /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
    497 /// exists in the instruction stream.  Similarly this will inline a recursive
    498 /// function by one level.
    499 bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
    500                           bool InsertLifetime) {
    501   Instruction *TheCall = CS.getInstruction();
    502   assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
    503          "Instruction not in function!");
    504 
    505   // If IFI has any state in it, zap it before we fill it in.
    506   IFI.reset();
    507 
    508   const Function *CalledFunc = CS.getCalledFunction();
    509   if (CalledFunc == 0 ||          // Can't inline external function or indirect
    510       CalledFunc->isDeclaration() || // call, or call to a vararg function!
    511       CalledFunc->getFunctionType()->isVarArg()) return false;
    512 
    513   // If the call to the callee is not a tail call, we must clear the 'tail'
    514   // flags on any calls that we inline.
    515   bool MustClearTailCallFlags =
    516     !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall());
    517 
    518   // If the call to the callee cannot throw, set the 'nounwind' flag on any
    519   // calls that we inline.
    520   bool MarkNoUnwind = CS.doesNotThrow();
    521 
    522   BasicBlock *OrigBB = TheCall->getParent();
    523   Function *Caller = OrigBB->getParent();
    524 
    525   // GC poses two hazards to inlining, which only occur when the callee has GC:
    526   //  1. If the caller has no GC, then the callee's GC must be propagated to the
    527   //     caller.
    528   //  2. If the caller has a differing GC, it is invalid to inline.
    529   if (CalledFunc->hasGC()) {
    530     if (!Caller->hasGC())
    531       Caller->setGC(CalledFunc->getGC());
    532     else if (CalledFunc->getGC() != Caller->getGC())
    533       return false;
    534   }
    535 
    536   // Get the personality function from the callee if it contains a landing pad.
    537   Value *CalleePersonality = 0;
    538   for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end();
    539        I != E; ++I)
    540     if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
    541       const BasicBlock *BB = II->getUnwindDest();
    542       const LandingPadInst *LP = BB->getLandingPadInst();
    543       CalleePersonality = LP->getPersonalityFn();
    544       break;
    545     }
    546 
    547   // Find the personality function used by the landing pads of the caller. If it
    548   // exists, then check to see that it matches the personality function used in
    549   // the callee.
    550   if (CalleePersonality) {
    551     for (Function::const_iterator I = Caller->begin(), E = Caller->end();
    552          I != E; ++I)
    553       if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
    554         const BasicBlock *BB = II->getUnwindDest();
    555         const LandingPadInst *LP = BB->getLandingPadInst();
    556 
    557         // If the personality functions match, then we can perform the
    558         // inlining. Otherwise, we can't inline.
    559         // TODO: This isn't 100% true. Some personality functions are proper
    560         //       supersets of others and can be used in place of the other.
    561         if (LP->getPersonalityFn() != CalleePersonality)
    562           return false;
    563 
    564         break;
    565       }
    566   }
    567 
    568   // Get an iterator to the last basic block in the function, which will have
    569   // the new function inlined after it.
    570   Function::iterator LastBlock = &Caller->back();
    571 
    572   // Make sure to capture all of the return instructions from the cloned
    573   // function.
    574   SmallVector<ReturnInst*, 8> Returns;
    575   ClonedCodeInfo InlinedFunctionInfo;
    576   Function::iterator FirstNewBlock;
    577 
    578   { // Scope to destroy VMap after cloning.
    579     ValueToValueMapTy VMap;
    580 
    581     assert(CalledFunc->arg_size() == CS.arg_size() &&
    582            "No varargs calls can be inlined!");
    583 
    584     // Calculate the vector of arguments to pass into the function cloner, which
    585     // matches up the formal to the actual argument values.
    586     CallSite::arg_iterator AI = CS.arg_begin();
    587     unsigned ArgNo = 0;
    588     for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
    589          E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
    590       Value *ActualArg = *AI;
    591 
    592       // When byval arguments actually inlined, we need to make the copy implied
    593       // by them explicit.  However, we don't do this if the callee is readonly
    594       // or readnone, because the copy would be unneeded: the callee doesn't
    595       // modify the struct.
    596       if (CS.isByValArgument(ArgNo)) {
    597         ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
    598                                         CalledFunc->getParamAlignment(ArgNo+1));
    599 
    600         // Calls that we inline may use the new alloca, so we need to clear
    601         // their 'tail' flags if HandleByValArgument introduced a new alloca and
    602         // the callee has calls.
    603         MustClearTailCallFlags |= ActualArg != *AI;
    604       }
    605 
    606       VMap[I] = ActualArg;
    607     }
    608 
    609     // We want the inliner to prune the code as it copies.  We would LOVE to
    610     // have no dead or constant instructions leftover after inlining occurs
    611     // (which can happen, e.g., because an argument was constant), but we'll be
    612     // happy with whatever the cloner can do.
    613     CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
    614                               /*ModuleLevelChanges=*/false, Returns, ".i",
    615                               &InlinedFunctionInfo, IFI.TD, TheCall);
    616 
    617     // Remember the first block that is newly cloned over.
    618     FirstNewBlock = LastBlock; ++FirstNewBlock;
    619 
    620     // Update the callgraph if requested.
    621     if (IFI.CG)
    622       UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
    623 
    624     // Update inlined instructions' line number information.
    625     fixupLineNumbers(Caller, FirstNewBlock, TheCall);
    626   }
    627 
    628   // If there are any alloca instructions in the block that used to be the entry
    629   // block for the callee, move them to the entry block of the caller.  First
    630   // calculate which instruction they should be inserted before.  We insert the
    631   // instructions at the end of the current alloca list.
    632   {
    633     BasicBlock::iterator InsertPoint = Caller->begin()->begin();
    634     for (BasicBlock::iterator I = FirstNewBlock->begin(),
    635          E = FirstNewBlock->end(); I != E; ) {
    636       AllocaInst *AI = dyn_cast<AllocaInst>(I++);
    637       if (AI == 0) continue;
    638 
    639       // If the alloca is now dead, remove it.  This often occurs due to code
    640       // specialization.
    641       if (AI->use_empty()) {
    642         AI->eraseFromParent();
    643         continue;
    644       }
    645 
    646       if (!isa<Constant>(AI->getArraySize()))
    647         continue;
    648 
    649       // Keep track of the static allocas that we inline into the caller.
    650       IFI.StaticAllocas.push_back(AI);
    651 
    652       // Scan for the block of allocas that we can move over, and move them
    653       // all at once.
    654       while (isa<AllocaInst>(I) &&
    655              isa<Constant>(cast<AllocaInst>(I)->getArraySize())) {
    656         IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
    657         ++I;
    658       }
    659 
    660       // Transfer all of the allocas over in a block.  Using splice means
    661       // that the instructions aren't removed from the symbol table, then
    662       // reinserted.
    663       Caller->getEntryBlock().getInstList().splice(InsertPoint,
    664                                                    FirstNewBlock->getInstList(),
    665                                                    AI, I);
    666     }
    667   }
    668 
    669   // Leave lifetime markers for the static alloca's, scoping them to the
    670   // function we just inlined.
    671   if (InsertLifetime && !IFI.StaticAllocas.empty()) {
    672     IRBuilder<> builder(FirstNewBlock->begin());
    673     for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
    674       AllocaInst *AI = IFI.StaticAllocas[ai];
    675 
    676       // If the alloca is already scoped to something smaller than the whole
    677       // function then there's no need to add redundant, less accurate markers.
    678       if (hasLifetimeMarkers(AI))
    679         continue;
    680 
    681       // Try to determine the size of the allocation.
    682       ConstantInt *AllocaSize = 0;
    683       if (ConstantInt *AIArraySize =
    684           dyn_cast<ConstantInt>(AI->getArraySize())) {
    685         if (IFI.TD) {
    686           Type *AllocaType = AI->getAllocatedType();
    687           uint64_t AllocaTypeSize = IFI.TD->getTypeAllocSize(AllocaType);
    688           uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
    689           assert(AllocaArraySize > 0 && "array size of AllocaInst is zero");
    690           // Check that array size doesn't saturate uint64_t and doesn't
    691           // overflow when it's multiplied by type size.
    692           if (AllocaArraySize != ~0ULL &&
    693               UINT64_MAX / AllocaArraySize >= AllocaTypeSize) {
    694             AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
    695                                           AllocaArraySize * AllocaTypeSize);
    696           }
    697         }
    698       }
    699 
    700       builder.CreateLifetimeStart(AI, AllocaSize);
    701       for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) {
    702         IRBuilder<> builder(Returns[ri]);
    703         builder.CreateLifetimeEnd(AI, AllocaSize);
    704       }
    705     }
    706   }
    707 
    708   // If the inlined code contained dynamic alloca instructions, wrap the inlined
    709   // code with llvm.stacksave/llvm.stackrestore intrinsics.
    710   if (InlinedFunctionInfo.ContainsDynamicAllocas) {
    711     Module *M = Caller->getParent();
    712     // Get the two intrinsics we care about.
    713     Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
    714     Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore);
    715 
    716     // Insert the llvm.stacksave.
    717     CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin())
    718       .CreateCall(StackSave, "savedstack");
    719 
    720     // Insert a call to llvm.stackrestore before any return instructions in the
    721     // inlined function.
    722     for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
    723       IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr);
    724     }
    725   }
    726 
    727   // If we are inlining tail call instruction through a call site that isn't
    728   // marked 'tail', we must remove the tail marker for any calls in the inlined
    729   // code.  Also, calls inlined through a 'nounwind' call site should be marked
    730   // 'nounwind'.
    731   if (InlinedFunctionInfo.ContainsCalls &&
    732       (MustClearTailCallFlags || MarkNoUnwind)) {
    733     for (Function::iterator BB = FirstNewBlock, E = Caller->end();
    734          BB != E; ++BB)
    735       for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
    736         if (CallInst *CI = dyn_cast<CallInst>(I)) {
    737           if (MustClearTailCallFlags)
    738             CI->setTailCall(false);
    739           if (MarkNoUnwind)
    740             CI->setDoesNotThrow();
    741         }
    742   }
    743 
    744   // If we are inlining for an invoke instruction, we must make sure to rewrite
    745   // any call instructions into invoke instructions.
    746   if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
    747     HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);
    748 
    749   // If we cloned in _exactly one_ basic block, and if that block ends in a
    750   // return instruction, we splice the body of the inlined callee directly into
    751   // the calling basic block.
    752   if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
    753     // Move all of the instructions right before the call.
    754     OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(),
    755                                  FirstNewBlock->begin(), FirstNewBlock->end());
    756     // Remove the cloned basic block.
    757     Caller->getBasicBlockList().pop_back();
    758 
    759     // If the call site was an invoke instruction, add a branch to the normal
    760     // destination.
    761     if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
    762       BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);
    763       NewBr->setDebugLoc(Returns[0]->getDebugLoc());
    764     }
    765 
    766     // If the return instruction returned a value, replace uses of the call with
    767     // uses of the returned value.
    768     if (!TheCall->use_empty()) {
    769       ReturnInst *R = Returns[0];
    770       if (TheCall == R->getReturnValue())
    771         TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
    772       else
    773         TheCall->replaceAllUsesWith(R->getReturnValue());
    774     }
    775     // Since we are now done with the Call/Invoke, we can delete it.
    776     TheCall->eraseFromParent();
    777 
    778     // Since we are now done with the return instruction, delete it also.
    779     Returns[0]->eraseFromParent();
    780 
    781     // We are now done with the inlining.
    782     return true;
    783   }
    784 
    785   // Otherwise, we have the normal case, of more than one block to inline or
    786   // multiple return sites.
    787 
    788   // We want to clone the entire callee function into the hole between the
    789   // "starter" and "ender" blocks.  How we accomplish this depends on whether
    790   // this is an invoke instruction or a call instruction.
    791   BasicBlock *AfterCallBB;
    792   BranchInst *CreatedBranchToNormalDest = NULL;
    793   if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
    794 
    795     // Add an unconditional branch to make this look like the CallInst case...
    796     CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall);
    797 
    798     // Split the basic block.  This guarantees that no PHI nodes will have to be
    799     // updated due to new incoming edges, and make the invoke case more
    800     // symmetric to the call case.
    801     AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest,
    802                                           CalledFunc->getName()+".exit");
    803 
    804   } else {  // It's a call
    805     // If this is a call instruction, we need to split the basic block that
    806     // the call lives in.
    807     //
    808     AfterCallBB = OrigBB->splitBasicBlock(TheCall,
    809                                           CalledFunc->getName()+".exit");
    810   }
    811 
    812   // Change the branch that used to go to AfterCallBB to branch to the first
    813   // basic block of the inlined function.
    814   //
    815   TerminatorInst *Br = OrigBB->getTerminator();
    816   assert(Br && Br->getOpcode() == Instruction::Br &&
    817          "splitBasicBlock broken!");
    818   Br->setOperand(0, FirstNewBlock);
    819 
    820 
    821   // Now that the function is correct, make it a little bit nicer.  In
    822   // particular, move the basic blocks inserted from the end of the function
    823   // into the space made by splitting the source basic block.
    824   Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(),
    825                                      FirstNewBlock, Caller->end());
    826 
    827   // Handle all of the return instructions that we just cloned in, and eliminate
    828   // any users of the original call/invoke instruction.
    829   Type *RTy = CalledFunc->getReturnType();
    830 
    831   PHINode *PHI = 0;
    832   if (Returns.size() > 1) {
    833     // The PHI node should go at the front of the new basic block to merge all
    834     // possible incoming values.
    835     if (!TheCall->use_empty()) {
    836       PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(),
    837                             AfterCallBB->begin());
    838       // Anything that used the result of the function call should now use the
    839       // PHI node as their operand.
    840       TheCall->replaceAllUsesWith(PHI);
    841     }
    842 
    843     // Loop over all of the return instructions adding entries to the PHI node
    844     // as appropriate.
    845     if (PHI) {
    846       for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
    847         ReturnInst *RI = Returns[i];
    848         assert(RI->getReturnValue()->getType() == PHI->getType() &&
    849                "Ret value not consistent in function!");
    850         PHI->addIncoming(RI->getReturnValue(), RI->getParent());
    851       }
    852     }
    853 
    854 
    855     // Add a branch to the merge points and remove return instructions.
    856     DebugLoc Loc;
    857     for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
    858       ReturnInst *RI = Returns[i];
    859       BranchInst* BI = BranchInst::Create(AfterCallBB, RI);
    860       Loc = RI->getDebugLoc();
    861       BI->setDebugLoc(Loc);
    862       RI->eraseFromParent();
    863     }
    864     // We need to set the debug location to *somewhere* inside the
    865     // inlined function. The line number may be nonsensical, but the
    866     // instruction will at least be associated with the right
    867     // function.
    868     if (CreatedBranchToNormalDest)
    869       CreatedBranchToNormalDest->setDebugLoc(Loc);
    870   } else if (!Returns.empty()) {
    871     // Otherwise, if there is exactly one return value, just replace anything
    872     // using the return value of the call with the computed value.
    873     if (!TheCall->use_empty()) {
    874       if (TheCall == Returns[0]->getReturnValue())
    875         TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
    876       else
    877         TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
    878     }
    879 
    880     // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
    881     BasicBlock *ReturnBB = Returns[0]->getParent();
    882     ReturnBB->replaceAllUsesWith(AfterCallBB);
    883 
    884     // Splice the code from the return block into the block that it will return
    885     // to, which contains the code that was after the call.
    886     AfterCallBB->getInstList().splice(AfterCallBB->begin(),
    887                                       ReturnBB->getInstList());
    888 
    889     if (CreatedBranchToNormalDest)
    890       CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc());
    891 
    892     // Delete the return instruction now and empty ReturnBB now.
    893     Returns[0]->eraseFromParent();
    894     ReturnBB->eraseFromParent();
    895   } else if (!TheCall->use_empty()) {
    896     // No returns, but something is using the return value of the call.  Just
    897     // nuke the result.
    898     TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
    899   }
    900 
    901   // Since we are now done with the Call/Invoke, we can delete it.
    902   TheCall->eraseFromParent();
    903 
    904   // We should always be able to fold the entry block of the function into the
    905   // single predecessor of the block...
    906   assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
    907   BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);
    908 
    909   // Splice the code entry block into calling block, right before the
    910   // unconditional branch.
    911   CalleeEntry->replaceAllUsesWith(OrigBB);  // Update PHI nodes
    912   OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());
    913 
    914   // Remove the unconditional branch.
    915   OrigBB->getInstList().erase(Br);
    916 
    917   // Now we can remove the CalleeEntry block, which is now empty.
    918   Caller->getBasicBlockList().erase(CalleeEntry);
    919 
    920   // If we inserted a phi node, check to see if it has a single value (e.g. all
    921   // the entries are the same or undef).  If so, remove the PHI so it doesn't
    922   // block other optimizations.
    923   if (PHI) {
    924     if (Value *V = SimplifyInstruction(PHI, IFI.TD)) {
    925       PHI->replaceAllUsesWith(V);
    926       PHI->eraseFromParent();
    927     }
    928   }
    929 
    930   return true;
    931 }
    932