//===- PlaceSafepoints.cpp - Place GC Safepoints --------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Place garbage collection safepoints at appropriate locations in the IR. This
// does not make relocation semantics or variable liveness explicit.  That's
// done by RewriteStatepointsForGC.
//
// Terminology:
// - A call is said to be "parseable" if there is a stack map generated for the
// return PC of the call.  A runtime can determine where values listed in the
// deopt arguments and (after RewriteStatepointsForGC) gc arguments are located
// on the stack when the code is suspended inside such a call.  Every parse
// point is represented by a call wrapped in a gc.statepoint intrinsic.
// - A "poll" is an explicit check in the generated code to determine if the
// runtime needs the generated code to cooperate by calling a helper routine
// and thus suspending its execution at a known state. The call to the helper
// routine will be parseable.  The (gc & runtime specific) logic of a poll is
// assumed to be provided in a function of the name "gc.safepoint_poll".
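//
// As an illustrative (not normative) sketch, a gc.safepoint_poll might look
// like the following IR, assuming the runtime publishes a request flag and a
// slow-path helper named @do_safepoint (both names hypothetical):
//
//   define void @gc.safepoint_poll() {
//   entry:
//     %flag = load atomic i8, i8* @safepoint_requested acquire, align 1
//     %do.poll = icmp ne i8 %flag, 0
//     br i1 %do.poll, label %slow, label %done
//   slow:
//     call void @do_safepoint()      ; becomes the parseable runtime call
//     br label %done
//   done:
//     ret void
//   }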
//
// We aim to insert polls such that running code can quickly be brought to a
// well defined state for inspection by the collector.  In the current
// implementation, this is done via the insertion of poll sites at method entry
// and the backedge of most loops.  We try to avoid inserting more polls than
// are necessary to ensure a finite period between poll sites.  This is not
// because the poll itself is expensive in the generated code; it's not.  Polls
// do tend to impact the optimizer itself in negative ways; we'd like to avoid
// perturbing the optimization of the method as much as we can.
//
// We also need to make most call sites parseable.  The callee might execute a
// poll (or otherwise be inspected by the GC).  If so, the entire stack
// (including the suspended frame of the current method) must be parseable.
//
// This pass will insert:
// - Call parse points ("call safepoints") for any call which may need to
// reach a safepoint during the execution of the callee function.
// - Backedge safepoint polls and entry safepoint polls to ensure that
// executing code reaches a safepoint poll in a finite amount of time.
//
// We do not currently support return statepoints, but adding them would not
// be hard.  They are not required for correctness - entry safepoints are an
// alternative - but some GCs may prefer them.  Patches welcome.
//
//===----------------------------------------------------------------------===//

#include "llvm/Pass.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"

#define DEBUG_TYPE "safepoint-placement"
STATISTIC(NumEntrySafepoints, "Number of entry safepoints inserted");
STATISTIC(NumCallSafepoints, "Number of call safepoints inserted");
STATISTIC(NumBackedgeSafepoints, "Number of backedge safepoints inserted");

STATISTIC(CallInLoop, "Number of loops w/o safepoints due to calls in loop");
STATISTIC(FiniteExecution,
          "Number of loops w/o safepoints due to finite execution");

using namespace llvm;

// Ignore opportunities to avoid placing safepoints on backedges, useful for
// validation.
static cl::opt<bool> AllBackedges("spp-all-backedges", cl::Hidden,
                                  cl::init(false));

/// How narrow does the trip count of a loop have to be for the loop to be
/// considered "counted"?  Counted loops do not get safepoints at backedges.
static cl::opt<int> CountedLoopTripWidth("spp-counted-loop-trip-width",
                                         cl::Hidden, cl::init(32));

// If true, split the backedge of a loop when placing the safepoint, otherwise
// split the latch block itself.  Both are worth supporting for
// experimentation, but in practice, it looks like splitting the backedge
// optimizes better.
static cl::opt<bool> SplitBackedge("spp-split-backedge", cl::Hidden,
                                   cl::init(false));

// Print tracing output
static cl::opt<bool> TraceLSP("spp-trace", cl::Hidden, cl::init(false));

namespace {

/// An analysis pass whose purpose is to identify each of the backedges in
/// the function which require a safepoint poll to be inserted.
struct PlaceBackedgeSafepointsImpl : public FunctionPass {
  static char ID;

  /// The output of the pass - gives a list of each backedge (described by
  /// pointing at the branch) which needs a poll inserted.
  std::vector<TerminatorInst *> PollLocations;

  /// True unless we're running spp-no-calls in which case we need to disable
  /// the call-dependent placement opts.
  bool CallSafepointsEnabled;

  ScalarEvolution *SE = nullptr;
  DominatorTree *DT = nullptr;
  LoopInfo *LI = nullptr;

  PlaceBackedgeSafepointsImpl(bool CallSafepoints = false)
      : FunctionPass(ID), CallSafepointsEnabled(CallSafepoints) {
    initializePlaceBackedgeSafepointsImplPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *);
  void runOnLoopAndSubLoops(Loop *L) {
    // Visit all the subloops
    for (auto I = L->begin(), E = L->end(); I != E; I++)
      runOnLoopAndSubLoops(*I);
    runOnLoop(L);
  }

  bool runOnFunction(Function &F) override {
    SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    for (auto I = LI->begin(), E = LI->end(); I != E; I++) {
      runOnLoopAndSubLoops(*I);
    }
    return false;
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    // We no longer modify the IR at all in this pass.  Thus all
    // analyses are preserved.
    AU.setPreservesAll();
  }
};
}

static cl::opt<bool> NoEntry("spp-no-entry", cl::Hidden, cl::init(false));
static cl::opt<bool> NoCall("spp-no-call", cl::Hidden, cl::init(false));
static cl::opt<bool> NoBackedge("spp-no-backedge", cl::Hidden, cl::init(false));

namespace {
struct PlaceSafepoints : public FunctionPass {
  static char ID; // Pass identification, replacement for typeid

  PlaceSafepoints() : FunctionPass(ID) {
    initializePlaceSafepointsPass(*PassRegistry::getPassRegistry());
  }
  bool runOnFunction(Function &F) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // We modify the graph wholesale (inlining, block insertion, etc).  We
    // preserve nothing at the moment.  We could potentially preserve dom tree
    // if that was worth doing.
  }
};
}

// Insert a safepoint poll immediately before the given instruction.  Does
// not handle the parsability of state at the runtime call; that's the
// caller's job.
static void
InsertSafepointPoll(Instruction *InsertBefore,
                    std::vector<CallSite> &ParsePointsNeeded /*rval*/);

static bool needsStatepoint(const CallSite &CS) {
  if (callsGCLeafFunction(CS))
    return false;
  if (CS.isCall()) {
    CallInst *call = cast<CallInst>(CS.getInstruction());
    if (call->isInlineAsm())
      return false;
  }
  if (isStatepoint(CS) || isGCRelocate(CS) || isGCResult(CS)) {
    return false;
  }
  return true;
}

static Value *ReplaceWithStatepoint(const CallSite &CS);

/// Returns true if this loop is known to contain a call safepoint which
/// must unconditionally execute on any iteration of the loop which returns
/// to the loop header via an edge from Pred.  Returns a conservatively
/// correct answer; i.e. false is always valid.
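///
/// For illustration (block names hypothetical): in a loop whose latch %latch
/// is dominated by a guard block %guard containing a non-leaf call such as
/// `call void @foo()`, walking the idom chain %latch -> %guard -> %header
/// finds the call even though neither the latch nor the header contains one.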
static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *Header,
                                               BasicBlock *Pred,
                                               DominatorTree &DT) {
  // In general, we're looking for any cut of the graph which ensures
  // there's a call safepoint along every edge between Header and Pred.
  // For the moment, we look only for the 'cuts' that consist of a single call
  // instruction in a block which is dominated by the Header and dominates the
  // loop latch (Pred) block.  Somewhat surprisingly, walking the entire chain
  // of such dominating blocks gets substantially more occurrences than just
  // checking the Pred and Header blocks themselves.  This may be due to the
  // density of loop exit conditions caused by range and null checks.
  // TODO: structure this as an analysis pass, cache the result for subloops,
  // avoid dom tree recalculations
  assert(DT.dominates(Header, Pred) && "loop latch not dominated by header?");

  BasicBlock *Current = Pred;
  while (true) {
    for (Instruction &I : *Current) {
      if (auto CS = CallSite(&I))
        // Note: Technically, needing a safepoint isn't quite the right
        // condition here.  We should instead be checking if the target method
        // has an unconditional poll.  In practice, this is only a theoretical
        // concern since we don't have any methods with conditional-only
        // safepoint polls.
        if (needsStatepoint(CS))
          return true;
    }

    if (Current == Header)
      break;
    Current = DT.getNode(Current)->getIDom()->getBlock();
  }

  return false;
}

/// Returns true if this loop is known to terminate in a finite number of
/// iterations.  Note that this function may return false for a loop which
/// does actually terminate in a finite constant number of iterations due to
/// conservatism in the analysis.
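///
/// Illustrative example (assuming the default CountedLoopTripWidth of 32):
/// ScalarEvolution can bound a loop like
///   for (uint32_t i = 0; i != n; ++i) { ... }
/// by 2^32 - 1 iterations, so its backedge needs no poll, while a loop whose
/// exit depends on loaded memory typically yields getCouldNotCompute() and
/// keeps its backedge poll.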
static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE,
                                    BasicBlock *Pred) {
  // A conservative bound on the loop as a whole.
  const SCEV *MaxTrips = SE->getMaxBackedgeTakenCount(L);
  if (MaxTrips != SE->getCouldNotCompute() &&
      SE->getUnsignedRange(MaxTrips).getUnsignedMax().isIntN(
          CountedLoopTripWidth))
    return true;

  // If this is a conditional branch to the header with the alternate path
  // being outside the loop, we can ask questions about the execution frequency
  // of the exit block.
  if (L->isLoopExiting(Pred)) {
    // This returns an exact expression only.  TODO: We really only need an
    // upper bound here, but SE doesn't expose that.
    const SCEV *MaxExec = SE->getExitCount(L, Pred);
    if (MaxExec != SE->getCouldNotCompute() &&
        SE->getUnsignedRange(MaxExec).getUnsignedMax().isIntN(
            CountedLoopTripWidth))
      return true;
  }

  return /* not finite */ false;
}

static void scanOneBB(Instruction *start, Instruction *end,
                      std::vector<CallInst *> &calls,
                      std::set<BasicBlock *> &seen,
                      std::vector<BasicBlock *> &worklist) {
  for (BasicBlock::iterator itr(start);
       itr != start->getParent()->end() && itr != BasicBlock::iterator(end);
       itr++) {
    if (CallInst *CI = dyn_cast<CallInst>(&*itr)) {
      calls.push_back(CI);
    }
    // FIXME: This code does not handle invokes
    assert(!dyn_cast<InvokeInst>(&*itr) &&
           "support for invokes in poll code needed");
    // Only add the successor blocks if we reach the terminator instruction
    // without encountering end first
    if (itr->isTerminator()) {
      BasicBlock *BB = itr->getParent();
      for (BasicBlock *Succ : successors(BB)) {
        if (seen.count(Succ) == 0) {
          worklist.push_back(Succ);
          seen.insert(Succ);
        }
      }
    }
  }
}

static void scanInlinedCode(Instruction *start, Instruction *end,
                            std::vector<CallInst *> &calls,
                            std::set<BasicBlock *> &seen) {
  calls.clear();
  std::vector<BasicBlock *> worklist;
  seen.insert(start->getParent());
  scanOneBB(start, end, calls, seen, worklist);
  while (!worklist.empty()) {
    BasicBlock *BB = worklist.back();
    worklist.pop_back();
    scanOneBB(&*BB->begin(), end, calls, seen, worklist);
  }
}

bool PlaceBackedgeSafepointsImpl::runOnLoop(Loop *L) {
  // Loop through all loop latches (branches controlling backedges).  We need
  // to place a safepoint on every backedge (potentially).
  // Note: In common usage, there will be only one edge due to LoopSimplify
  // having run sometime earlier in the pipeline, but this code must be correct
  // w.r.t. loops with multiple backedges.
  BasicBlock *header = L->getHeader();
  SmallVector<BasicBlock*, 16> LoopLatches;
  L->getLoopLatches(LoopLatches);
  for (BasicBlock *pred : LoopLatches) {
    assert(L->contains(pred));

    // Make a policy decision about whether this loop needs a safepoint or
    // not.  Note that this is about unburdening the optimizer in loops, not
    // avoiding the runtime cost of the actual safepoint.
    if (!AllBackedges) {
      if (mustBeFiniteCountedLoop(L, SE, pred)) {
        if (TraceLSP)
          errs() << "skipping safepoint placement in finite loop\n";
        FiniteExecution++;
        continue;
      }
      if (CallSafepointsEnabled &&
          containsUnconditionalCallSafepoint(L, header, pred, *DT)) {
        // Note: This is only semantically legal since we won't do any further
        // IPO or inlining before the actual call insertion.  If we did, we
        // might later lose this call safepoint.
        if (TraceLSP)
          errs() << "skipping safepoint placement due to unconditional call\n";
        CallInLoop++;
        continue;
      }
    }

    // TODO: We can create an inner loop which runs a finite number of
    // iterations with an outer loop which contains a safepoint.  This would
    // not help runtime performance that much, but it might help our ability to
    // optimize the inner loop.

    // Safepoint insertion would involve creating a new basic block (as the
    // target of the current backedge) which does the safepoint (of all live
    // variables) and branches to the true header.
    TerminatorInst *term = pred->getTerminator();

    if (TraceLSP) {
      errs() << "[LSP] terminator instruction: ";
      term->dump();
    }

    PollLocations.push_back(term);
  }

  return false;
}

/// Returns true if an entry safepoint is not required before this callsite in
/// the caller function.
static bool doesNotRequireEntrySafepointBefore(const CallSite &CS) {
  Instruction *Inst = CS.getInstruction();
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::experimental_gc_statepoint:
    case Intrinsic::experimental_patchpoint_void:
    case Intrinsic::experimental_patchpoint_i64:
      // These can wrap an actual call which may grow the stack by an unbounded
      // amount or run forever.
      return false;
    default:
      // Most LLVM intrinsics are things which do not expand to actual calls, or
      // at least if they do, are leaf functions that cause only finite stack
      // growth.  In particular, the optimizer likes to form things like memsets
      // out of stores in the original IR.  Another important example is
      // llvm.localescape which must occur in the entry block.  Inserting a
      // safepoint before it is not legal since it could push the localescape
      // out of the entry block.
      return true;
    }
  }
  return false;
}

static Instruction *findLocationForEntrySafepoint(Function &F,
                                                  DominatorTree &DT) {

  // Conceptually, this poll needs to be on method entry, but in
  // practice, we place it as late in the entry block as possible.  We
  // can place it as late as we want as long as it dominates all calls
  // that can grow the stack.  This, combined with backedge polls,
  // gives us all the progress guarantees we need.
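  //
  // For example (illustrative entry block), given
  //   %a = add i32 %x, 1
  //   %b = mul i32 %a, %a
  //   call void @bar(i32 %b)        ; first 'real' call
  // the cursor below stops at the call to @bar, and the poll is later
  // inlined immediately before it.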

  // hasNextInstruction and nextInstruction are used to iterate
  // through a "straight line" execution sequence.

  auto hasNextInstruction = [](Instruction *I) {
    if (!I->isTerminator()) {
      return true;
    }
    BasicBlock *nextBB = I->getParent()->getUniqueSuccessor();
    return nextBB && (nextBB->getUniquePredecessor() != nullptr);
  };

  auto nextInstruction = [&hasNextInstruction](Instruction *I) {
    assert(hasNextInstruction(I) &&
           "first check if there is a next instruction!");
    if (I->isTerminator()) {
      return &I->getParent()->getUniqueSuccessor()->front();
    } else {
      return &*++I->getIterator();
    }
  };

  Instruction *cursor = nullptr;
  for (cursor = &F.getEntryBlock().front(); hasNextInstruction(cursor);
       cursor = nextInstruction(cursor)) {

    // We need to ensure a safepoint poll occurs before any 'real' call.  The
    // easiest way to ensure finite execution between safepoints in the face of
    // recursive and mutually recursive functions is to enforce that each take
    // a safepoint.  Additionally, we need to ensure a poll before any call
    // which can grow the stack by an unbounded amount.  This isn't required
    // for GC semantics per se, but is a common requirement for languages
    // which detect stack overflow via guard pages and then throw exceptions.
    if (auto CS = CallSite(cursor)) {
      if (doesNotRequireEntrySafepointBefore(CS))
        continue;
      break;
    }
  }

  assert((hasNextInstruction(cursor) || cursor->isTerminator()) &&
         "either we stopped because of a call, or because of terminator");

  return cursor;
}

/// Identify the list of call sites which need to have parseable state
static void findCallSafepoints(Function &F,
                               std::vector<CallSite> &Found /*rval*/) {
  assert(Found.empty() && "must be empty!");
  for (Instruction &I : instructions(F)) {
    Instruction *inst = &I;
    if (isa<CallInst>(inst) || isa<InvokeInst>(inst)) {
      CallSite CS(inst);

      // No safepoint needed or wanted
      if (!needsStatepoint(CS)) {
        continue;
      }

      Found.push_back(CS);
    }
  }
}

/// Implement a unique function which doesn't require we sort the input
/// vector.  Doing so has the effect of changing the output of a couple of
/// tests in ways which make them less useful in testing fused safepoints.
template <typename T> static void unique_unsorted(std::vector<T> &vec) {
  std::set<T> seen;
  std::vector<T> tmp;
  std::swap(tmp, vec);
  // Reserve after the swap; reserving before it would hand the reserved
  // buffer to tmp rather than to the vector we refill below.
  vec.reserve(tmp.size());
  for (auto V : tmp) {
    if (seen.insert(V).second) {
      vec.push_back(V);
    }
  }
}

static const char *const GCSafepointPollName = "gc.safepoint_poll";

static bool isGCSafepointPoll(Function &F) {
  return F.getName().equals(GCSafepointPollName);
}

/// Returns true if this function should be rewritten to include safepoint
/// polls and parseable call sites.  The main point of this function is to be
/// an extension point for custom logic.
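///
/// For example, a function opts in by naming one of the supported GC
/// strategies in its "gc" attribute (illustrative IR):
///   define void @f() gc "statepoint-example" { ... }
/// Functions without a "gc" attribute are not rewritten.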
static bool shouldRewriteFunction(Function &F) {
  // TODO: This should check the GCStrategy
  if (F.hasGC()) {
    const char *FunctionGCName = F.getGC();
    const StringRef StatepointExampleName("statepoint-example");
    const StringRef CoreCLRName("coreclr");
    return (StatepointExampleName == FunctionGCName) ||
           (CoreCLRName == FunctionGCName);
  } else
    return false;
}

// TODO: These should become properties of the GCStrategy, possibly with
// command line overrides.
static bool enableEntrySafepoints(Function &F) { return !NoEntry; }
static bool enableBackedgeSafepoints(Function &F) { return !NoBackedge; }
static bool enableCallSafepoints(Function &F) { return !NoCall; }

// Normalize a basic block to make it ready to be the target of an invoke
// statepoint.  Ensure that 'BB' does not have phi nodes.  This may require
// splitting it.
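// Illustrative sketch: given `invoke ... to label %normal unwind label %uw`
// where %normal has other predecessors and thus phi nodes, the edge is split
// so the invoke gets a private, phi-free normal destination into which a
// gc.result can later be inserted.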
static BasicBlock *normalizeForInvokeSafepoint(BasicBlock *BB,
                                               BasicBlock *InvokeParent) {
  BasicBlock *ret = BB;

  if (!BB->getUniquePredecessor()) {
    ret = SplitBlockPredecessors(BB, InvokeParent, "");
  }

  // Now that 'ret' has a unique predecessor we can safely remove all phi nodes
  // from it
  FoldSingleEntryPHINodes(ret);
  assert(!isa<PHINode>(ret->begin()));

  return ret;
}

bool PlaceSafepoints::runOnFunction(Function &F) {
  if (F.isDeclaration() || F.empty()) {
    // This is a declaration, nothing to do.  Must exit early to avoid a crash
    // in dom tree calculation.
    return false;
  }

  if (isGCSafepointPoll(F)) {
    // Given we inline this function at every poll site, inserting polls into
    // it would make no sense.  Note that we do make any contained calls
    // parseable after we inline a poll.
    return false;
  }

  if (!shouldRewriteFunction(F))
    return false;

  bool modified = false;

  // In various bits below, we rely on the fact that uses are reachable from
  // defs.  When there are basic blocks unreachable from the entry, dominance
  // and reachability queries return nonsensical results.  Thus, we preprocess
  // the function to ensure these properties hold.
  modified |= removeUnreachableBlocks(F);

  // STEP 1 - Insert the safepoint polling locations.  We do not need to
  // actually insert parse points yet.  That will be done for all polls and
  // calls in a single pass.

  DominatorTree DT;
  DT.recalculate(F);

  SmallVector<Instruction *, 16> PollsNeeded;
  std::vector<CallSite> ParsePointNeeded;

  if (enableBackedgeSafepoints(F)) {
    // Construct a pass manager to run the LoopPass backedge logic.  We
    // need the pass manager to handle scheduling all the loop passes
    // appropriately.  Doing this by hand is painful and just not worth messing
    // with for the moment.
    legacy::FunctionPassManager FPM(F.getParent());
    bool CanAssumeCallSafepoints = enableCallSafepoints(F);
    PlaceBackedgeSafepointsImpl *PBS =
      new PlaceBackedgeSafepointsImpl(CanAssumeCallSafepoints);
    FPM.add(PBS);
    FPM.run(F);

    // We preserve dominance information when inserting the poll, otherwise
    // we'd have to recalculate this on every insert
    DT.recalculate(F);

    auto &PollLocations = PBS->PollLocations;

    auto OrderByBBName = [](Instruction *a, Instruction *b) {
      return a->getParent()->getName() < b->getParent()->getName();
    };
    // We need the order of the list to be stable so that naming ends up stable
    // when we split edges.  This makes test cases much easier to write.
    std::sort(PollLocations.begin(), PollLocations.end(), OrderByBBName);

    // We can sometimes end up with duplicate poll locations.  This happens if
    // a single loop is visited more than once.  The fact this happens seems
    // wrong, but it does happen for the split-backedge.ll test case.
    PollLocations.erase(std::unique(PollLocations.begin(),
                                    PollLocations.end()),
                        PollLocations.end());

    // Insert a poll at each point the analysis pass identified.
    // The poll location must be the terminator of a loop latch block.
    for (TerminatorInst *Term : PollLocations) {
      // We are inserting a poll, the function is modified
      modified = true;

      if (SplitBackedge) {
        // Split the backedge of the loop and insert the poll within that new
        // basic block.  This creates a loop with two latches per original
        // latch (which is non-ideal), but this appears to be easier to
        // optimize in practice than inserting the poll immediately before the
        // latch test.

        // Since this is a latch, at least one of the successors must dominate
        // it.  It's possible that we have a) duplicate edges to the same header
        // and b) edges to distinct loop headers.  We need to insert polls on
        // each.
        SetVector<BasicBlock *> Headers;
        for (unsigned i = 0; i < Term->getNumSuccessors(); i++) {
          BasicBlock *Succ = Term->getSuccessor(i);
          if (DT.dominates(Succ, Term->getParent())) {
            Headers.insert(Succ);
          }
        }
        assert(!Headers.empty() && "poll location is not a loop latch?");

        // The split loop structure here is so that we only need to recalculate
        // the dominator tree once.  Alternatively, we could just keep it up to
        // date and use a more natural merged loop.
        SetVector<BasicBlock *> SplitBackedges;
        for (BasicBlock *Header : Headers) {
          BasicBlock *NewBB = SplitEdge(Term->getParent(), Header, &DT);
          PollsNeeded.push_back(NewBB->getTerminator());
          NumBackedgeSafepoints++;
        }
      } else {
        // Split the latch block itself, right before the terminator.
        PollsNeeded.push_back(Term);
        NumBackedgeSafepoints++;
      }
    }
  }

  if (enableEntrySafepoints(F)) {
    Instruction *Location = findLocationForEntrySafepoint(F, DT);
    if (!Location) {
      // policy choice not to insert?
    } else {
      PollsNeeded.push_back(Location);
      modified = true;
      NumEntrySafepoints++;
    }
  }

  // Now that we've identified all the needed safepoint poll locations, insert
  // the safepoint polls themselves.
  for (Instruction *PollLocation : PollsNeeded) {
    std::vector<CallSite> RuntimeCalls;
    InsertSafepointPoll(PollLocation, RuntimeCalls);
    ParsePointNeeded.insert(ParsePointNeeded.end(), RuntimeCalls.begin(),
                            RuntimeCalls.end());
  }
  PollsNeeded.clear(); // make sure we don't accidentally use them again

  // The dominator tree has been invalidated by the inlining performed in the
  // above loop.  TODO: Teach the inliner how to update the dom tree?
  DT.recalculate(F);

  if (enableCallSafepoints(F)) {
    std::vector<CallSite> Calls;
    findCallSafepoints(F, Calls);
    NumCallSafepoints += Calls.size();
    ParsePointNeeded.insert(ParsePointNeeded.end(), Calls.begin(), Calls.end());
  }

  // Unique the vectors since we can end up with duplicates if we scan the call
  // site for call safepoints after we add it for entry or backedge.  The
  // only reason we need tracking at all is that some functions might have
  // polls but not call safepoints and thus we might miss marking the runtime
  // calls for the polls. (This is useful in test cases!)
  unique_unsorted(ParsePointNeeded);

  // Any parse point (no matter what source) will be handled here

  // We're about to start modifying the function
  if (!ParsePointNeeded.empty())
    modified = true;

  // Now run through and insert the safepoints, but do _NOT_ update or remove
  // any existing uses.  We have references to live variables that need to
  // survive to the last iteration of this loop.
  std::vector<Value *> Results;
  Results.reserve(ParsePointNeeded.size());
  for (size_t i = 0; i < ParsePointNeeded.size(); i++) {
    CallSite &CS = ParsePointNeeded[i];

    // For invoke statepoints we need to remove all phi nodes at the normal
    // destination block.  The reason is that we can place a gc_result only
    // after the last phi node in a basic block, and we would get malformed
    // code after RAUW for the gc_result if one of those phi nodes used the
    // result from the invoke.
    if (InvokeInst *Invoke = dyn_cast<InvokeInst>(CS.getInstruction())) {
      normalizeForInvokeSafepoint(Invoke->getNormalDest(),
                                  Invoke->getParent());
    }

    Value *GCResult = ReplaceWithStatepoint(CS);
    Results.push_back(GCResult);
  }
  assert(Results.size() == ParsePointNeeded.size());

  // Adjust all users of the old call sites to use the new ones instead
  for (size_t i = 0; i < ParsePointNeeded.size(); i++) {
    CallSite &CS = ParsePointNeeded[i];
    Value *GCResult = Results[i];
    if (GCResult) {
      // Cannot RAUW for the invoke gc result if phi nodes are present.
      assert(CS.isCall() ||
             !isa<PHINode>(cast<Instruction>(GCResult)->getParent()->begin()));

      // Replace all uses with the new call
      CS.getInstruction()->replaceAllUsesWith(GCResult);
    }

    // Now that we've handled all uses, remove the original call itself.
    // Note: The insert point can't be the deleted instruction!
    CS.getInstruction()->eraseFromParent();
  }
  return modified;
}

char PlaceBackedgeSafepointsImpl::ID = 0;
char PlaceSafepoints::ID = 0;

FunctionPass *llvm::createPlaceSafepointsPass() {
  return new PlaceSafepoints();
}

INITIALIZE_PASS_BEGIN(PlaceBackedgeSafepointsImpl,
                      "place-backedge-safepoints-impl",
                      "Place Backedge Safepoints", false, false)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(PlaceBackedgeSafepointsImpl,
                    "place-backedge-safepoints-impl",
                    "Place Backedge Safepoints", false, false)

INITIALIZE_PASS_BEGIN(PlaceSafepoints, "place-safepoints", "Place Safepoints",
                      false, false)
INITIALIZE_PASS_END(PlaceSafepoints, "place-safepoints", "Place Safepoints",
                    false, false)

static void
InsertSafepointPoll(Instruction *InsertBefore,
                    std::vector<CallSite> &ParsePointsNeeded /*rval*/) {
  BasicBlock *OrigBB = InsertBefore->getParent();
  Module *M = InsertBefore->getModule();
  assert(M && "must be part of a module");

  // Inline the safepoint poll implementation - this will get all the branch,
  // control flow, etc..  Most importantly, it will introduce the actual slow
  // path call - where we need to insert a safepoint (parsepoint).

  auto *F = M->getFunction(GCSafepointPollName);
  assert(F->getType()->getElementType() ==
         FunctionType::get(Type::getVoidTy(M->getContext()), false) &&
         "gc.safepoint_poll declared with wrong type");
  assert(!F->empty() && "gc.safepoint_poll must be a non-empty function");
  CallInst *PollCall = CallInst::Create(F, "", InsertBefore);

  // Record some information about the call site we're replacing
  BasicBlock::iterator before(PollCall), after(PollCall);
  bool isBegin(false);
  if (before == OrigBB->begin()) {
    isBegin = true;
  } else {
    before--;
  }
  after++;
  assert(after != OrigBB->end() && "must have successor");

  // do the actual inlining
  InlineFunctionInfo IFI;
  bool InlineStatus = InlineFunction(PollCall, IFI);
  assert(InlineStatus && "inline must succeed");
  (void)InlineStatus; // suppress warning in release-asserts

  // Check post conditions
  assert(IFI.StaticAllocas.empty() && "can't have allocs");

  std::vector<CallInst *> calls; // new calls
  std::set<BasicBlock *> BBs;    // new BBs + insertee
  // Include only the newly inserted instructions.  Note: begin may not be
  // valid if we inserted at the beginning of the basic block.
  BasicBlock::iterator start;
  if (isBegin) {
    start = OrigBB->begin();
  } else {
    start = before;
    start++;
  }

  // If your poll function includes an unreachable at the end, that's not
  // valid.  Bugpoint likes to create this, so check for it.
  assert(isPotentiallyReachable(&*start, &*after, nullptr, nullptr) &&
         "malformed poll function");

  scanInlinedCode(&*(start), &*(after), calls, BBs);
  assert(!calls.empty() && "slow path not found for safepoint poll");

  // Record the fact we need a parsable state at the runtime call contained in
  // the poll function.  This is required so that the runtime knows how to
  // parse the last frame when we actually take the safepoint (i.e. execute
  // the slow path).
  assert(ParsePointsNeeded.empty());
  for (size_t i = 0; i < calls.size(); i++) {

    // No safepoint needed or wanted
    if (!needsStatepoint(calls[i])) {
      continue;
    }

    // These are likely runtime calls.  Should we assert that via calling
    // convention or something?
    ParsePointsNeeded.push_back(CallSite(calls[i]));
  }
  assert(ParsePointsNeeded.size() <= calls.size());
}

/// Replaces the given call site (Call or Invoke) with a gc.statepoint
/// intrinsic with an empty deoptimization arguments list.  This does
/// NOT do explicit relocation for GC support.
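///
/// Illustrative (not exact) result for the call case, using the default
/// statepoint ID of 0xABCDEF00 (2882400000) and zero patch bytes:
///   %ret = call i32 @foo(i32 %a)
/// becomes roughly:
///   %safepoint_token = call token (i64, i32, i32 (i32)*, i32, i32, ...)
///       @llvm.experimental.gc.statepoint.p0f_i32i32f(
///           i64 2882400000, i32 0, i32 (i32)* @foo, i32 1, i32 0,
///           i32 %a, i32 0, i32 0)
///   %ret = call i32 @llvm.experimental.gc.result.i32(token %safepoint_token)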
static Value *ReplaceWithStatepoint(const CallSite &CS /* to replace */) {
  assert(CS.getInstruction()->getModule() && "must be set");

  // TODO: technically, a pass is not allowed to get functions from within a
  // function pass since it might trigger a new function addition.  Refactor
  // this logic out to the initialization of the pass.  Doesn't appear to
  // matter in practice.

  // Then go ahead and use the builder to actually do the inserts.  We insert
  // immediately before the previous instruction under the assumption that all
  // arguments will be available here.  We can't insert afterwards since we may
  // be replacing a terminator.
  IRBuilder<> Builder(CS.getInstruction());

  // Note: The gc args are not filled in at this time, that's handled by
  // RewriteStatepointsForGC (which is currently under review).

  // Create the statepoint given all the arguments
  Instruction *Token = nullptr;

  uint64_t ID;
  uint32_t NumPatchBytes;

  AttributeSet OriginalAttrs = CS.getAttributes();
  Attribute AttrID =
      OriginalAttrs.getAttribute(AttributeSet::FunctionIndex, "statepoint-id");
  Attribute AttrNumPatchBytes = OriginalAttrs.getAttribute(
      AttributeSet::FunctionIndex, "statepoint-num-patch-bytes");

  AttrBuilder AttrsToRemove;
  bool HasID = AttrID.isStringAttribute() &&
               !AttrID.getValueAsString().getAsInteger(10, ID);

  if (HasID)
    AttrsToRemove.addAttribute("statepoint-id");
  else
    ID = 0xABCDEF00;

  bool HasNumPatchBytes =
      AttrNumPatchBytes.isStringAttribute() &&
      !AttrNumPatchBytes.getValueAsString().getAsInteger(10, NumPatchBytes);

  if (HasNumPatchBytes)
    AttrsToRemove.addAttribute("statepoint-num-patch-bytes");
  else
    NumPatchBytes = 0;

  OriginalAttrs = OriginalAttrs.removeAttributes(
      CS.getInstruction()->getContext(), AttributeSet::FunctionIndex,
      AttrsToRemove);

  if (CS.isCall()) {
    CallInst *ToReplace = cast<CallInst>(CS.getInstruction());
    CallInst *Call = Builder.CreateGCStatepointCall(
        ID, NumPatchBytes, CS.getCalledValue(),
        makeArrayRef(CS.arg_begin(), CS.arg_end()), None, None,
        "safepoint_token");
    Call->setTailCall(ToReplace->isTailCall());
    Call->setCallingConv(ToReplace->getCallingConv());

    // If we can handle this set of attributes, set the function attributes
    // directly on the statepoint; the return attributes are set later on the
    // gc_result intrinsic.
    Call->setAttributes(OriginalAttrs.getFnAttributes());

    Token = Call;

    // Put the following gc_result and gc_relocate calls immediately after
    // the old call (which we're about to delete).
    assert(ToReplace->getNextNode() && "not a terminator, must have next");
    Builder.SetInsertPoint(ToReplace->getNextNode());
    Builder.SetCurrentDebugLocation(ToReplace->getNextNode()->getDebugLoc());
  } else if (CS.isInvoke()) {
    InvokeInst *ToReplace = cast<InvokeInst>(CS.getInstruction());

    // Insert the new invoke into the old block.  We'll remove the old one in a
    // moment at which point this will become the new terminator for the
    // original block.
    Builder.SetInsertPoint(ToReplace->getParent());
    InvokeInst *Invoke = Builder.CreateGCStatepointInvoke(
        ID, NumPatchBytes, CS.getCalledValue(), ToReplace->getNormalDest(),
        ToReplace->getUnwindDest(), makeArrayRef(CS.arg_begin(), CS.arg_end()),
        None, None, "safepoint_token");

    Invoke->setCallingConv(ToReplace->getCallingConv());

    // If we can handle this set of attributes, set the function attributes
    // directly on the statepoint; the return attributes are set later on the
    // gc_result intrinsic.
    Invoke->setAttributes(OriginalAttrs.getFnAttributes());

    Token = Invoke;

    // We'll insert the gc.result into the normal block.
    BasicBlock *NormalDest = ToReplace->getNormalDest();
    // We cannot insert a gc.result if phi nodes are present; such cases
    // should have been removed prior to running this function.
    assert(!isa<PHINode>(NormalDest->begin()));
    Instruction *IP = &*(NormalDest->getFirstInsertionPt());
    Builder.SetInsertPoint(IP);
  } else {
    llvm_unreachable("unexpected type of CallSite");
  }
  assert(Token);

  // Handle the return value of the original call - update all uses to use a
  // gc_result hanging off the statepoint node we just inserted.

  // Only add the gc_result iff there is actually a used result
  if (!CS.getType()->isVoidTy() && !CS.getInstruction()->use_empty()) {
    std::string TakenName =
        CS.getInstruction()->hasName() ? CS.getInstruction()->getName() : "";
    CallInst *GCResult = Builder.CreateGCResult(Token, CS.getType(), TakenName);
    GCResult->setAttributes(OriginalAttrs.getRetAttributes());
    return GCResult;
  } else {
    // No return value for the call.
    return nullptr;
  }
}
    953