Home | History | Annotate | Download | only in Scalar
      1 //===- SpeculativeExecution.cpp ---------------------------------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This pass hoists instructions to enable speculative execution on
     11 // targets where branches are expensive. This is aimed at GPUs. It
     12 // currently works on simple if-then and if-then-else
     13 // patterns.
     14 //
     15 // Removing branches is not the only motivation for this
     16 // pass. E.g. consider this code and assume that there is no
     17 // addressing mode for multiplying by sizeof(*a):
     18 //
     19 //   if (b > 0)
     20 //     c = a[i + 1]
     21 //   if (d > 0)
     22 //     e = a[i + 2]
     23 //
     24 // turns into
     25 //
     26 //   p = &a[i + 1];
     27 //   if (b > 0)
     28 //     c = *p;
     29 //   q = &a[i + 2];
     30 //   if (d > 0)
     31 //     e = *q;
     32 //
     33 // which could later be optimized to
     34 //
     35 //   r = &a[i];
     36 //   if (b > 0)
     37 //     c = r[1];
     38 //   if (d > 0)
     39 //     e = r[2];
     40 //
     41 // Later passes sink back much of the speculated code that did not enable
     42 // further optimization.
     43 //
     44 // This pass is more aggressive than the function SpeculativeyExecuteBB in
     45 // SimplifyCFG. SimplifyCFG will not speculate if no selects are introduced and
     46 // it will speculate at most one instruction. It also will not speculate if
     47 // there is a value defined in the if-block that is only used in the then-block.
     48 // These restrictions make sense since the speculation in SimplifyCFG seems
     49 // aimed at introducing cheap selects, while this pass is intended to do more
     50 // aggressive speculation while counting on later passes to either capitalize on
     51 // that or clean it up.
     52 //
     53 // If the pass was created by calling
     54 // createSpeculativeExecutionIfHasBranchDivergencePass or the
     55 // -spec-exec-only-if-divergent-target option is present, this pass only has an
     56 // effect on targets where TargetTransformInfo::hasBranchDivergence() is true;
     57 // on other targets, it is a nop.
     58 //
     59 // This lets you include this pass unconditionally in the IR pass pipeline, but
     60 // only enable it for relevant targets.
     61 //
     62 //===----------------------------------------------------------------------===//
     63 
     64 #include "llvm/ADT/SmallSet.h"
     65 #include "llvm/Analysis/GlobalsModRef.h"
     66 #include "llvm/Analysis/TargetTransformInfo.h"
     67 #include "llvm/Analysis/ValueTracking.h"
     68 #include "llvm/IR/Instructions.h"
     69 #include "llvm/IR/Module.h"
     70 #include "llvm/IR/Operator.h"
     71 #include "llvm/Support/CommandLine.h"
     72 #include "llvm/Support/Debug.h"
     73 
     74 using namespace llvm;
     75 
     76 #define DEBUG_TYPE "speculative-execution"
     77 
     78 // The risk that speculation will not pay off increases with the
     79 // number of instructions speculated, so we put a limit on that.
     80 static cl::opt<unsigned> SpecExecMaxSpeculationCost(
     81     "spec-exec-max-speculation-cost", cl::init(7), cl::Hidden,
     82     cl::desc("Speculative execution is not applied to basic blocks where "
     83              "the cost of the instructions to speculatively execute "
     84              "exceeds this limit."));
     85 
     86 // Speculating just a few instructions from a larger block tends not
     87 // to be profitable and this limit prevents that. A reason for that is
     88 // that small basic blocks are more likely to be candidates for
     89 // further optimization.
     90 static cl::opt<unsigned> SpecExecMaxNotHoisted(
     91     "spec-exec-max-not-hoisted", cl::init(5), cl::Hidden,
     92     cl::desc("Speculative execution is not applied to basic blocks where the "
     93              "number of instructions that would not be speculatively executed "
     94              "exceeds this limit."));
     95 
     96 static cl::opt<bool> SpecExecOnlyIfDivergentTarget(
     97     "spec-exec-only-if-divergent-target", cl::init(false), cl::Hidden,
     98     cl::desc("Speculative execution is applied only to targets with divergent "
     99              "branches, even if the pass was configured to apply only to all "
    100              "targets."));
    101 
    102 namespace {
    103 
    104 class SpeculativeExecution : public FunctionPass {
    105  public:
    106    static char ID;
    107    explicit SpeculativeExecution(bool OnlyIfDivergentTarget = false)
    108        : FunctionPass(ID),
    109          OnlyIfDivergentTarget(OnlyIfDivergentTarget ||
    110                                SpecExecOnlyIfDivergentTarget) {}
    111 
    112    void getAnalysisUsage(AnalysisUsage &AU) const override;
    113    bool runOnFunction(Function &F) override;
    114 
    115    const char *getPassName() const override {
    116      if (OnlyIfDivergentTarget)
    117        return "Speculatively execute instructions if target has divergent "
    118               "branches";
    119      return "Speculatively execute instructions";
    120    }
    121 
    122  private:
    123   bool runOnBasicBlock(BasicBlock &B);
    124   bool considerHoistingFromTo(BasicBlock &FromBlock, BasicBlock &ToBlock);
    125 
    126   // If true, this pass is a nop unless the target architecture has branch
    127   // divergence.
    128   const bool OnlyIfDivergentTarget;
    129   const TargetTransformInfo *TTI = nullptr;
    130 };
    131 } // namespace
    132 
    133 char SpeculativeExecution::ID = 0;
    134 INITIALIZE_PASS_BEGIN(SpeculativeExecution, "speculative-execution",
    135                       "Speculatively execute instructions", false, false)
    136 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
    137 INITIALIZE_PASS_END(SpeculativeExecution, "speculative-execution",
    138                     "Speculatively execute instructions", false, false)
    139 
    140 void SpeculativeExecution::getAnalysisUsage(AnalysisUsage &AU) const {
    141   AU.addRequired<TargetTransformInfoWrapperPass>();
    142   AU.addPreserved<GlobalsAAWrapperPass>();
    143 }
    144 
    145 bool SpeculativeExecution::runOnFunction(Function &F) {
    146   if (skipFunction(F))
    147     return false;
    148 
    149   TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    150   if (OnlyIfDivergentTarget && !TTI->hasBranchDivergence()) {
    151     DEBUG(dbgs() << "Not running SpeculativeExecution because "
    152                     "TTI->hasBranchDivergence() is false.\n");
    153     return false;
    154   }
    155 
    156   bool Changed = false;
    157   for (auto& B : F) {
    158     Changed |= runOnBasicBlock(B);
    159   }
    160   return Changed;
    161 }
    162 
    163 bool SpeculativeExecution::runOnBasicBlock(BasicBlock &B) {
    164   BranchInst *BI = dyn_cast<BranchInst>(B.getTerminator());
    165   if (BI == nullptr)
    166     return false;
    167 
    168   if (BI->getNumSuccessors() != 2)
    169     return false;
    170   BasicBlock &Succ0 = *BI->getSuccessor(0);
    171   BasicBlock &Succ1 = *BI->getSuccessor(1);
    172 
    173   if (&B == &Succ0 || &B == &Succ1 || &Succ0 == &Succ1) {
    174     return false;
    175   }
    176 
    177   // Hoist from if-then (triangle).
    178   if (Succ0.getSinglePredecessor() != nullptr &&
    179       Succ0.getSingleSuccessor() == &Succ1) {
    180     return considerHoistingFromTo(Succ0, B);
    181   }
    182 
    183   // Hoist from if-else (triangle).
    184   if (Succ1.getSinglePredecessor() != nullptr &&
    185       Succ1.getSingleSuccessor() == &Succ0) {
    186     return considerHoistingFromTo(Succ1, B);
    187   }
    188 
    189   // Hoist from if-then-else (diamond), but only if it is equivalent to
    190   // an if-else or if-then due to one of the branches doing nothing.
    191   if (Succ0.getSinglePredecessor() != nullptr &&
    192       Succ1.getSinglePredecessor() != nullptr &&
    193       Succ1.getSingleSuccessor() != nullptr &&
    194       Succ1.getSingleSuccessor() != &B &&
    195       Succ1.getSingleSuccessor() == Succ0.getSingleSuccessor()) {
    196     // If a block has only one instruction, then that is a terminator
    197     // instruction so that the block does nothing. This does happen.
    198     if (Succ1.size() == 1) // equivalent to if-then
    199       return considerHoistingFromTo(Succ0, B);
    200     if (Succ0.size() == 1) // equivalent to if-else
    201       return considerHoistingFromTo(Succ1, B);
    202   }
    203 
    204   return false;
    205 }
    206 
    207 static unsigned ComputeSpeculationCost(const Instruction *I,
    208                                        const TargetTransformInfo &TTI) {
    209   switch (Operator::getOpcode(I)) {
    210     case Instruction::GetElementPtr:
    211     case Instruction::Add:
    212     case Instruction::Mul:
    213     case Instruction::And:
    214     case Instruction::Or:
    215     case Instruction::Select:
    216     case Instruction::Shl:
    217     case Instruction::Sub:
    218     case Instruction::LShr:
    219     case Instruction::AShr:
    220     case Instruction::Xor:
    221     case Instruction::ZExt:
    222     case Instruction::SExt:
    223       return TTI.getUserCost(I);
    224 
    225     default:
    226       return UINT_MAX; // Disallow anything not whitelisted.
    227   }
    228 }
    229 
    230 bool SpeculativeExecution::considerHoistingFromTo(BasicBlock &FromBlock,
    231                                                   BasicBlock &ToBlock) {
    232   SmallSet<const Instruction *, 8> NotHoisted;
    233   const auto AllPrecedingUsesFromBlockHoisted = [&NotHoisted](User *U) {
    234     for (Value* V : U->operand_values()) {
    235       if (Instruction *I = dyn_cast<Instruction>(V)) {
    236         if (NotHoisted.count(I) > 0)
    237           return false;
    238       }
    239     }
    240     return true;
    241   };
    242 
    243   unsigned TotalSpeculationCost = 0;
    244   for (auto& I : FromBlock) {
    245     const unsigned Cost = ComputeSpeculationCost(&I, *TTI);
    246     if (Cost != UINT_MAX && isSafeToSpeculativelyExecute(&I) &&
    247         AllPrecedingUsesFromBlockHoisted(&I)) {
    248       TotalSpeculationCost += Cost;
    249       if (TotalSpeculationCost > SpecExecMaxSpeculationCost)
    250         return false;  // too much to hoist
    251     } else {
    252       NotHoisted.insert(&I);
    253       if (NotHoisted.size() > SpecExecMaxNotHoisted)
    254         return false; // too much left behind
    255     }
    256   }
    257 
    258   if (TotalSpeculationCost == 0)
    259     return false; // nothing to hoist
    260 
    261   for (auto I = FromBlock.begin(); I != FromBlock.end();) {
    262     // We have to increment I before moving Current as moving Current
    263     // changes the list that I is iterating through.
    264     auto Current = I;
    265     ++I;
    266     if (!NotHoisted.count(&*Current)) {
    267       Current->moveBefore(ToBlock.getTerminator());
    268     }
    269   }
    270   return true;
    271 }
    272 
    273 namespace llvm {
    274 
    275 FunctionPass *createSpeculativeExecutionPass() {
    276   return new SpeculativeExecution();
    277 }
    278 
    279 FunctionPass *createSpeculativeExecutionIfHasBranchDivergencePass() {
    280   return new SpeculativeExecution(/* OnlyIfDivergentTarget = */ true);
    281 }
    282 
    283 }  // namespace llvm
    284