//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs a simple dominator tree walk that eliminates trivially
// redundant instructions.
//
//===----------------------------------------------------------------------===//
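
// Illustrative sketch (hypothetical IR, not part of the original file): a
// value that is recomputed in dominated code is replaced with the dominating
// copy and the duplicate is erased:
//
//   %a = add i32 %x, %y
//   ...
//   %b = add i32 %x, %y   ; EarlyCSE replaces uses of %b with %a, erases %b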

#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <deque>
using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "early-cse"

STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
STATISTIC(NumCSE,      "Number of instructions CSE'd");
STATISTIC(NumCSECVP,   "Number of compare instructions CVP'd");
STATISTIC(NumCSELoad,  "Number of load instructions CSE'd");
STATISTIC(NumCSECall,  "Number of call instructions CSE'd");
STATISTIC(NumDSE,      "Number of trivial dead stores removed");

//===----------------------------------------------------------------------===//
// SimpleValue
//===----------------------------------------------------------------------===//

namespace {
/// \brief Struct representing the available values in the scoped hash table.
struct SimpleValue {
  Instruction *Inst;

  SimpleValue(Instruction *I) : Inst(I) {
    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
  }

  bool isSentinel() const {
    return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
           Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static bool canHandle(Instruction *Inst) {
    // This can only handle non-void readnone functions.
    if (CallInst *CI = dyn_cast<CallInst>(Inst))
      return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
    return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
           isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
           isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
           isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
           isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
  }
};
}
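
// Illustrative sketch (hypothetical IR, not part of the original file): the
// kinds of instructions SimpleValue::canHandle accepts, and which therefore
// flow through AvailableValues, include casts, binary operators, GEPs,
// compares, selects, and non-void readnone calls:
//
//   %c = icmp eq i32 %x, %y
//   %s = select i1 %c, i32 %x, i32 %y
//   %g = getelementptr i32, i32* %p, i64 1
//   %r = call i32 @pure_fn(i32 %x)      ; assumes @pure_fn is readnone
//
// Memory operations and readonly (but not readnone) calls are handled by the
// separate AvailableLoads / AvailableCalls tables below.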

namespace llvm {
template <> struct DenseMapInfo<SimpleValue> {
  static inline SimpleValue getEmptyKey() {
    return DenseMapInfo<Instruction *>::getEmptyKey();
  }
  static inline SimpleValue getTombstoneKey() {
    return DenseMapInfo<Instruction *>::getTombstoneKey();
  }
  static unsigned getHashValue(SimpleValue Val);
  static bool isEqual(SimpleValue LHS, SimpleValue RHS);
};
}

unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
  Instruction *Inst = Val.Inst;
  // Hash in all of the operands as pointers.
  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
    Value *LHS = BinOp->getOperand(0);
    Value *RHS = BinOp->getOperand(1);
    if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
      std::swap(LHS, RHS);

    return hash_combine(BinOp->getOpcode(), LHS, RHS);
  }

  if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
    Value *LHS = CI->getOperand(0);
    Value *RHS = CI->getOperand(1);
    CmpInst::Predicate Pred = CI->getPredicate();
    if (Inst->getOperand(0) > Inst->getOperand(1)) {
      std::swap(LHS, RHS);
      Pred = CI->getSwappedPredicate();
    }
    return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
  }

  if (CastInst *CI = dyn_cast<CastInst>(Inst))
    return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));

  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
    return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
                        hash_combine_range(EVI->idx_begin(), EVI->idx_end()));

  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
    return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
                        IVI->getOperand(1),
                        hash_combine_range(IVI->idx_begin(), IVI->idx_end()));

  assert((isa<CallInst>(Inst) || isa<BinaryOperator>(Inst) ||
          isa<GetElementPtrInst>(Inst) || isa<SelectInst>(Inst) ||
          isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
          isa<ShuffleVectorInst>(Inst)) &&
         "Invalid/unknown instruction");

  // Mix in the opcode.
  return hash_combine(
      Inst->getOpcode(),
      hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
}

bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;

  if (LHS.isSentinel() || RHS.isSentinel())
    return LHSI == RHSI;

  if (LHSI->getOpcode() != RHSI->getOpcode())
    return false;
  if (LHSI->isIdenticalToWhenDefined(RHSI))
    return true;

  // If we're not strictly identical, we still might be a commutable instruction
  if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
    if (!LHSBinOp->isCommutative())
      return false;

    assert(isa<BinaryOperator>(RHSI) &&
           "same opcode, but different instruction type?");
    BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);

    // Commuted equality
    return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
           LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
  }
  if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
    assert(isa<CmpInst>(RHSI) &&
           "same opcode, but different instruction type?");
    CmpInst *RHSCmp = cast<CmpInst>(RHSI);
    // Commuted equality
    return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
           LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
           LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
  }

  return false;
}
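
// Illustrative sketch (hypothetical IR, not part of the original file): the
// hash canonicalizes commutative operands by pointer order, and isEqual
// accepts the commuted form, so the following pair CSE to one instruction:
//
//   %a = add i32 %x, %y
//   %b = add i32 %y, %x        ; equal to %a under commuted equality
//
// Likewise "icmp slt i32 %x, %y" matches "icmp sgt i32 %y, %x" through the
// swapped predicate.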

//===----------------------------------------------------------------------===//
// CallValue
//===----------------------------------------------------------------------===//

namespace {
/// \brief Struct representing the available call values in the scoped hash
/// table.
struct CallValue {
  Instruction *Inst;

  CallValue(Instruction *I) : Inst(I) {
    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
  }

  bool isSentinel() const {
    return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
           Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static bool canHandle(Instruction *Inst) {
    // Don't value number anything that returns void.
    if (Inst->getType()->isVoidTy())
      return false;

    CallInst *CI = dyn_cast<CallInst>(Inst);
    if (!CI || !CI->onlyReadsMemory())
      return false;
    return true;
  }
};
}

namespace llvm {
template <> struct DenseMapInfo<CallValue> {
  static inline CallValue getEmptyKey() {
    return DenseMapInfo<Instruction *>::getEmptyKey();
  }
  static inline CallValue getTombstoneKey() {
    return DenseMapInfo<Instruction *>::getTombstoneKey();
  }
  static unsigned getHashValue(CallValue Val);
  static bool isEqual(CallValue LHS, CallValue RHS);
};
}

unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
  Instruction *Inst = Val.Inst;
  // Hash all of the operands as pointers and mix in the opcode.
  return hash_combine(
      Inst->getOpcode(),
      hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
}

bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
  if (LHS.isSentinel() || RHS.isSentinel())
    return LHSI == RHSI;
  return LHSI->isIdenticalTo(RHSI);
}
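
// Illustrative sketch (hypothetical IR, not part of the original file):
// readonly (but not readnone) calls go through AvailableCalls and only merge
// when the memory generation is unchanged between them:
//
//   %a = call i32 @readonly_fn(i32* %p)   ; assumes @readonly_fn is readonly
//   %b = call i32 @readonly_fn(i32* %p)   ; CSE'd to %a: no intervening write
//
// Any possibly-writing instruction between the two calls bumps
// CurrentGeneration and blocks the merge.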

//===----------------------------------------------------------------------===//
// EarlyCSE implementation
//===----------------------------------------------------------------------===//

namespace {
/// \brief A simple and fast domtree-based CSE pass.
///
/// This pass does a simple depth-first walk over the dominator tree,
/// eliminating trivially redundant instructions and using instsimplify to
/// canonicalize things as it goes. It is intended to be fast and catch obvious
/// cases so that instcombine and other passes are more effective. It is
/// expected that a later pass of GVN will catch the interesting/hard cases.
class EarlyCSE {
public:
  const TargetLibraryInfo &TLI;
  const TargetTransformInfo &TTI;
  DominatorTree &DT;
  AssumptionCache &AC;
  typedef RecyclingAllocator<
      BumpPtrAllocator, ScopedHashTableVal<SimpleValue, Value *>> AllocatorTy;
  typedef ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
                          AllocatorTy> ScopedHTType;

  /// \brief A scoped hash table of the current values of all of our simple
  /// scalar expressions.
  ///
  /// As we walk down the domtree, we look to see if instructions are in this:
  /// if so, we replace them with what we find, otherwise we insert them so
  /// that dominated values can succeed in their lookup.
  ScopedHTType AvailableValues;

  /// A scoped hash table of the current values of previously encountered
  /// memory locations.
  ///
  /// This allows us to get efficient access to dominating loads or stores when
  /// we have a fully redundant load.  In addition to the most recent load, we
  /// keep track of a generation count of the read, which is compared against
  /// the current generation count.  The current generation count is incremented
  /// after every possibly writing memory operation, which ensures that we only
  /// CSE loads with other loads that have no intervening store.  Ordering
  /// events (such as fences or atomic instructions) increment the generation
  /// count as well; essentially, we model these as writes to all possible
  /// locations.  Note that atomic and/or volatile loads and stores can be
  /// present in the table; it is the responsibility of the consumer to inspect
  /// the atomicity/volatility if needed.
  struct LoadValue {
    Instruction *DefInst;
    unsigned Generation;
    int MatchingId;
    bool IsAtomic;
    bool IsInvariant;
    LoadValue()
        : DefInst(nullptr), Generation(0), MatchingId(-1), IsAtomic(false),
          IsInvariant(false) {}
    LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
              bool IsAtomic, bool IsInvariant)
        : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
          IsAtomic(IsAtomic), IsInvariant(IsInvariant) {}
  };
  typedef RecyclingAllocator<BumpPtrAllocator,
                             ScopedHashTableVal<Value *, LoadValue>>
      LoadMapAllocator;
  typedef ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
                          LoadMapAllocator> LoadHTType;
  LoadHTType AvailableLoads;
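
  // Illustrative sketch (hypothetical IR, not part of the original file) of
  // how the generation count gates load CSE.  A possibly-writing operation
  // bumps CurrentGeneration, so only the first pair below merges:
  //
  //   %a = load i32, i32* %p     ; recorded at generation G
  //   %b = load i32, i32* %p     ; same generation G -> replaced with %a
  //   store i32 0, i32* %q       ; may alias %p -> generation becomes G+1
  //   %c = load i32, i32* %p     ; generation mismatch -> not CSE'd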

  /// \brief A scoped hash table of the current values of read-only call
  /// values.
  ///
  /// It uses the same generation count as loads.
  typedef ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>
      CallHTType;
  CallHTType AvailableCalls;

  /// \brief This is the current generation of the memory value.
  unsigned CurrentGeneration;

  /// \brief Set up the EarlyCSE runner for a particular function.
  EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI,
           DominatorTree &DT, AssumptionCache &AC)
      : TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {}

  bool run();

private:
  // Almost a POD, but needs to call the constructors for the scoped hash
  // tables so that a new scope gets pushed on. These are RAII so that the
  // scope gets popped when the NodeScope is destroyed.
  class NodeScope {
  public:
    NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
              CallHTType &AvailableCalls)
        : Scope(AvailableValues), LoadScope(AvailableLoads),
          CallScope(AvailableCalls) {}

  private:
    NodeScope(const NodeScope &) = delete;
    void operator=(const NodeScope &) = delete;

    ScopedHTType::ScopeTy Scope;
    LoadHTType::ScopeTy LoadScope;
    CallHTType::ScopeTy CallScope;
  };

  // Contains all the needed information to create a stack for doing a depth
  // first traversal of the tree. This includes scopes for values, loads, and
  // calls as well as the generation. There is a child iterator so that the
  // children do not need to be stored separately.
  class StackNode {
  public:
    StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
              CallHTType &AvailableCalls, unsigned cg, DomTreeNode *n,
              DomTreeNode::iterator child, DomTreeNode::iterator end)
        : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
          EndIter(end), Scopes(AvailableValues, AvailableLoads, AvailableCalls),
          Processed(false) {}

    // Accessors.
    unsigned currentGeneration() { return CurrentGeneration; }
    unsigned childGeneration() { return ChildGeneration; }
    void childGeneration(unsigned generation) { ChildGeneration = generation; }
    DomTreeNode *node() { return Node; }
    DomTreeNode::iterator childIter() { return ChildIter; }
    DomTreeNode *nextChild() {
      DomTreeNode *child = *ChildIter;
      ++ChildIter;
      return child;
    }
    DomTreeNode::iterator end() { return EndIter; }
    bool isProcessed() { return Processed; }
    void process() { Processed = true; }

  private:
    StackNode(const StackNode &) = delete;
    void operator=(const StackNode &) = delete;

    // Members.
    unsigned CurrentGeneration;
    unsigned ChildGeneration;
    DomTreeNode *Node;
    DomTreeNode::iterator ChildIter;
    DomTreeNode::iterator EndIter;
    NodeScope Scopes;
    bool Processed;
  };

  /// \brief Wrapper class to handle memory instructions, including loads,
  /// stores and intrinsic loads and stores defined by the target.
  class ParseMemoryInst {
  public:
    ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
      : IsTargetMemInst(false), Inst(Inst) {
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
        if (TTI.getTgtMemIntrinsic(II, Info) && Info.NumMemRefs == 1)
          IsTargetMemInst = true;
    }
    bool isLoad() const {
      if (IsTargetMemInst) return Info.ReadMem;
      return isa<LoadInst>(Inst);
    }
    bool isStore() const {
      if (IsTargetMemInst) return Info.WriteMem;
      return isa<StoreInst>(Inst);
    }
    bool isAtomic() const {
      if (IsTargetMemInst) {
        assert(Info.IsSimple && "need to refine IsSimple in TTI");
        return false;
      }
      return Inst->isAtomic();
    }
    bool isUnordered() const {
      if (IsTargetMemInst) {
        assert(Info.IsSimple && "need to refine IsSimple in TTI");
        return true;
      }
      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        return LI->isUnordered();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        return SI->isUnordered();
      }
      // Conservative answer
      return !Inst->isAtomic();
    }

    bool isVolatile() const {
      if (IsTargetMemInst) {
        assert(Info.IsSimple && "need to refine IsSimple in TTI");
        return false;
      }
      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        return LI->isVolatile();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        return SI->isVolatile();
      }
      // Conservative answer
      return true;
    }

    bool isInvariantLoad() const {
      if (auto *LI = dyn_cast<LoadInst>(Inst))
        return LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr;
      return false;
    }

    bool isMatchingMemLoc(const ParseMemoryInst &Inst) const {
      return (getPointerOperand() == Inst.getPointerOperand() &&
              getMatchingId() == Inst.getMatchingId());
    }
    bool isValid() const { return getPointerOperand() != nullptr; }

    // For regular (non-intrinsic) loads/stores, this is set to -1. For
    // intrinsic loads/stores, the id is retrieved from the corresponding
    // field in the MemIntrinsicInfo structure.  That field contains
    // non-negative values only.
    int getMatchingId() const {
      if (IsTargetMemInst) return Info.MatchingId;
      return -1;
    }
    Value *getPointerOperand() const {
      if (IsTargetMemInst) return Info.PtrVal;
      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        return LI->getPointerOperand();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        return SI->getPointerOperand();
      }
      return nullptr;
    }
    bool mayReadFromMemory() const {
      if (IsTargetMemInst) return Info.ReadMem;
      return Inst->mayReadFromMemory();
    }
    bool mayWriteToMemory() const {
      if (IsTargetMemInst) return Info.WriteMem;
      return Inst->mayWriteToMemory();
    }

  private:
    bool IsTargetMemInst;
    MemIntrinsicInfo Info;
    Instruction *Inst;
  };

  bool processNode(DomTreeNode *Node);

  Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
    if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
      return LI;
    else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
      return SI->getValueOperand();
    assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
    return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst),
                                                 ExpectedType);
  }
};
}

bool EarlyCSE::processNode(DomTreeNode *Node) {
  bool Changed = false;
  BasicBlock *BB = Node->getBlock();

  // If this block has a single predecessor, then the predecessor is the parent
  // of the domtree node and all of the live out memory values are still current
  // in this block.  If this block has multiple predecessors, then they could
  // have invalidated the live-out memory values of our parent value.  For now,
  // just be conservative and invalidate memory if this block has multiple
  // predecessors.
  if (!BB->getSinglePredecessor())
    ++CurrentGeneration;

  // If this node has a single predecessor which ends in a conditional branch,
  // we can infer the value of the branch condition given that we took this
  // path.  We need the single predecessor to ensure there's not another path
  // which reaches this block where the condition might hold a different
  // value.  Since we're adding this to the scoped hash table (like any other
  // def), it will have been popped if we encounter a future merge block.
  if (BasicBlock *Pred = BB->getSinglePredecessor())
    if (auto *BI = dyn_cast<BranchInst>(Pred->getTerminator()))
      if (BI->isConditional())
        if (auto *CondInst = dyn_cast<Instruction>(BI->getCondition()))
          if (SimpleValue::canHandle(CondInst)) {
            assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
            auto *ConditionalConstant = (BI->getSuccessor(0) == BB) ?
              ConstantInt::getTrue(BB->getContext()) :
              ConstantInt::getFalse(BB->getContext());
            AvailableValues.insert(CondInst, ConditionalConstant);
            DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
                  << CondInst->getName() << "' as " << *ConditionalConstant
                  << " in " << BB->getName() << "\n");
            // Replace all dominated uses with the known value.
            if (unsigned Count =
                    replaceDominatedUsesWith(CondInst, ConditionalConstant, DT,
                                             BasicBlockEdge(Pred, BB))) {
              Changed = true;
              NumCSECVP = NumCSECVP + Count;
            }
          }
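
  // Illustrative sketch (hypothetical IR, not part of the original file): with
  // a single conditionally-branching predecessor, the branch condition is
  // known along the taken edge, so dominated uses fold to a constant:
  //
  //   entry:
  //     %c = icmp eq i32 %x, 0
  //     br i1 %c, label %then, label %else
  //   then:                               ; reached only via the true edge
  //     %u = select i1 %c, i32 1, i32 2   ; %c is replaced by true here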

  /// LastStore - Keep track of the last non-volatile store that we saw... for
  /// as long as there is no instruction that reads memory.  If we see a store
  /// to the same location, we delete the dead store.  This zaps trivial dead
  /// stores which can occur in bitfield code among other things.
  Instruction *LastStore = nullptr;

  const DataLayout &DL = BB->getModule()->getDataLayout();

  // See if any instructions in the block can be eliminated.  If so, do it.  If
  // not, add them to AvailableValues.
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
    Instruction *Inst = &*I++;

    // Dead instructions should just be removed.
    if (isInstructionTriviallyDead(Inst, &TLI)) {
      DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
      Inst->eraseFromParent();
      Changed = true;
      ++NumSimplify;
      continue;
    }

    // Skip assume intrinsics, they don't really have side effects (although
    // they're marked as such to ensure preservation of control dependencies),
    // and this pass will not disturb any of the assumption's control
    // dependencies.
    if (match(Inst, m_Intrinsic<Intrinsic::assume>())) {
      DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n');
      continue;
    }

    if (match(Inst, m_Intrinsic<Intrinsic::experimental_guard>())) {
      if (auto *CondI =
              dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0))) {
        // The condition we're guarding on here is true for all dominated
        // locations.
        if (SimpleValue::canHandle(CondI))
          AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
      }

      // Guard intrinsics read all memory, but don't write any memory.
      // Accordingly, don't update the generation but consume the last store (to
      // avoid an incorrect DSE).
      LastStore = nullptr;
      continue;
    }
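
    // Illustrative sketch (hypothetical IR, not part of the original file):
    // after a guard, the guarded condition is treated as true in dominated
    // code, so a later identical compare folds away:
    //
    //   %c = icmp ult i32 %i, %n
    //   call void (i1, ...) @llvm.experimental.guard(i1 %c) [ "deopt"() ]
    //   %d = icmp ult i32 %i, %n   ; CSE'd to %c, which is known true below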

    // If the instruction can be simplified (e.g. X+0 = X) then replace it with
    // its simpler value.
    if (Value *V = SimplifyInstruction(Inst, DL, &TLI, &DT, &AC)) {
      DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << "  to: " << *V << '\n');
      if (!Inst->use_empty()) {
        Inst->replaceAllUsesWith(V);
        Changed = true;
      }
      if (isInstructionTriviallyDead(Inst, &TLI)) {
        Inst->eraseFromParent();
        Changed = true;
      }
      if (Changed) {
        ++NumSimplify;
        continue;
      }
    }

    // If this is a simple instruction that we can value number, process it.
    if (SimpleValue::canHandle(Inst)) {
      // See if the instruction has an available value.  If so, use it.
      if (Value *V = AvailableValues.lookup(Inst)) {
        DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << "  to: " << *V << '\n');
        if (auto *I = dyn_cast<Instruction>(V))
          I->andIRFlags(Inst);
        Inst->replaceAllUsesWith(V);
        Inst->eraseFromParent();
        Changed = true;
        ++NumCSE;
        continue;
      }

      // Otherwise, just remember that this value is available.
      AvailableValues.insert(Inst, Inst);
      continue;
    }

    ParseMemoryInst MemInst(Inst, TTI);
    // If this is a non-volatile load, process it.
    if (MemInst.isValid() && MemInst.isLoad()) {
      // (Conservatively) we can't peek past the ordering implied by this
      // operation, but we can add this load to our set of available values.
      if (MemInst.isVolatile() || !MemInst.isUnordered()) {
        LastStore = nullptr;
        ++CurrentGeneration;
      }

      // If we have an available version of this load, and if it is the right
      // generation or the load is known to be from an invariant location,
      // replace this instruction.
      //
      // A dominating invariant load implies that the location loaded from is
      // unchanging beginning at the point of the invariant load, so the load
      // we're CSE'ing _away_ does not need to be invariant, only the available
      // load we're CSE'ing _to_ does.
      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
      if (InVal.DefInst != nullptr &&
          (InVal.Generation == CurrentGeneration || InVal.IsInvariant) &&
          InVal.MatchingId == MemInst.getMatchingId() &&
          // We don't yet handle removing loads with ordering of any kind.
          !MemInst.isVolatile() && MemInst.isUnordered() &&
          // We can't replace an atomic load with one which isn't also atomic.
          InVal.IsAtomic >= MemInst.isAtomic()) {
        Value *Op = getOrCreateResult(InVal.DefInst, Inst->getType());
        if (Op != nullptr) {
          DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
                       << "  to: " << *InVal.DefInst << '\n');
          if (!Inst->use_empty())
            Inst->replaceAllUsesWith(Op);
          Inst->eraseFromParent();
          Changed = true;
          ++NumCSELoad;
          continue;
        }
      }

      // Otherwise, remember that we have this instruction.
      AvailableLoads.insert(
          MemInst.getPointerOperand(),
          LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
                    MemInst.isAtomic(), MemInst.isInvariantLoad()));
      LastStore = nullptr;
      continue;
    }

    // If this instruction may read from memory, forget LastStore.
    // Load/store intrinsics will indicate both a read and a write to
    // memory.  The target may override this (e.g. so that a store intrinsic
    // does not read from memory, and thus will be treated the same as a
    // regular store for commoning purposes).
    if (Inst->mayReadFromMemory() &&
        !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
      LastStore = nullptr;

    // If this is a read-only call, process it.
    if (CallValue::canHandle(Inst)) {
      // If we have an available version of this call, and if it is the right
      // generation, replace this instruction.
      std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(Inst);
      if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
        DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst
                     << "  to: " << *InVal.first << '\n');
        if (!Inst->use_empty())
          Inst->replaceAllUsesWith(InVal.first);
        Inst->eraseFromParent();
        Changed = true;
        ++NumCSECall;
        continue;
      }

      // Otherwise, remember that we have this instruction.
      AvailableCalls.insert(
          Inst, std::pair<Instruction *, unsigned>(Inst, CurrentGeneration));
      continue;
    }

    // A release fence requires that all stores complete before it, but does
    // not prevent the reordering of following loads 'before' the fence.  As a
    // result, we don't need to consider it as writing to memory and don't need
    // to advance the generation.  We do need to prevent DSE across the fence,
    // but that's handled above.
    if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
      if (FI->getOrdering() == AtomicOrdering::Release) {
        assert(Inst->mayReadFromMemory() && "relied on to prevent DSE above");
        continue;
      }

    // Write-back DSE - If we write back the same value we just loaded from
    // the same location and haven't passed any intervening writes or ordering
    // operations, we can remove the write.  The primary benefit is in allowing
    // the available load table to remain valid and value forward past where
    // the store originally was.
    if (MemInst.isValid() && MemInst.isStore()) {
      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
      if (InVal.DefInst &&
          InVal.DefInst == getOrCreateResult(Inst, InVal.DefInst->getType()) &&
          InVal.Generation == CurrentGeneration &&
          InVal.MatchingId == MemInst.getMatchingId() &&
          // We don't yet handle removing stores with ordering of any kind.
          !MemInst.isVolatile() && MemInst.isUnordered()) {
        assert((!LastStore ||
                ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
                MemInst.getPointerOperand()) &&
               "can't have an intervening store!");
        DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n');
        Inst->eraseFromParent();
        Changed = true;
        ++NumDSE;
        // We can avoid incrementing the generation count since we were able
        // to eliminate this store.
        continue;
      }
    }
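
    // Illustrative sketch (hypothetical IR, not part of the original file):
    // storing back the value most recently loaded from the same location,
    // with no intervening write, is a no-op and is deleted:
    //
    //   %v = load i32, i32* %p
    //   store i32 %v, i32* %p      ; removed; memory already holds %v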

    // Okay, this isn't something we can CSE at all.  Check to see if it is
    // something that could modify memory.  If so, our available memory values
    // cannot be used so bump the generation count.
    if (Inst->mayWriteToMemory()) {
      ++CurrentGeneration;

      if (MemInst.isValid() && MemInst.isStore()) {
        // We do a trivial form of DSE if there are two stores to the same
        // location with no intervening loads.  Delete the earlier store.
        // At the moment, we don't remove ordered stores, but do remove
        // unordered atomic stores.  There's no special requirement (for
        // unordered atomics) about removing atomic stores only in favor of
        // other atomic stores since we're going to execute the non-atomic
        // one anyway and the atomic one might never have become visible.
        if (LastStore) {
          ParseMemoryInst LastStoreMemInst(LastStore, TTI);
          assert(LastStoreMemInst.isUnordered() &&
                 !LastStoreMemInst.isVolatile() &&
                 "Violated invariant");
          if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
            DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
                         << "  due to: " << *Inst << '\n');
            LastStore->eraseFromParent();
            Changed = true;
            ++NumDSE;
            LastStore = nullptr;
          }
          // fallthrough - we can exploit information about this store
        }

        // Okay, we just invalidated anything we knew about loaded values.  Try
        // to salvage *something* by remembering that the stored value is a live
        // version of the pointer.  It is safe to forward from volatile stores
        // to non-volatile loads, so we don't have to check for volatility of
        // the store.
        AvailableLoads.insert(
            MemInst.getPointerOperand(),
            LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
                      MemInst.isAtomic(), /*IsInvariant=*/false));

        // Remember that this was the last unordered store we saw for DSE. We
        // don't yet handle DSE on ordered or volatile stores since we don't
        // have a good way to model the ordering requirement for following
        // passes once the store is removed.  We could insert a fence, but
        // since fences are slightly stronger than stores in their ordering,
        // it's not clear this is a profitable transform. Another option would
        // be to merge the ordering with that of the post dominating store.
        if (MemInst.isUnordered() && !MemInst.isVolatile())
          LastStore = Inst;
        else
          LastStore = nullptr;
      }
    }
  }

  return Changed;
}

bool EarlyCSE::run() {
  // Note, deque is being used here because there are significant performance
  // gains over vector when the container becomes very large due to the
  // specific access patterns. For more information see the mailing list
  // discussion on this:
  // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
  std::deque<StackNode *> nodesToProcess;

  bool Changed = false;

  // Process the root node.
  nodesToProcess.push_back(new StackNode(
      AvailableValues, AvailableLoads, AvailableCalls, CurrentGeneration,
      DT.getRootNode(), DT.getRootNode()->begin(), DT.getRootNode()->end()));

  // Save the current generation.
  unsigned LiveOutGeneration = CurrentGeneration;

  // Process the stack.
  while (!nodesToProcess.empty()) {
    // Grab the first item off the stack. Set the current generation, remove
    // the node from the stack, and process it.
    StackNode *NodeToProcess = nodesToProcess.back();

    // Initialize class members.
    CurrentGeneration = NodeToProcess->currentGeneration();

    // Check if the node needs to be processed.
    if (!NodeToProcess->isProcessed()) {
      // Process the node.
      Changed |= processNode(NodeToProcess->node());
      NodeToProcess->childGeneration(CurrentGeneration);
      NodeToProcess->process();
    } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
      // Push the next child onto the stack.
      DomTreeNode *child = NodeToProcess->nextChild();
      nodesToProcess.push_back(
          new StackNode(AvailableValues, AvailableLoads, AvailableCalls,
                        NodeToProcess->childGeneration(), child, child->begin(),
                        child->end()));
    } else {
      // It has been processed, and there are no more children to process,
      // so delete it and pop it off the stack.
      delete NodeToProcess;
      nodesToProcess.pop_back();
    }
  } // while (!nodes...)

  // Reset the current generation.
  CurrentGeneration = LiveOutGeneration;

  return Changed;
}

PreservedAnalyses EarlyCSEPass::run(Function &F,
                                    AnalysisManager<Function> &AM) {
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &AC = AM.getResult<AssumptionAnalysis>(F);

  EarlyCSE CSE(TLI, TTI, DT, AC);

  if (!CSE.run())
    return PreservedAnalyses::all();

  // CSE preserves the dominator tree because it doesn't mutate the CFG.
  // FIXME: Bundle this with other CFG-preservation.
  PreservedAnalyses PA;
  PA.preserve<DominatorTreeAnalysis>();
  PA.preserve<GlobalsAA>();
  return PA;
}
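
// Usage sketch (not part of the original file): the pass can be exercised
// directly from the command line, e.g.
//
//   opt -early-cse -S input.ll           ; legacy pass manager
//   opt -passes=early-cse -S input.ll    ; new pass manager
//
// or added to a legacy C++ pipeline via createEarlyCSEPass(), declared below.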

namespace {
/// \brief A simple and fast domtree-based CSE pass.
///
/// This pass does a simple depth-first walk over the dominator tree,
/// eliminating trivially redundant instructions and using instsimplify to
/// canonicalize things as it goes. It is intended to be fast and catch obvious
/// cases so that instcombine and other passes are more effective. It is
/// expected that a later pass of GVN will catch the interesting/hard cases.
class EarlyCSELegacyPass : public FunctionPass {
public:
  static char ID;

  EarlyCSELegacyPass() : FunctionPass(ID) {
    initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;

    auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
    auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);

    EarlyCSE CSE(TLI, TTI, DT, AC);

    return CSE.run();
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.setPreservesCFG();
  }
};
}

char EarlyCSELegacyPass::ID = 0;

FunctionPass *llvm::createEarlyCSEPass() { return new EarlyCSELegacyPass(); }

INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)
    919