Home | History | Annotate | Download | only in Scalar
      1 //===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This pass performs a simple dominator tree walk that eliminates trivially
     11 // redundant instructions.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #define DEBUG_TYPE "early-cse"
     16 #include "llvm/Transforms/Scalar.h"
     17 #include "llvm/Instructions.h"
     18 #include "llvm/Pass.h"
     19 #include "llvm/Analysis/Dominators.h"
     20 #include "llvm/Analysis/InstructionSimplify.h"
     21 #include "llvm/Target/TargetData.h"
     22 #include "llvm/Target/TargetLibraryInfo.h"
     23 #include "llvm/Transforms/Utils/Local.h"
     24 #include "llvm/Support/Debug.h"
     25 #include "llvm/Support/RecyclingAllocator.h"
     26 #include "llvm/ADT/ScopedHashTable.h"
     27 #include "llvm/ADT/Statistic.h"
     28 #include <deque>
     29 using namespace llvm;
     30 
     31 STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
     32 STATISTIC(NumCSE,      "Number of instructions CSE'd");
     33 STATISTIC(NumCSELoad,  "Number of load instructions CSE'd");
     34 STATISTIC(NumCSECall,  "Number of call instructions CSE'd");
     35 STATISTIC(NumDSE,      "Number of trivial dead stores removed");
     36 
     37 static unsigned getHash(const void *V) {
     38   return DenseMapInfo<const void*>::getHashValue(V);
     39 }
     40 
     41 //===----------------------------------------------------------------------===//
     42 // SimpleValue
     43 //===----------------------------------------------------------------------===//
     44 
     45 namespace {
     46   /// SimpleValue - Instances of this struct represent available values in the
     47   /// scoped hash table.
     48   struct SimpleValue {
     49     Instruction *Inst;
     50 
     51     SimpleValue(Instruction *I) : Inst(I) {
     52       assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
     53     }
     54 
     55     bool isSentinel() const {
     56       return Inst == DenseMapInfo<Instruction*>::getEmptyKey() ||
     57              Inst == DenseMapInfo<Instruction*>::getTombstoneKey();
     58     }
     59 
     60     static bool canHandle(Instruction *Inst) {
     61       // This can only handle non-void readnone functions.
     62       if (CallInst *CI = dyn_cast<CallInst>(Inst))
     63         return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
     64       return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
     65              isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
     66              isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
     67              isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
     68              isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
     69     }
     70   };
     71 }
     72 
     73 namespace llvm {
     74 // SimpleValue is POD.
     75 template<> struct isPodLike<SimpleValue> {
     76   static const bool value = true;
     77 };
     78 
     79 template<> struct DenseMapInfo<SimpleValue> {
     80   static inline SimpleValue getEmptyKey() {
     81     return DenseMapInfo<Instruction*>::getEmptyKey();
     82   }
     83   static inline SimpleValue getTombstoneKey() {
     84     return DenseMapInfo<Instruction*>::getTombstoneKey();
     85   }
     86   static unsigned getHashValue(SimpleValue Val);
     87   static bool isEqual(SimpleValue LHS, SimpleValue RHS);
     88 };
     89 }
     90 
     91 unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
     92   Instruction *Inst = Val.Inst;
     93 
     94   // Hash in all of the operands as pointers.
     95   unsigned Res = 0;
     96   for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
     97     Res ^= getHash(Inst->getOperand(i)) << (i & 0xF);
     98 
     99   if (CastInst *CI = dyn_cast<CastInst>(Inst))
    100     Res ^= getHash(CI->getType());
    101   else if (CmpInst *CI = dyn_cast<CmpInst>(Inst))
    102     Res ^= CI->getPredicate();
    103   else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst)) {
    104     for (ExtractValueInst::idx_iterator I = EVI->idx_begin(),
    105          E = EVI->idx_end(); I != E; ++I)
    106       Res ^= *I;
    107   } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst)) {
    108     for (InsertValueInst::idx_iterator I = IVI->idx_begin(),
    109          E = IVI->idx_end(); I != E; ++I)
    110       Res ^= *I;
    111   } else {
    112     // nothing extra to hash in.
    113     assert((isa<CallInst>(Inst) ||
    114             isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) ||
    115             isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
    116             isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst)) &&
    117            "Invalid/unknown instruction");
    118   }
    119 
    120   // Mix in the opcode.
    121   return (Res << 1) ^ Inst->getOpcode();
    122 }
    123 
    124 bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
    125   Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
    126 
    127   if (LHS.isSentinel() || RHS.isSentinel())
    128     return LHSI == RHSI;
    129 
    130   if (LHSI->getOpcode() != RHSI->getOpcode()) return false;
    131   return LHSI->isIdenticalTo(RHSI);
    132 }
    133 
    134 //===----------------------------------------------------------------------===//
    135 // CallValue
    136 //===----------------------------------------------------------------------===//
    137 
    138 namespace {
    139   /// CallValue - Instances of this struct represent available call values in
    140   /// the scoped hash table.
    141   struct CallValue {
    142     Instruction *Inst;
    143 
    144     CallValue(Instruction *I) : Inst(I) {
    145       assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
    146     }
    147 
    148     bool isSentinel() const {
    149       return Inst == DenseMapInfo<Instruction*>::getEmptyKey() ||
    150              Inst == DenseMapInfo<Instruction*>::getTombstoneKey();
    151     }
    152 
    153     static bool canHandle(Instruction *Inst) {
    154       // Don't value number anything that returns void.
    155       if (Inst->getType()->isVoidTy())
    156         return false;
    157 
    158       CallInst *CI = dyn_cast<CallInst>(Inst);
    159       if (CI == 0 || !CI->onlyReadsMemory())
    160         return false;
    161       return true;
    162     }
    163   };
    164 }
    165 
    166 namespace llvm {
    167   // CallValue is POD.
    168   template<> struct isPodLike<CallValue> {
    169     static const bool value = true;
    170   };
    171 
    172   template<> struct DenseMapInfo<CallValue> {
    173     static inline CallValue getEmptyKey() {
    174       return DenseMapInfo<Instruction*>::getEmptyKey();
    175     }
    176     static inline CallValue getTombstoneKey() {
    177       return DenseMapInfo<Instruction*>::getTombstoneKey();
    178     }
    179     static unsigned getHashValue(CallValue Val);
    180     static bool isEqual(CallValue LHS, CallValue RHS);
    181   };
    182 }
    183 unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
    184   Instruction *Inst = Val.Inst;
    185   // Hash in all of the operands as pointers.
    186   unsigned Res = 0;
    187   for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) {
    188     assert(!Inst->getOperand(i)->getType()->isMetadataTy() &&
    189            "Cannot value number calls with metadata operands");
    190     Res ^= getHash(Inst->getOperand(i)) << (i & 0xF);
    191   }
    192 
    193   // Mix in the opcode.
    194   return (Res << 1) ^ Inst->getOpcode();
    195 }
    196 
    197 bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
    198   Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
    199   if (LHS.isSentinel() || RHS.isSentinel())
    200     return LHSI == RHSI;
    201   return LHSI->isIdenticalTo(RHSI);
    202 }
    203 
    204 
    205 //===----------------------------------------------------------------------===//
    206 // EarlyCSE pass.
    207 //===----------------------------------------------------------------------===//
    208 
    209 namespace {
    210 
    211 /// EarlyCSE - This pass does a simple depth-first walk over the dominator
    212 /// tree, eliminating trivially redundant instructions and using instsimplify
    213 /// to canonicalize things as it goes.  It is intended to be fast and catch
    214 /// obvious cases so that instcombine and other passes are more effective.  It
    215 /// is expected that a later pass of GVN will catch the interesting/hard
    216 /// cases.
    217 class EarlyCSE : public FunctionPass {
    218 public:
    219   const TargetData *TD;
    220   const TargetLibraryInfo *TLI;
    221   DominatorTree *DT;
    222   typedef RecyclingAllocator<BumpPtrAllocator,
    223                       ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy;
    224   typedef ScopedHashTable<SimpleValue, Value*, DenseMapInfo<SimpleValue>,
    225                           AllocatorTy> ScopedHTType;
    226 
    227   /// AvailableValues - This scoped hash table contains the current values of
    228   /// all of our simple scalar expressions.  As we walk down the domtree, we
    229   /// look to see if instructions are in this: if so, we replace them with what
    230   /// we find, otherwise we insert them so that dominated values can succeed in
    231   /// their lookup.
    232   ScopedHTType *AvailableValues;
    233 
    234   /// AvailableLoads - This scoped hash table contains the current values
    235   /// of loads.  This allows us to get efficient access to dominating loads when
    236   /// we have a fully redundant load.  In addition to the most recent load, we
    237   /// keep track of a generation count of the read, which is compared against
    238   /// the current generation count.  The current generation count is
    239   /// incremented after every possibly writing memory operation, which ensures
    240   /// that we only CSE loads with other loads that have no intervening store.
    241   typedef RecyclingAllocator<BumpPtrAllocator,
    242     ScopedHashTableVal<Value*, std::pair<Value*, unsigned> > > LoadMapAllocator;
    243   typedef ScopedHashTable<Value*, std::pair<Value*, unsigned>,
    244                           DenseMapInfo<Value*>, LoadMapAllocator> LoadHTType;
    245   LoadHTType *AvailableLoads;
    246 
    247   /// AvailableCalls - This scoped hash table contains the current values
    248   /// of read-only call values.  It uses the same generation count as loads.
    249   typedef ScopedHashTable<CallValue, std::pair<Value*, unsigned> > CallHTType;
    250   CallHTType *AvailableCalls;
    251 
    252   /// CurrentGeneration - This is the current generation of the memory value.
    253   unsigned CurrentGeneration;
    254 
    255   static char ID;
    256   explicit EarlyCSE() : FunctionPass(ID) {
    257     initializeEarlyCSEPass(*PassRegistry::getPassRegistry());
    258   }
    259 
    260   bool runOnFunction(Function &F);
    261 
    262 private:
    263 
    264   // NodeScope - almost a POD, but needs to call the constructors for the
    265   // scoped hash tables so that a new scope gets pushed on. These are RAII so
    266   // that the scope gets popped when the NodeScope is destroyed.
    267   class NodeScope {
    268    public:
    269     NodeScope(ScopedHTType *availableValues,
    270               LoadHTType *availableLoads,
    271               CallHTType *availableCalls) :
    272         Scope(*availableValues),
    273         LoadScope(*availableLoads),
    274         CallScope(*availableCalls) {}
    275 
    276    private:
    277     NodeScope(const NodeScope&); // DO NOT IMPLEMENT
    278 
    279     ScopedHTType::ScopeTy Scope;
    280     LoadHTType::ScopeTy LoadScope;
    281     CallHTType::ScopeTy CallScope;
    282   };
    283 
    284   // StackNode - contains all the needed information to create a stack for
    285   // doing a depth first tranversal of the tree. This includes scopes for
    286   // values, loads, and calls as well as the generation. There is a child
    287   // iterator so that the children do not need to be store spearately.
    288   class StackNode {
    289    public:
    290     StackNode(ScopedHTType *availableValues,
    291               LoadHTType *availableLoads,
    292               CallHTType *availableCalls,
    293               unsigned cg, DomTreeNode *n,
    294               DomTreeNode::iterator child, DomTreeNode::iterator end) :
    295         CurrentGeneration(cg), ChildGeneration(cg), Node(n),
    296         ChildIter(child), EndIter(end),
    297         Scopes(availableValues, availableLoads, availableCalls),
    298         Processed(false) {}
    299 
    300     // Accessors.
    301     unsigned currentGeneration() { return CurrentGeneration; }
    302     unsigned childGeneration() { return ChildGeneration; }
    303     void childGeneration(unsigned generation) { ChildGeneration = generation; }
    304     DomTreeNode *node() { return Node; }
    305     DomTreeNode::iterator childIter() { return ChildIter; }
    306     DomTreeNode *nextChild() {
    307       DomTreeNode *child = *ChildIter;
    308       ++ChildIter;
    309       return child;
    310     }
    311     DomTreeNode::iterator end() { return EndIter; }
    312     bool isProcessed() { return Processed; }
    313     void process() { Processed = true; }
    314 
    315    private:
    316     StackNode(const StackNode&); // DO NOT IMPLEMENT
    317 
    318     // Members.
    319     unsigned CurrentGeneration;
    320     unsigned ChildGeneration;
    321     DomTreeNode *Node;
    322     DomTreeNode::iterator ChildIter;
    323     DomTreeNode::iterator EndIter;
    324     NodeScope Scopes;
    325     bool Processed;
    326   };
    327 
    328   bool processNode(DomTreeNode *Node);
    329 
    330   // This transformation requires dominator postdominator info
    331   virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    332     AU.addRequired<DominatorTree>();
    333     AU.addRequired<TargetLibraryInfo>();
    334     AU.setPreservesCFG();
    335   }
    336 };
    337 }
    338 
    339 char EarlyCSE::ID = 0;
    340 
    341 // createEarlyCSEPass - The public interface to this file.
    342 FunctionPass *llvm::createEarlyCSEPass() {
    343   return new EarlyCSE();
    344 }
    345 
    346 INITIALIZE_PASS_BEGIN(EarlyCSE, "early-cse", "Early CSE", false, false)
    347 INITIALIZE_PASS_DEPENDENCY(DominatorTree)
    348 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
    349 INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false)
    350 
    351 bool EarlyCSE::processNode(DomTreeNode *Node) {
    352   BasicBlock *BB = Node->getBlock();
    353 
    354   // If this block has a single predecessor, then the predecessor is the parent
    355   // of the domtree node and all of the live out memory values are still current
    356   // in this block.  If this block has multiple predecessors, then they could
    357   // have invalidated the live-out memory values of our parent value.  For now,
    358   // just be conservative and invalidate memory if this block has multiple
    359   // predecessors.
    360   if (BB->getSinglePredecessor() == 0)
    361     ++CurrentGeneration;
    362 
    363   /// LastStore - Keep track of the last non-volatile store that we saw... for
    364   /// as long as there in no instruction that reads memory.  If we see a store
    365   /// to the same location, we delete the dead store.  This zaps trivial dead
    366   /// stores which can occur in bitfield code among other things.
    367   StoreInst *LastStore = 0;
    368 
    369   bool Changed = false;
    370 
    371   // See if any instructions in the block can be eliminated.  If so, do it.  If
    372   // not, add them to AvailableValues.
    373   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
    374     Instruction *Inst = I++;
    375 
    376     // Dead instructions should just be removed.
    377     if (isInstructionTriviallyDead(Inst)) {
    378       DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
    379       Inst->eraseFromParent();
    380       Changed = true;
    381       ++NumSimplify;
    382       continue;
    383     }
    384 
    385     // If the instruction can be simplified (e.g. X+0 = X) then replace it with
    386     // its simpler value.
    387     if (Value *V = SimplifyInstruction(Inst, TD, TLI, DT)) {
    388       DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << "  to: " << *V << '\n');
    389       Inst->replaceAllUsesWith(V);
    390       Inst->eraseFromParent();
    391       Changed = true;
    392       ++NumSimplify;
    393       continue;
    394     }
    395 
    396     // If this is a simple instruction that we can value number, process it.
    397     if (SimpleValue::canHandle(Inst)) {
    398       // See if the instruction has an available value.  If so, use it.
    399       if (Value *V = AvailableValues->lookup(Inst)) {
    400         DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << "  to: " << *V << '\n');
    401         Inst->replaceAllUsesWith(V);
    402         Inst->eraseFromParent();
    403         Changed = true;
    404         ++NumCSE;
    405         continue;
    406       }
    407 
    408       // Otherwise, just remember that this value is available.
    409       AvailableValues->insert(Inst, Inst);
    410       continue;
    411     }
    412 
    413     // If this is a non-volatile load, process it.
    414     if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    415       // Ignore volatile loads.
    416       if (!LI->isSimple()) {
    417         LastStore = 0;
    418         continue;
    419       }
    420 
    421       // If we have an available version of this load, and if it is the right
    422       // generation, replace this instruction.
    423       std::pair<Value*, unsigned> InVal =
    424         AvailableLoads->lookup(Inst->getOperand(0));
    425       if (InVal.first != 0 && InVal.second == CurrentGeneration) {
    426         DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst << "  to: "
    427               << *InVal.first << '\n');
    428         if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
    429         Inst->eraseFromParent();
    430         Changed = true;
    431         ++NumCSELoad;
    432         continue;
    433       }
    434 
    435       // Otherwise, remember that we have this instruction.
    436       AvailableLoads->insert(Inst->getOperand(0),
    437                           std::pair<Value*, unsigned>(Inst, CurrentGeneration));
    438       LastStore = 0;
    439       continue;
    440     }
    441 
    442     // If this instruction may read from memory, forget LastStore.
    443     if (Inst->mayReadFromMemory())
    444       LastStore = 0;
    445 
    446     // If this is a read-only call, process it.
    447     if (CallValue::canHandle(Inst)) {
    448       // If we have an available version of this call, and if it is the right
    449       // generation, replace this instruction.
    450       std::pair<Value*, unsigned> InVal = AvailableCalls->lookup(Inst);
    451       if (InVal.first != 0 && InVal.second == CurrentGeneration) {
    452         DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst << "  to: "
    453                      << *InVal.first << '\n');
    454         if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
    455         Inst->eraseFromParent();
    456         Changed = true;
    457         ++NumCSECall;
    458         continue;
    459       }
    460 
    461       // Otherwise, remember that we have this instruction.
    462       AvailableCalls->insert(Inst,
    463                          std::pair<Value*, unsigned>(Inst, CurrentGeneration));
    464       continue;
    465     }
    466 
    467     // Okay, this isn't something we can CSE at all.  Check to see if it is
    468     // something that could modify memory.  If so, our available memory values
    469     // cannot be used so bump the generation count.
    470     if (Inst->mayWriteToMemory()) {
    471       ++CurrentGeneration;
    472 
    473       if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    474         // We do a trivial form of DSE if there are two stores to the same
    475         // location with no intervening loads.  Delete the earlier store.
    476         if (LastStore &&
    477             LastStore->getPointerOperand() == SI->getPointerOperand()) {
    478           DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore << "  due to: "
    479                        << *Inst << '\n');
    480           LastStore->eraseFromParent();
    481           Changed = true;
    482           ++NumDSE;
    483           LastStore = 0;
    484           continue;
    485         }
    486 
    487         // Okay, we just invalidated anything we knew about loaded values.  Try
    488         // to salvage *something* by remembering that the stored value is a live
    489         // version of the pointer.  It is safe to forward from volatile stores
    490         // to non-volatile loads, so we don't have to check for volatility of
    491         // the store.
    492         AvailableLoads->insert(SI->getPointerOperand(),
    493          std::pair<Value*, unsigned>(SI->getValueOperand(), CurrentGeneration));
    494 
    495         // Remember that this was the last store we saw for DSE.
    496         if (SI->isSimple())
    497           LastStore = SI;
    498       }
    499     }
    500   }
    501 
    502   return Changed;
    503 }
    504 
    505 
    506 bool EarlyCSE::runOnFunction(Function &F) {
    507   std::deque<StackNode *> nodesToProcess;
    508 
    509   TD = getAnalysisIfAvailable<TargetData>();
    510   TLI = &getAnalysis<TargetLibraryInfo>();
    511   DT = &getAnalysis<DominatorTree>();
    512 
    513   // Tables that the pass uses when walking the domtree.
    514   ScopedHTType AVTable;
    515   AvailableValues = &AVTable;
    516   LoadHTType LoadTable;
    517   AvailableLoads = &LoadTable;
    518   CallHTType CallTable;
    519   AvailableCalls = &CallTable;
    520 
    521   CurrentGeneration = 0;
    522   bool Changed = false;
    523 
    524   // Process the root node.
    525   nodesToProcess.push_front(
    526       new StackNode(AvailableValues, AvailableLoads, AvailableCalls,
    527                     CurrentGeneration, DT->getRootNode(),
    528                     DT->getRootNode()->begin(),
    529                     DT->getRootNode()->end()));
    530 
    531   // Save the current generation.
    532   unsigned LiveOutGeneration = CurrentGeneration;
    533 
    534   // Process the stack.
    535   while (!nodesToProcess.empty()) {
    536     // Grab the first item off the stack. Set the current generation, remove
    537     // the node from the stack, and process it.
    538     StackNode *NodeToProcess = nodesToProcess.front();
    539 
    540     // Initialize class members.
    541     CurrentGeneration = NodeToProcess->currentGeneration();
    542 
    543     // Check if the node needs to be processed.
    544     if (!NodeToProcess->isProcessed()) {
    545       // Process the node.
    546       Changed |= processNode(NodeToProcess->node());
    547       NodeToProcess->childGeneration(CurrentGeneration);
    548       NodeToProcess->process();
    549     } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
    550       // Push the next child onto the stack.
    551       DomTreeNode *child = NodeToProcess->nextChild();
    552       nodesToProcess.push_front(
    553           new StackNode(AvailableValues,
    554                         AvailableLoads,
    555                         AvailableCalls,
    556                         NodeToProcess->childGeneration(), child,
    557                         child->begin(), child->end()));
    558     } else {
    559       // It has been processed, and there are no more children to process,
    560       // so delete it and pop it off the stack.
    561       delete NodeToProcess;
    562       nodesToProcess.pop_front();
    563     }
    564   } // while (!nodes...)
    565 
    566   // Reset the current generation.
    567   CurrentGeneration = LiveOutGeneration;
    568 
    569   return Changed;
    570 }
    571