1 //===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass performs a simple dominator tree walk that eliminates trivially 11 // redundant instructions. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #define DEBUG_TYPE "early-cse" 16 #include "llvm/Transforms/Scalar.h" 17 #include "llvm/Instructions.h" 18 #include "llvm/Pass.h" 19 #include "llvm/Analysis/Dominators.h" 20 #include "llvm/Analysis/InstructionSimplify.h" 21 #include "llvm/Target/TargetData.h" 22 #include "llvm/Target/TargetLibraryInfo.h" 23 #include "llvm/Transforms/Utils/Local.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/RecyclingAllocator.h" 26 #include "llvm/ADT/ScopedHashTable.h" 27 #include "llvm/ADT/Statistic.h" 28 #include <deque> 29 using namespace llvm; 30 31 STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd"); 32 STATISTIC(NumCSE, "Number of instructions CSE'd"); 33 STATISTIC(NumCSELoad, "Number of load instructions CSE'd"); 34 STATISTIC(NumCSECall, "Number of call instructions CSE'd"); 35 STATISTIC(NumDSE, "Number of trivial dead stores removed"); 36 37 static unsigned getHash(const void *V) { 38 return DenseMapInfo<const void*>::getHashValue(V); 39 } 40 41 //===----------------------------------------------------------------------===// 42 // SimpleValue 43 //===----------------------------------------------------------------------===// 44 45 namespace { 46 /// SimpleValue - Instances of this struct represent available values in the 47 /// scoped hash table. 48 struct SimpleValue { 49 Instruction *Inst; 50 51 SimpleValue(Instruction *I) : Inst(I) { 52 assert((isSentinel() || canHandle(I)) && "Inst can't be handled!"); 53 } 54 55 bool isSentinel() const { 56 return Inst == DenseMapInfo<Instruction*>::getEmptyKey() || 57 Inst == DenseMapInfo<Instruction*>::getTombstoneKey(); 58 } 59 60 static bool canHandle(Instruction *Inst) { 61 // This can only handle non-void readnone functions. 62 if (CallInst *CI = dyn_cast<CallInst>(Inst)) 63 return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy(); 64 return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) || 65 isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) || 66 isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) || 67 isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) || 68 isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst); 69 } 70 }; 71 } 72 73 namespace llvm { 74 // SimpleValue is POD. 75 template<> struct isPodLike<SimpleValue> { 76 static const bool value = true; 77 }; 78 79 template<> struct DenseMapInfo<SimpleValue> { 80 static inline SimpleValue getEmptyKey() { 81 return DenseMapInfo<Instruction*>::getEmptyKey(); 82 } 83 static inline SimpleValue getTombstoneKey() { 84 return DenseMapInfo<Instruction*>::getTombstoneKey(); 85 } 86 static unsigned getHashValue(SimpleValue Val); 87 static bool isEqual(SimpleValue LHS, SimpleValue RHS); 88 }; 89 } 90 91 unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) { 92 Instruction *Inst = Val.Inst; 93 94 // Hash in all of the operands as pointers. 95 unsigned Res = 0; 96 for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) 97 Res ^= getHash(Inst->getOperand(i)) << (i & 0xF); 98 99 if (CastInst *CI = dyn_cast<CastInst>(Inst)) 100 Res ^= getHash(CI->getType()); 101 else if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) 102 Res ^= CI->getPredicate(); 103 else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst)) { 104 for (ExtractValueInst::idx_iterator I = EVI->idx_begin(), 105 E = EVI->idx_end(); I != E; ++I) 106 Res ^= *I; 107 } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst)) { 108 for (InsertValueInst::idx_iterator I = IVI->idx_begin(), 109 E = IVI->idx_end(); I != E; ++I) 110 Res ^= *I; 111 } else { 112 // nothing extra to hash in. 113 assert((isa<CallInst>(Inst) || 114 isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) || 115 isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) || 116 isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst)) && 117 "Invalid/unknown instruction"); 118 } 119 120 // Mix in the opcode. 121 return (Res << 1) ^ Inst->getOpcode(); 122 } 123 124 bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) { 125 Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst; 126 127 if (LHS.isSentinel() || RHS.isSentinel()) 128 return LHSI == RHSI; 129 130 if (LHSI->getOpcode() != RHSI->getOpcode()) return false; 131 return LHSI->isIdenticalTo(RHSI); 132 } 133 134 //===----------------------------------------------------------------------===// 135 // CallValue 136 //===----------------------------------------------------------------------===// 137 138 namespace { 139 /// CallValue - Instances of this struct represent available call values in 140 /// the scoped hash table. 141 struct CallValue { 142 Instruction *Inst; 143 144 CallValue(Instruction *I) : Inst(I) { 145 assert((isSentinel() || canHandle(I)) && "Inst can't be handled!"); 146 } 147 148 bool isSentinel() const { 149 return Inst == DenseMapInfo<Instruction*>::getEmptyKey() || 150 Inst == DenseMapInfo<Instruction*>::getTombstoneKey(); 151 } 152 153 static bool canHandle(Instruction *Inst) { 154 // Don't value number anything that returns void. 155 if (Inst->getType()->isVoidTy()) 156 return false; 157 158 CallInst *CI = dyn_cast<CallInst>(Inst); 159 if (CI == 0 || !CI->onlyReadsMemory()) 160 return false; 161 return true; 162 } 163 }; 164 } 165 166 namespace llvm { 167 // CallValue is POD. 168 template<> struct isPodLike<CallValue> { 169 static const bool value = true; 170 }; 171 172 template<> struct DenseMapInfo<CallValue> { 173 static inline CallValue getEmptyKey() { 174 return DenseMapInfo<Instruction*>::getEmptyKey(); 175 } 176 static inline CallValue getTombstoneKey() { 177 return DenseMapInfo<Instruction*>::getTombstoneKey(); 178 } 179 static unsigned getHashValue(CallValue Val); 180 static bool isEqual(CallValue LHS, CallValue RHS); 181 }; 182 } 183 unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) { 184 Instruction *Inst = Val.Inst; 185 // Hash in all of the operands as pointers. 186 unsigned Res = 0; 187 for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) { 188 assert(!Inst->getOperand(i)->getType()->isMetadataTy() && 189 "Cannot value number calls with metadata operands"); 190 Res ^= getHash(Inst->getOperand(i)) << (i & 0xF); 191 } 192 193 // Mix in the opcode. 194 return (Res << 1) ^ Inst->getOpcode(); 195 } 196 197 bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) { 198 Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst; 199 if (LHS.isSentinel() || RHS.isSentinel()) 200 return LHSI == RHSI; 201 return LHSI->isIdenticalTo(RHSI); 202 } 203 204 205 //===----------------------------------------------------------------------===// 206 // EarlyCSE pass. 207 //===----------------------------------------------------------------------===// 208 209 namespace { 210 211 /// EarlyCSE - This pass does a simple depth-first walk over the dominator 212 /// tree, eliminating trivially redundant instructions and using instsimplify 213 /// to canonicalize things as it goes. It is intended to be fast and catch 214 /// obvious cases so that instcombine and other passes are more effective. It 215 /// is expected that a later pass of GVN will catch the interesting/hard 216 /// cases. 217 class EarlyCSE : public FunctionPass { 218 public: 219 const TargetData *TD; 220 const TargetLibraryInfo *TLI; 221 DominatorTree *DT; 222 typedef RecyclingAllocator<BumpPtrAllocator, 223 ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy; 224 typedef ScopedHashTable<SimpleValue, Value*, DenseMapInfo<SimpleValue>, 225 AllocatorTy> ScopedHTType; 226 227 /// AvailableValues - This scoped hash table contains the current values of 228 /// all of our simple scalar expressions. As we walk down the domtree, we 229 /// look to see if instructions are in this: if so, we replace them with what 230 /// we find, otherwise we insert them so that dominated values can succeed in 231 /// their lookup. 232 ScopedHTType *AvailableValues; 233 234 /// AvailableLoads - This scoped hash table contains the current values 235 /// of loads. This allows us to get efficient access to dominating loads when 236 /// we have a fully redundant load. In addition to the most recent load, we 237 /// keep track of a generation count of the read, which is compared against 238 /// the current generation count. The current generation count is 239 /// incremented after every possibly writing memory operation, which ensures 240 /// that we only CSE loads with other loads that have no intervening store. 241 typedef RecyclingAllocator<BumpPtrAllocator, 242 ScopedHashTableVal<Value*, std::pair<Value*, unsigned> > > LoadMapAllocator; 243 typedef ScopedHashTable<Value*, std::pair<Value*, unsigned>, 244 DenseMapInfo<Value*>, LoadMapAllocator> LoadHTType; 245 LoadHTType *AvailableLoads; 246 247 /// AvailableCalls - This scoped hash table contains the current values 248 /// of read-only call values. It uses the same generation count as loads. 249 typedef ScopedHashTable<CallValue, std::pair<Value*, unsigned> > CallHTType; 250 CallHTType *AvailableCalls; 251 252 /// CurrentGeneration - This is the current generation of the memory value. 253 unsigned CurrentGeneration; 254 255 static char ID; 256 explicit EarlyCSE() : FunctionPass(ID) { 257 initializeEarlyCSEPass(*PassRegistry::getPassRegistry()); 258 } 259 260 bool runOnFunction(Function &F); 261 262 private: 263 264 // NodeScope - almost a POD, but needs to call the constructors for the 265 // scoped hash tables so that a new scope gets pushed on. These are RAII so 266 // that the scope gets popped when the NodeScope is destroyed. 267 class NodeScope { 268 public: 269 NodeScope(ScopedHTType *availableValues, 270 LoadHTType *availableLoads, 271 CallHTType *availableCalls) : 272 Scope(*availableValues), 273 LoadScope(*availableLoads), 274 CallScope(*availableCalls) {} 275 276 private: 277 NodeScope(const NodeScope&); // DO NOT IMPLEMENT 278 279 ScopedHTType::ScopeTy Scope; 280 LoadHTType::ScopeTy LoadScope; 281 CallHTType::ScopeTy CallScope; 282 }; 283 284 // StackNode - contains all the needed information to create a stack for 285 // doing a depth first tranversal of the tree. This includes scopes for 286 // values, loads, and calls as well as the generation. There is a child 287 // iterator so that the children do not need to be store spearately. 288 class StackNode { 289 public: 290 StackNode(ScopedHTType *availableValues, 291 LoadHTType *availableLoads, 292 CallHTType *availableCalls, 293 unsigned cg, DomTreeNode *n, 294 DomTreeNode::iterator child, DomTreeNode::iterator end) : 295 CurrentGeneration(cg), ChildGeneration(cg), Node(n), 296 ChildIter(child), EndIter(end), 297 Scopes(availableValues, availableLoads, availableCalls), 298 Processed(false) {} 299 300 // Accessors. 301 unsigned currentGeneration() { return CurrentGeneration; } 302 unsigned childGeneration() { return ChildGeneration; } 303 void childGeneration(unsigned generation) { ChildGeneration = generation; } 304 DomTreeNode *node() { return Node; } 305 DomTreeNode::iterator childIter() { return ChildIter; } 306 DomTreeNode *nextChild() { 307 DomTreeNode *child = *ChildIter; 308 ++ChildIter; 309 return child; 310 } 311 DomTreeNode::iterator end() { return EndIter; } 312 bool isProcessed() { return Processed; } 313 void process() { Processed = true; } 314 315 private: 316 StackNode(const StackNode&); // DO NOT IMPLEMENT 317 318 // Members. 319 unsigned CurrentGeneration; 320 unsigned ChildGeneration; 321 DomTreeNode *Node; 322 DomTreeNode::iterator ChildIter; 323 DomTreeNode::iterator EndIter; 324 NodeScope Scopes; 325 bool Processed; 326 }; 327 328 bool processNode(DomTreeNode *Node); 329 330 // This transformation requires dominator postdominator info 331 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 332 AU.addRequired<DominatorTree>(); 333 AU.addRequired<TargetLibraryInfo>(); 334 AU.setPreservesCFG(); 335 } 336 }; 337 } 338 339 char EarlyCSE::ID = 0; 340 341 // createEarlyCSEPass - The public interface to this file. 342 FunctionPass *llvm::createEarlyCSEPass() { 343 return new EarlyCSE(); 344 } 345 346 INITIALIZE_PASS_BEGIN(EarlyCSE, "early-cse", "Early CSE", false, false) 347 INITIALIZE_PASS_DEPENDENCY(DominatorTree) 348 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) 349 INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false) 350 351 bool EarlyCSE::processNode(DomTreeNode *Node) { 352 BasicBlock *BB = Node->getBlock(); 353 354 // If this block has a single predecessor, then the predecessor is the parent 355 // of the domtree node and all of the live out memory values are still current 356 // in this block. If this block has multiple predecessors, then they could 357 // have invalidated the live-out memory values of our parent value. For now, 358 // just be conservative and invalidate memory if this block has multiple 359 // predecessors. 360 if (BB->getSinglePredecessor() == 0) 361 ++CurrentGeneration; 362 363 /// LastStore - Keep track of the last non-volatile store that we saw... for 364 /// as long as there in no instruction that reads memory. If we see a store 365 /// to the same location, we delete the dead store. This zaps trivial dead 366 /// stores which can occur in bitfield code among other things. 367 StoreInst *LastStore = 0; 368 369 bool Changed = false; 370 371 // See if any instructions in the block can be eliminated. If so, do it. If 372 // not, add them to AvailableValues. 373 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { 374 Instruction *Inst = I++; 375 376 // Dead instructions should just be removed. 377 if (isInstructionTriviallyDead(Inst, TLI)) { 378 DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n'); 379 Inst->eraseFromParent(); 380 Changed = true; 381 ++NumSimplify; 382 continue; 383 } 384 385 // If the instruction can be simplified (e.g. X+0 = X) then replace it with 386 // its simpler value. 387 if (Value *V = SimplifyInstruction(Inst, TD, TLI, DT)) { 388 DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n'); 389 Inst->replaceAllUsesWith(V); 390 Inst->eraseFromParent(); 391 Changed = true; 392 ++NumSimplify; 393 continue; 394 } 395 396 // If this is a simple instruction that we can value number, process it. 397 if (SimpleValue::canHandle(Inst)) { 398 // See if the instruction has an available value. If so, use it. 399 if (Value *V = AvailableValues->lookup(Inst)) { 400 DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << " to: " << *V << '\n'); 401 Inst->replaceAllUsesWith(V); 402 Inst->eraseFromParent(); 403 Changed = true; 404 ++NumCSE; 405 continue; 406 } 407 408 // Otherwise, just remember that this value is available. 409 AvailableValues->insert(Inst, Inst); 410 continue; 411 } 412 413 // If this is a non-volatile load, process it. 414 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { 415 // Ignore volatile loads. 416 if (!LI->isSimple()) { 417 LastStore = 0; 418 continue; 419 } 420 421 // If we have an available version of this load, and if it is the right 422 // generation, replace this instruction. 423 std::pair<Value*, unsigned> InVal = 424 AvailableLoads->lookup(Inst->getOperand(0)); 425 if (InVal.first != 0 && InVal.second == CurrentGeneration) { 426 DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst << " to: " 427 << *InVal.first << '\n'); 428 if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first); 429 Inst->eraseFromParent(); 430 Changed = true; 431 ++NumCSELoad; 432 continue; 433 } 434 435 // Otherwise, remember that we have this instruction. 436 AvailableLoads->insert(Inst->getOperand(0), 437 std::pair<Value*, unsigned>(Inst, CurrentGeneration)); 438 LastStore = 0; 439 continue; 440 } 441 442 // If this instruction may read from memory, forget LastStore. 443 if (Inst->mayReadFromMemory()) 444 LastStore = 0; 445 446 // If this is a read-only call, process it. 447 if (CallValue::canHandle(Inst)) { 448 // If we have an available version of this call, and if it is the right 449 // generation, replace this instruction. 450 std::pair<Value*, unsigned> InVal = AvailableCalls->lookup(Inst); 451 if (InVal.first != 0 && InVal.second == CurrentGeneration) { 452 DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst << " to: " 453 << *InVal.first << '\n'); 454 if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first); 455 Inst->eraseFromParent(); 456 Changed = true; 457 ++NumCSECall; 458 continue; 459 } 460 461 // Otherwise, remember that we have this instruction. 462 AvailableCalls->insert(Inst, 463 std::pair<Value*, unsigned>(Inst, CurrentGeneration)); 464 continue; 465 } 466 467 // Okay, this isn't something we can CSE at all. Check to see if it is 468 // something that could modify memory. If so, our available memory values 469 // cannot be used so bump the generation count. 470 if (Inst->mayWriteToMemory()) { 471 ++CurrentGeneration; 472 473 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { 474 // We do a trivial form of DSE if there are two stores to the same 475 // location with no intervening loads. Delete the earlier store. 476 if (LastStore && 477 LastStore->getPointerOperand() == SI->getPointerOperand()) { 478 DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore << " due to: " 479 << *Inst << '\n'); 480 LastStore->eraseFromParent(); 481 Changed = true; 482 ++NumDSE; 483 LastStore = 0; 484 continue; 485 } 486 487 // Okay, we just invalidated anything we knew about loaded values. Try 488 // to salvage *something* by remembering that the stored value is a live 489 // version of the pointer. It is safe to forward from volatile stores 490 // to non-volatile loads, so we don't have to check for volatility of 491 // the store. 492 AvailableLoads->insert(SI->getPointerOperand(), 493 std::pair<Value*, unsigned>(SI->getValueOperand(), CurrentGeneration)); 494 495 // Remember that this was the last store we saw for DSE. 496 if (SI->isSimple()) 497 LastStore = SI; 498 } 499 } 500 } 501 502 return Changed; 503 } 504 505 506 bool EarlyCSE::runOnFunction(Function &F) { 507 std::deque<StackNode *> nodesToProcess; 508 509 TD = getAnalysisIfAvailable<TargetData>(); 510 TLI = &getAnalysis<TargetLibraryInfo>(); 511 DT = &getAnalysis<DominatorTree>(); 512 513 // Tables that the pass uses when walking the domtree. 514 ScopedHTType AVTable; 515 AvailableValues = &AVTable; 516 LoadHTType LoadTable; 517 AvailableLoads = &LoadTable; 518 CallHTType CallTable; 519 AvailableCalls = &CallTable; 520 521 CurrentGeneration = 0; 522 bool Changed = false; 523 524 // Process the root node. 525 nodesToProcess.push_front( 526 new StackNode(AvailableValues, AvailableLoads, AvailableCalls, 527 CurrentGeneration, DT->getRootNode(), 528 DT->getRootNode()->begin(), 529 DT->getRootNode()->end())); 530 531 // Save the current generation. 532 unsigned LiveOutGeneration = CurrentGeneration; 533 534 // Process the stack. 535 while (!nodesToProcess.empty()) { 536 // Grab the first item off the stack. Set the current generation, remove 537 // the node from the stack, and process it. 538 StackNode *NodeToProcess = nodesToProcess.front(); 539 540 // Initialize class members. 541 CurrentGeneration = NodeToProcess->currentGeneration(); 542 543 // Check if the node needs to be processed. 544 if (!NodeToProcess->isProcessed()) { 545 // Process the node. 546 Changed |= processNode(NodeToProcess->node()); 547 NodeToProcess->childGeneration(CurrentGeneration); 548 NodeToProcess->process(); 549 } else if (NodeToProcess->childIter() != NodeToProcess->end()) { 550 // Push the next child onto the stack. 551 DomTreeNode *child = NodeToProcess->nextChild(); 552 nodesToProcess.push_front( 553 new StackNode(AvailableValues, 554 AvailableLoads, 555 AvailableCalls, 556 NodeToProcess->childGeneration(), child, 557 child->begin(), child->end())); 558 } else { 559 // It has been processed, and there are no more children to process, 560 // so delete it and pop it off the stack. 561 delete NodeToProcess; 562 nodesToProcess.pop_front(); 563 } 564 } // while (!nodes...) 565 566 // Reset the current generation. 567 CurrentGeneration = LiveOutGeneration; 568 569 return Changed; 570 } 571