//===- GVN.cpp - Eliminate redundant values and loads --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs global value numbering to eliminate fully redundant
// instructions. It also performs simple dead load elimination.
//
// Note that this pass does the value numbering itself; it does not use the
// ValueNumbering analysis passes.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <vector>
using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "gvn"

STATISTIC(NumGVNInstr,  "Number of instructions deleted");
STATISTIC(NumGVNLoad,   "Number of loads deleted");
STATISTIC(NumGVNPRE,    "Number of instructions PRE'd");
STATISTIC(NumGVNBlocks, "Number of blocks merged");
STATISTIC(NumGVNSimpl,  "Number of instructions simplified");
STATISTIC(NumGVNEqProp, "Number of equalities propagated");
STATISTIC(NumPRELoad,   "Number of loads PRE'd");

static cl::opt<bool> EnablePRE("enable-pre",
                               cl::init(true), cl::Hidden);
static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));

// Maximum allowed recursion depth.
static cl::opt<uint32_t>
MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore,
                cl::desc("Max recurse depth (default = 1000)"));

//===----------------------------------------------------------------------===//
//                         ValueTable Class
//===----------------------------------------------------------------------===//

/// This class holds the mapping between values and value numbers. It is used
/// as an efficient mechanism to determine the expression-wise equivalence of
/// two values.
namespace {
  struct Expression {
    uint32_t opcode;
    Type *type;
    SmallVector<uint32_t, 4> varargs;

    Expression(uint32_t o = ~2U) : opcode(o) { }

    bool operator==(const Expression &other) const {
      if (opcode != other.opcode)
        return false;
      if (opcode == ~0U || opcode == ~1U)
        return true;
      if (type != other.type)
        return false;
      if (varargs != other.varargs)
        return false;
      return true;
    }

    friend hash_code hash_value(const Expression &Value) {
      return hash_combine(Value.opcode, Value.type,
                          hash_combine_range(Value.varargs.begin(),
                                             Value.varargs.end()));
    }
  };

  class ValueTable {
    DenseMap<Value*, uint32_t> valueNumbering;
    DenseMap<Expression, uint32_t> expressionNumbering;
    AliasAnalysis *AA;
    MemoryDependenceAnalysis *MD;
    DominatorTree *DT;

    uint32_t nextValueNumber;

    Expression create_expression(Instruction* I);
    Expression create_cmp_expression(unsigned Opcode,
                                     CmpInst::Predicate Predicate,
                                     Value *LHS, Value *RHS);
    Expression create_extractvalue_expression(ExtractValueInst* EI);
    uint32_t lookup_or_add_call(CallInst* C);
  public:
    ValueTable() : nextValueNumber(1) { }
    uint32_t lookup_or_add(Value *V);
    uint32_t lookup(Value *V) const;
    uint32_t lookup_or_add_cmp(unsigned Opcode, CmpInst::Predicate Pred,
                               Value *LHS, Value *RHS);
    void add(Value *V, uint32_t num);
    void clear();
    void erase(Value *v);
    void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
    AliasAnalysis *getAliasAnalysis() const { return AA; }
    void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
    void setDomTree(DominatorTree* D) { DT = D; }
    uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
    void verifyRemoved(const Value *) const;
  };
}

namespace llvm {
template <> struct DenseMapInfo<Expression> {
  static inline Expression getEmptyKey() {
    return ~0U;
  }

  static inline Expression getTombstoneKey() {
    return ~1U;
  }

  static unsigned getHashValue(const Expression e) {
    using llvm::hash_value;
    return static_cast<unsigned>(hash_value(e));
  }
  static bool isEqual(const Expression &LHS, const Expression &RHS) {
    return LHS == RHS;
  }
};

}

//===----------------------------------------------------------------------===//
//                     ValueTable Internal Functions
//===----------------------------------------------------------------------===//

Expression ValueTable::create_expression(Instruction *I) {
  Expression e;
  e.type = I->getType();
  e.opcode = I->getOpcode();
  for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
       OI != OE; ++OI)
    e.varargs.push_back(lookup_or_add(*OI));
  if (I->isCommutative()) {
    // Ensure that commutative instructions that only differ by a permutation
    // of their operands get the same value number by sorting the operand value
    // numbers. Since all commutative instructions have two operands it is more
    // efficient to sort by hand rather than using, say, std::sort.
    assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!");
    if (e.varargs[0] > e.varargs[1])
      std::swap(e.varargs[0], e.varargs[1]);
  }

  if (CmpInst *C = dyn_cast<CmpInst>(I)) {
    // Sort the operand value numbers so x<y and y>x get the same value number.
    CmpInst::Predicate Predicate = C->getPredicate();
    if (e.varargs[0] > e.varargs[1]) {
      std::swap(e.varargs[0], e.varargs[1]);
      Predicate = CmpInst::getSwappedPredicate(Predicate);
    }
    e.opcode = (C->getOpcode() << 8) | Predicate;
  } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
    for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
         II != IE; ++II)
      e.varargs.push_back(*II);
  }

  return e;
}

Expression ValueTable::create_cmp_expression(unsigned Opcode,
                                             CmpInst::Predicate Predicate,
                                             Value *LHS, Value *RHS) {
  assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
         "Not a comparison!");
  Expression e;
  e.type = CmpInst::makeCmpResultType(LHS->getType());
  e.varargs.push_back(lookup_or_add(LHS));
  e.varargs.push_back(lookup_or_add(RHS));

  // Sort the operand value numbers so x<y and y>x get the same value number.
  if (e.varargs[0] > e.varargs[1]) {
    std::swap(e.varargs[0], e.varargs[1]);
    Predicate = CmpInst::getSwappedPredicate(Predicate);
  }
  e.opcode = (Opcode << 8) | Predicate;
  return e;
}

Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) {
  assert(EI && "Not an ExtractValueInst?");
  Expression e;
  e.type = EI->getType();
  e.opcode = 0;

  IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
  if (I != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) {
    // EI might be an extract from one of our recognised intrinsics. If it
    // is we'll synthesize a semantically equivalent expression instead of
    // an extract value expression.
    switch (I->getIntrinsicID()) {
      case Intrinsic::sadd_with_overflow:
      case Intrinsic::uadd_with_overflow:
        e.opcode = Instruction::Add;
        break;
      case Intrinsic::ssub_with_overflow:
      case Intrinsic::usub_with_overflow:
        e.opcode = Instruction::Sub;
        break;
      case Intrinsic::smul_with_overflow:
      case Intrinsic::umul_with_overflow:
        e.opcode = Instruction::Mul;
        break;
      default:
        break;
    }

    if (e.opcode != 0) {
      // Intrinsic recognized. Grab its args to finish building the expression.
      assert(I->getNumArgOperands() == 2 &&
             "Expect two args for recognised intrinsics.");
      e.varargs.push_back(lookup_or_add(I->getArgOperand(0)));
      e.varargs.push_back(lookup_or_add(I->getArgOperand(1)));
      return e;
    }
  }

  // Not a recognised intrinsic. Fall back to producing an extract value
  // expression.
  e.opcode = EI->getOpcode();
  for (Instruction::op_iterator OI = EI->op_begin(), OE = EI->op_end();
       OI != OE; ++OI)
    e.varargs.push_back(lookup_or_add(*OI));

  for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end();
       II != IE; ++II)
    e.varargs.push_back(*II);

  return e;
}

//===----------------------------------------------------------------------===//
//                     ValueTable External Functions
//===----------------------------------------------------------------------===//

/// add - Insert a value into the table with a specified value number.
void ValueTable::add(Value *V, uint32_t num) {
  valueNumbering.insert(std::make_pair(V, num));
}

uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
  if (AA->doesNotAccessMemory(C)) {
    Expression exp = create_expression(C);
    uint32_t &e = expressionNumbering[exp];
    if (!e) e = nextValueNumber++;
    valueNumbering[C] = e;
    return e;
  } else if (AA->onlyReadsMemory(C)) {
    Expression exp = create_expression(C);
    uint32_t &e = expressionNumbering[exp];
    if (!e) {
      e = nextValueNumber++;
      valueNumbering[C] = e;
      return e;
    }
    if (!MD) {
      e = nextValueNumber++;
      valueNumbering[C] = e;
      return e;
    }

    MemDepResult local_dep = MD->getDependency(C);

    if (!local_dep.isDef() && !local_dep.isNonLocal()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (local_dep.isDef()) {
      CallInst* local_cdep = cast<CallInst>(local_dep.getInst());

      if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }

      for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
        uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
        uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
        if (c_vn != cd_vn) {
          valueNumbering[C] = nextValueNumber;
          return nextValueNumber++;
        }
      }

      uint32_t v = lookup_or_add(local_cdep);
      valueNumbering[C] = v;
      return v;
    }

    // Non-local case.
    const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
      MD->getNonLocalCallDependency(CallSite(C));
    // FIXME: Move the checking logic to MemDep!
    CallInst* cdep = nullptr;

    // Check to see if we have a single dominating call instruction that is
    // identical to C.
    for (unsigned i = 0, e = deps.size(); i != e; ++i) {
      const NonLocalDepEntry *I = &deps[i];
      if (I->getResult().isNonLocal())
        continue;

      // We don't handle non-definitions. If we already have a call, reject
      // instruction dependencies.
      if (!I->getResult().isDef() || cdep != nullptr) {
        cdep = nullptr;
        break;
      }

      CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
      // FIXME: All duplicated with non-local case.
      if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())) {
        cdep = NonLocalDepCall;
        continue;
      }

      cdep = nullptr;
      break;
    }

    if (!cdep) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }
    for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
      uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
      uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
      if (c_vn != cd_vn) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }
    }

    uint32_t v = lookup_or_add(cdep);
    valueNumbering[C] = v;
    return v;

  } else {
    valueNumbering[C] = nextValueNumber;
    return nextValueNumber++;
  }
}

/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
uint32_t ValueTable::lookup_or_add(Value *V) {
  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
  if (VI != valueNumbering.end())
    return VI->second;

  if (!isa<Instruction>(V)) {
    valueNumbering[V] = nextValueNumber;
    return nextValueNumber++;
  }

  Instruction* I = cast<Instruction>(V);
  Expression exp;
  switch (I->getOpcode()) {
    case Instruction::Call:
      return lookup_or_add_call(cast<CallInst>(I));
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
    case Instruction::Select:
    case Instruction::ExtractElement:
    case Instruction::InsertElement:
    case Instruction::ShuffleVector:
    case Instruction::InsertValue:
    case Instruction::GetElementPtr:
      exp = create_expression(I);
      break;
    case Instruction::ExtractValue:
      exp = create_extractvalue_expression(cast<ExtractValueInst>(I));
      break;
    default:
      valueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
  }

  uint32_t& e = expressionNumbering[exp];
  if (!e) e = nextValueNumber++;
  valueNumbering[V] = e;
  return e;
}

/// Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
uint32_t ValueTable::lookup(Value *V) const {
  DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
  assert(VI != valueNumbering.end() && "Value not numbered?");
  return VI->second;
}

/// Returns the value number of the given comparison,
/// assigning it a new number if it did not have one before. Useful when
/// we deduced the result of a comparison, but don't immediately have an
/// instruction realizing that comparison to hand.
uint32_t ValueTable::lookup_or_add_cmp(unsigned Opcode,
                                       CmpInst::Predicate Predicate,
                                       Value *LHS, Value *RHS) {
  Expression exp = create_cmp_expression(Opcode, Predicate, LHS, RHS);
  uint32_t& e = expressionNumbering[exp];
  if (!e) e = nextValueNumber++;
  return e;
}

/// Remove all entries from the ValueTable.
void ValueTable::clear() {
  valueNumbering.clear();
  expressionNumbering.clear();
  nextValueNumber = 1;
}

/// Remove a value from the value numbering.
void ValueTable::erase(Value *V) {
  valueNumbering.erase(V);
}

/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
void ValueTable::verifyRemoved(const Value *V) const {
  for (DenseMap<Value*, uint32_t>::const_iterator
         I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
    assert(I->first != V && "Inst still occurs in value numbering map!");
  }
}

//===----------------------------------------------------------------------===//
//                                GVN Pass
//===----------------------------------------------------------------------===//

namespace {
  class GVN;
  struct AvailableValueInBlock {
    /// BB - The basic block in question.
    BasicBlock *BB;
    enum ValType {
      SimpleVal,  // A simple offsetted value that is accessed.
      LoadVal,    // A value produced by a load.
      MemIntrin,  // A memory intrinsic which is loaded from.
      UndefVal    // An UndefValue representing a value from a dead block (which
                  // is not yet physically removed from the CFG).
    };

    /// V - The value that is live out of the block.
    PointerIntPair<Value *, 2, ValType> Val;

    /// Offset - The byte offset in Val that is interesting for the load query.
    unsigned Offset;

    static AvailableValueInBlock get(BasicBlock *BB, Value *V,
                                     unsigned Offset = 0) {
      AvailableValueInBlock Res;
      Res.BB = BB;
      Res.Val.setPointer(V);
      Res.Val.setInt(SimpleVal);
      Res.Offset = Offset;
      return Res;
    }

    static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
                                       unsigned Offset = 0) {
      AvailableValueInBlock Res;
      Res.BB = BB;
      Res.Val.setPointer(MI);
      Res.Val.setInt(MemIntrin);
      Res.Offset = Offset;
      return Res;
    }

    static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
                                         unsigned Offset = 0) {
      AvailableValueInBlock Res;
      Res.BB = BB;
      Res.Val.setPointer(LI);
      Res.Val.setInt(LoadVal);
      Res.Offset = Offset;
      return Res;
    }

    static AvailableValueInBlock getUndef(BasicBlock *BB) {
      AvailableValueInBlock Res;
      Res.BB = BB;
      Res.Val.setPointer(nullptr);
      Res.Val.setInt(UndefVal);
      Res.Offset = 0;
      return Res;
    }

    bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
    bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
    bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
    bool isUndefValue() const { return Val.getInt() == UndefVal; }

    Value *getSimpleValue() const {
      assert(isSimpleValue() && "Wrong accessor");
      return Val.getPointer();
    }

    LoadInst *getCoercedLoadValue() const {
      assert(isCoercedLoadValue() && "Wrong accessor");
      return cast<LoadInst>(Val.getPointer());
    }

    MemIntrinsic *getMemIntrinValue() const {
      assert(isMemIntrinValue() && "Wrong accessor");
      return cast<MemIntrinsic>(Val.getPointer());
    }

    /// Emit code into this block to adjust the value defined here to the
    /// specified type. This handles various coercion cases.
    Value *MaterializeAdjustedValue(LoadInst *LI, GVN &gvn) const;
  };

  class GVN : public FunctionPass {
    bool NoLoads;
    MemoryDependenceAnalysis *MD;
    DominatorTree *DT;
    const TargetLibraryInfo *TLI;
    AssumptionCache *AC;
    SetVector<BasicBlock *> DeadBlocks;

    ValueTable VN;

    /// A mapping from value numbers to lists of Value*'s that
    /// have that value number. Use findLeader to query it.
    struct LeaderTableEntry {
      Value *Val;
      const BasicBlock *BB;
      LeaderTableEntry *Next;
    };
    DenseMap<uint32_t, LeaderTableEntry> LeaderTable;
    BumpPtrAllocator TableAllocator;

    SmallVector<Instruction*, 8> InstrsToErase;

    typedef SmallVector<NonLocalDepResult, 64> LoadDepVect;
    typedef SmallVector<AvailableValueInBlock, 64> AvailValInBlkVect;
    typedef SmallVector<BasicBlock*, 64> UnavailBlkVect;

  public:
    static char ID; // Pass identification, replacement for typeid
    explicit GVN(bool noloads = false)
        : FunctionPass(ID), NoLoads(noloads), MD(nullptr) {
      initializeGVNPass(*PassRegistry::getPassRegistry());
    }

    bool runOnFunction(Function &F) override;

    /// This removes the specified instruction from
    /// our various maps and marks it for deletion.
    void markInstructionForDeletion(Instruction *I) {
      VN.erase(I);
      InstrsToErase.push_back(I);
    }

    DominatorTree &getDominatorTree() const { return *DT; }
    AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); }
    MemoryDependenceAnalysis &getMemDep() const { return *MD; }
  private:
    /// Push a new Value to the LeaderTable onto the list for its value number.
    void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) {
      LeaderTableEntry &Curr = LeaderTable[N];
      if (!Curr.Val) {
        Curr.Val = V;
        Curr.BB = BB;
        return;
      }

      LeaderTableEntry *Node = TableAllocator.Allocate<LeaderTableEntry>();
      Node->Val = V;
      Node->BB = BB;
      Node->Next = Curr.Next;
      Curr.Next = Node;
    }

    /// Scan the list of values corresponding to a given
    /// value number, and remove the given instruction if encountered.
    void removeFromLeaderTable(uint32_t N, Instruction *I, BasicBlock *BB) {
      LeaderTableEntry* Prev = nullptr;
      LeaderTableEntry* Curr = &LeaderTable[N];

      while (Curr->Val != I || Curr->BB != BB) {
        Prev = Curr;
        Curr = Curr->Next;
      }

      if (Prev) {
        Prev->Next = Curr->Next;
      } else {
        if (!Curr->Next) {
          Curr->Val = nullptr;
          Curr->BB = nullptr;
        } else {
          LeaderTableEntry* Next = Curr->Next;
          Curr->Val = Next->Val;
          Curr->BB = Next->BB;
          Curr->Next = Next->Next;
        }
      }
    }

    // List of critical edges to be split between iterations.
    SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;

    // This transformation requires dominator info.
    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<AssumptionCacheTracker>();
      AU.addRequired<DominatorTreeWrapperPass>();
      AU.addRequired<TargetLibraryInfoWrapperPass>();
      if (!NoLoads)
        AU.addRequired<MemoryDependenceAnalysis>();
      AU.addRequired<AliasAnalysis>();

      AU.addPreserved<DominatorTreeWrapperPass>();
      AU.addPreserved<AliasAnalysis>();
    }


    // Helper functions of redundant load elimination
    bool processLoad(LoadInst *L);
    bool processNonLocalLoad(LoadInst *L);
    void AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
                                 AvailValInBlkVect &ValuesPerBlock,
                                 UnavailBlkVect &UnavailableBlocks);
    bool PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
                        UnavailBlkVect &UnavailableBlocks);

    // Other helper routines
    bool processInstruction(Instruction *I);
    bool processBlock(BasicBlock *BB);
    void dump(DenseMap<uint32_t, Value*> &d);
    bool iterateOnFunction(Function &F);
    bool performPRE(Function &F);
    bool performScalarPRE(Instruction *I);
    bool performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
                                   unsigned int ValNo);
    Value *findLeader(const BasicBlock *BB, uint32_t num);
    void cleanupGlobalSets();
    void verifyRemoved(const Instruction *I) const;
    bool splitCriticalEdges();
    BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ);
    unsigned replaceAllDominatedUsesWith(Value *From, Value *To,
                                         const BasicBlockEdge &Root);
    bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root);
    bool processFoldableCondBr(BranchInst *BI);
    void addDeadBlock(BasicBlock *BB);
    void assignValNumForDeadCode();
  };

  char GVN::ID = 0;
}

// The public interface to this file...
FunctionPass *llvm::createGVNPass(bool NoLoads) {
  return new GVN(NoLoads);
}

INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void GVN::dump(DenseMap<uint32_t, Value*>& d) {
  errs() << "{\n";
  for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
         E = d.end(); I != E; ++I) {
    errs() << I->first << "\n";
    I->second->dump();
  }
  errs() << "}\n";
}
#endif

/// Return true if we can prove that the value
/// we're analyzing is fully available in the specified block. As we go, keep
/// track of which blocks we know are fully alive in FullyAvailableBlocks. This
/// map is actually a tri-state map with the following values:
///   0) we know the block *is not* fully available.
///   1) we know the block *is* fully available.
///   2) we do not know whether the block is fully available or not, but we are
///      currently speculating that it will be.
///   3) we are speculating for this block and have used that to speculate for
///      other blocks.
static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
                            DenseMap<BasicBlock*, char> &FullyAvailableBlocks,
                            uint32_t RecurseDepth) {
  if (RecurseDepth > MaxRecurseDepth)
    return false;

  // Optimistically assume that the block is fully available and check to see
  // if we already know about this block in one lookup.
  std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
    FullyAvailableBlocks.insert(std::make_pair(BB, 2));

  // If the entry already existed for this block, return the precomputed value.
  if (!IV.second) {
    // If this is a speculative "available" value, mark it as being used for
    // speculation of other blocks.
    if (IV.first->second == 2)
      IV.first->second = 3;
    return IV.first->second != 0;
  }

  // Otherwise, see if it is fully available in all predecessors.
  pred_iterator PI = pred_begin(BB), PE = pred_end(BB);

  // If this block has no predecessors, it isn't live-in here.
  if (PI == PE)
    goto SpeculationFailure;

  for (; PI != PE; ++PI)
    // If the value isn't fully available in one of our predecessors, then it
    // isn't fully available in this block either. Undo our previous
    // optimistic assumption and bail out.
    if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks, RecurseDepth+1))
      goto SpeculationFailure;

  return true;

// If we get here, we found out that this is not, after
// all, a fully-available block. We have a problem if we speculated on this and
// used the speculation to mark other blocks as available.
SpeculationFailure:
  char &BBVal = FullyAvailableBlocks[BB];

  // If we didn't speculate on this, just return with it set to false.
  if (BBVal == 2) {
    BBVal = 0;
    return false;
  }

  // If we did speculate on this value, we could have blocks set to 1 that are
  // incorrect. Walk the (transitive) successors of this block and mark them as
  // 0 if set to one.
  SmallVector<BasicBlock*, 32> BBWorklist;
  BBWorklist.push_back(BB);

  do {
    BasicBlock *Entry = BBWorklist.pop_back_val();
    // Note that this sets blocks to 0 (unavailable) if they happen to not
    // already be in FullyAvailableBlocks. This is safe.
    char &EntryVal = FullyAvailableBlocks[Entry];
    if (EntryVal == 0) continue;  // Already unavailable.

    // Mark as unavailable.
    EntryVal = 0;

    BBWorklist.append(succ_begin(Entry), succ_end(Entry));
  } while (!BBWorklist.empty());

  return false;
}


/// Return true if CoerceAvailableValueToLoadType will succeed.
static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
                                            Type *LoadTy,
                                            const DataLayout &DL) {
  // If the loaded or stored value is a first class array or struct, don't try
  // to transform them. We need to be able to bitcast to integer.
  if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
      StoredVal->getType()->isStructTy() ||
      StoredVal->getType()->isArrayTy())
    return false;

  // The store has to be at least as big as the load.
  if (DL.getTypeSizeInBits(StoredVal->getType()) <
        DL.getTypeSizeInBits(LoadTy))
    return false;

  return true;
}

/// If we saw a store of a value to memory, and
/// then a load from a must-aliased pointer of a different type, try to coerce
/// the stored value. LoadedTy is the type of the load we want to replace and
/// InsertPt is the place to insert new instructions.
///
/// If we can't do it, return null.
static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
                                             Type *LoadedTy,
                                             Instruction *InsertPt,
                                             const DataLayout &DL) {
  if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL))
    return nullptr;

  // If this is already the right type, just return it.
  Type *StoredValTy = StoredVal->getType();

  uint64_t StoreSize = DL.getTypeSizeInBits(StoredValTy);
  uint64_t LoadSize = DL.getTypeSizeInBits(LoadedTy);

  // If the store and reload are the same size, we can always reuse it.
  if (StoreSize == LoadSize) {
    // Pointer to Pointer -> use bitcast.
    if (StoredValTy->getScalarType()->isPointerTy() &&
        LoadedTy->getScalarType()->isPointerTy())
      return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);

    // Convert source pointers to integers, which can be bitcast.
    if (StoredValTy->getScalarType()->isPointerTy()) {
      StoredValTy = DL.getIntPtrType(StoredValTy);
      StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
    }

    Type *TypeToCastTo = LoadedTy;
    if (TypeToCastTo->getScalarType()->isPointerTy())
      TypeToCastTo = DL.getIntPtrType(TypeToCastTo);

    if (StoredValTy != TypeToCastTo)
      StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);

    // Cast to pointer if the load needs a pointer type.
    if (LoadedTy->getScalarType()->isPointerTy())
      StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);

    return StoredVal;
  }

  // If the loaded value is smaller than the available value, then we can
  // extract out a piece from it. If the available value is too small, then we
  // can't do anything.
  assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");

  // Convert source pointers to integers, which can be manipulated.
  if (StoredValTy->getScalarType()->isPointerTy()) {
    StoredValTy = DL.getIntPtrType(StoredValTy);
    StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
  }

  // Convert vectors and fp to integer, which can be manipulated.
  if (!StoredValTy->isIntegerTy()) {
    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
    StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
  }

  // If this is a big-endian system, we need to shift the value down to the low
  // bits so that a truncate will work.
  if (DL.isBigEndian()) {
    Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
    StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
  }

  // Truncate the integer to the right size now.
  Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
  StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);

  if (LoadedTy == NewIntTy)
    return StoredVal;

  // If the result is a pointer, inttoptr.
  if (LoadedTy->getScalarType()->isPointerTy())
    return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);

  // Otherwise, bitcast.
  return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
}

/// This function is called when we have a
/// memdep query of a load that ends up being a clobbering memory write (store,
/// memset, memcpy, memmove). This means that the write *may* provide bits used
/// by the load but we can't be sure because the pointers don't mustalias.
///
/// Check this case to see if there is anything more we can do before we give
/// up. This returns -1 if we have to give up, or a byte number in the stored
/// value of the piece that feeds the load.
static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
                                          Value *WritePtr,
                                          uint64_t WriteSizeInBits,
                                          const DataLayout &DL) {
  // If the loaded or stored value is a first class array or struct, don't try
  // to transform them. We need to be able to bitcast to integer.
  if (LoadTy->isStructTy() || LoadTy->isArrayTy())
    return -1;

  int64_t StoreOffset = 0, LoadOffset = 0;
  Value *StoreBase =
      GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL);
  Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL);
  if (StoreBase != LoadBase)
    return -1;

  // If the load and store are to the exact same address, they should have been
  // a must alias. AA must have gotten confused.
  // FIXME: Study to see if/when this happens. One case is forwarding a memset
  // to a load from the base of the memset.
#if 0
  if (LoadOffset == StoreOffset) {
    dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
           << "Base = " << *StoreBase << "\n"
           << "Store Ptr = " << *WritePtr << "\n"
           << "Store Offs = " << StoreOffset << "\n"
           << "Load Ptr = " << *LoadPtr << "\n";
    abort();
  }
#endif

  // If the load and store don't overlap at all, the store doesn't provide
  // anything to the load. In this case, they really don't alias at all, AA
  // must have gotten confused.
  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy);

  if ((WriteSizeInBits & 7) | (LoadSize & 7))
    return -1;
  uint64_t StoreSize = WriteSizeInBits >> 3;  // Convert to bytes.
  LoadSize >>= 3;


  bool isAAFailure = false;
  if (StoreOffset < LoadOffset)
    isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
  else
    isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;

  if (isAAFailure) {
#if 0
    dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n"
           << "Base = " << *StoreBase << "\n"
           << "Store Ptr = " << *WritePtr << "\n"
           << "Store Offs = " << StoreOffset << "\n"
           << "Load Ptr = " << *LoadPtr << "\n";
    abort();
#endif
    return -1;
  }

  // If the Load isn't completely contained within the stored bits, we don't
  // have all the bits to feed it. We could do something crazy in the future
  // (issue a smaller load then merge the bits in) but this seems unlikely to be
  // valuable.
  if (StoreOffset > LoadOffset ||
      StoreOffset+StoreSize < LoadOffset+LoadSize)
    return -1;

  // Okay, we can do this transformation. Return the number of bytes into the
  // store that the load is.
  return LoadOffset-StoreOffset;
}

/// This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
                                          StoreInst *DepSI) {
  // Cannot handle reading from store of first-class aggregate yet.
  if (DepSI->getValueOperand()->getType()->isStructTy() ||
      DepSI->getValueOperand()->getType()->isArrayTy())
    return -1;

  const DataLayout &DL = DepSI->getModule()->getDataLayout();
  Value *StorePtr = DepSI->getPointerOperand();
  uint64_t StoreSize = DL.getTypeSizeInBits(DepSI->getValueOperand()->getType());
  return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
                                        StorePtr, StoreSize, DL);
}

/// This function is called when we have a
/// memdep query of a load that ends up being clobbered by another load. See if
/// the other load can feed into the second load.
static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
                                         LoadInst *DepLI, const DataLayout &DL) {
  // Cannot handle reading from store of first-class aggregate yet.
  if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
    return -1;

  Value *DepPtr = DepLI->getPointerOperand();
  uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType());
  int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
  if (R != -1) return R;

  // If we have a load/load clobber and DepLI can be widened to cover this load,
  // then we should widen it!
  int64_t LoadOffs = 0;
  const Value *LoadBase =
      GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL);
  unsigned LoadSize = DL.getTypeStoreSize(LoadTy);

  unsigned Size = MemoryDependenceAnalysis::getLoadLoadClobberFullWidthSize(
      LoadBase, LoadOffs, LoadSize, DepLI);
  if (Size == 0) return -1;

  return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, DL);
}



static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
                                            MemIntrinsic *MI,
                                            const DataLayout &DL) {
  // If the mem operation is a non-constant size, we can't handle it.
  ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
  if (!SizeCst) return -1;
  uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;

  // If this is memset, we just need to see if the offset is valid in the size
  // of the memset.
  if (MI->getIntrinsicID() == Intrinsic::memset)
    return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
                                          MemSizeInBits, DL);

  // If we have a memcpy/memmove, the only case we can handle is if this is a
  // copy from constant memory. In that case, we can read directly from the
  // constant memory.
  MemTransferInst *MTI = cast<MemTransferInst>(MI);

  Constant *Src = dyn_cast<Constant>(MTI->getSource());
  if (!Src) return -1;

  GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, DL));
  if (!GV || !GV->isConstant()) return -1;

  // See if the access is within the bounds of the transfer.
  int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
                                              MI->getDest(), MemSizeInBits, DL);
  if (Offset == -1)
    return Offset;

  unsigned AS = Src->getType()->getPointerAddressSpace();
  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate.
  Src = ConstantExpr::getBitCast(Src,
                                 Type::getInt8PtrTy(Src->getContext(), AS));
  Constant *OffsetCst =
      ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
  Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
                                       OffsetCst);
  Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
  if (ConstantFoldLoadFromConstPtr(Src, DL))
    return Offset;
  return -1;
}


/// This function is called when we have a
/// memdep query of a load that ends up being a clobbering store. This means
/// that the store provides bits used by the load but the pointers don't
/// mustalias. Check this case to see if there is anything more we can do
/// before we give up.
static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
                                   Type *LoadTy,
                                   Instruction *InsertPt, const DataLayout &DL) {
  LLVMContext &Ctx = SrcVal->getType()->getContext();

  uint64_t StoreSize = (DL.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
  uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8;

  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);

  // Compute which bits of the stored value are being used by the load. Convert
  // to an integer type to start with.
  if (SrcVal->getType()->getScalarType()->isPointerTy())
    SrcVal = Builder.CreatePtrToInt(SrcVal,
                                    DL.getIntPtrType(SrcVal->getType()));
  if (!SrcVal->getType()->isIntegerTy())
    SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8));

  // Shift the bits to the least significant depending on endianness.
  unsigned ShiftAmt;
  if (DL.isLittleEndian())
    ShiftAmt = Offset*8;
  else
    ShiftAmt = (StoreSize-LoadSize-Offset)*8;

  if (ShiftAmt)
    SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt);

  if (LoadSize != StoreSize)
    SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8));

  return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, DL);
}

/// This function is called when we have a
/// memdep query of a load that ends up being a clobbering load. This means
/// that the load *may* provide bits used by the load but we can't be sure
/// because the pointers don't mustalias. Check this case to see if there is
/// anything more we can do before we give up.
static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
                                  Type *LoadTy, Instruction *InsertPt,
                                  GVN &gvn) {
  const DataLayout &DL = SrcVal->getModule()->getDataLayout();
  // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
  // widen SrcVal out to a larger load.
  unsigned SrcValSize = DL.getTypeStoreSize(SrcVal->getType());
  unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
  if (Offset+LoadSize > SrcValSize) {
    assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
    assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
    // If we have a load/load clobber and DepLI can be widened to cover this
    // load, then we should widen it to the next power of 2 size big enough!
    unsigned NewLoadSize = Offset+LoadSize;
    if (!isPowerOf2_32(NewLoadSize))
      NewLoadSize = NextPowerOf2(NewLoadSize);

    Value *PtrVal = SrcVal->getPointerOperand();

    // Insert the new load after the old load. This ensures that subsequent
    // memdep queries will find the new load.
    // We can't easily remove the old load completely because it is already in
    // the value numbering table.
    IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
    Type *DestPTy =
        IntegerType::get(LoadTy->getContext(), NewLoadSize*8);
    DestPTy = PointerType::get(DestPTy,
                               PtrVal->getType()->getPointerAddressSpace());
    Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
    PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
    LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
    NewLoad->takeName(SrcVal);
    NewLoad->setAlignment(SrcVal->getAlignment());

    DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
    DEBUG(dbgs() << "TO: " << *NewLoad << "\n");

    // Replace uses of the original load with the wider load. On a big endian
    // system, we need to shift down to get the relevant bits.
    Value *RV = NewLoad;
    if (DL.isBigEndian())
      RV = Builder.CreateLShr(RV,
                    NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits());
    RV = Builder.CreateTrunc(RV, SrcVal->getType());
    SrcVal->replaceAllUsesWith(RV);

    // We would like to use gvn.markInstructionForDeletion here, but we can't
    // because the load is already memoized into the leader map table that GVN
    // tracks. It is potentially possible to remove the load from the table,
    // but then all of the operations based on it would need to be rehashed.
    // Just leave the dead load around.
    gvn.getMemDep().removeInstruction(SrcVal);
    SrcVal = NewLoad;
  }

  return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, DL);
}


/// This function is called when we have a
/// memdep query of a load that ends up being a clobbering mem intrinsic.
static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
                                     Type *LoadTy, Instruction *InsertPt,
                                     const DataLayout &DL) {
  LLVMContext &Ctx = LoadTy->getContext();
  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy)/8;

  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);

  // We know that this method is only called when the mem transfer fully
  // provides the bits for the load.
  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
    // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
    // independently of what the offset is.
    Value *Val = MSI->getValue();
    if (LoadSize != 1)
      Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));

    Value *OneElt = Val;

    // Splat the value out to the right number of bits.
    for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
      // If we can double the number of bytes set, do it.
      if (NumBytesSet*2 <= LoadSize) {
        Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
        Val = Builder.CreateOr(Val, ShVal);
        NumBytesSet <<= 1;
        continue;
      }

      // Otherwise insert one byte at a time.
      Value *ShVal = Builder.CreateShl(Val, 1*8);
      Val = Builder.CreateOr(OneElt, ShVal);
      ++NumBytesSet;
    }

    return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, DL);
  }

  // Otherwise, this is a memcpy/memmove from a constant global.
  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
  Constant *Src = cast<Constant>(MTI->getSource());
  unsigned AS = Src->getType()->getPointerAddressSpace();

  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate.
  Src = ConstantExpr::getBitCast(Src,
                                 Type::getInt8PtrTy(Src->getContext(), AS));
  Constant *OffsetCst =
      ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
  Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
                                       OffsetCst);
  Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
  return ConstantFoldLoadFromConstPtr(Src, DL);
}


/// Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI. This returns the value
/// that should be used at LI's definition site.
static Value *ConstructSSAForLoadSet(LoadInst *LI,
                         SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
                                     GVN &gvn) {
  // Check for the fully redundant, dominating load case. In this case, we can
  // just use the dominating value directly.
  if (ValuesPerBlock.size() == 1 &&
      gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
                                               LI->getParent())) {
    assert(!ValuesPerBlock[0].isUndefValue() && "Dead BB dominate this block");
    return ValuesPerBlock[0].MaterializeAdjustedValue(LI, gvn);
  }

  // Otherwise, we have to construct SSA form.
  SmallVector<PHINode*, 8> NewPHIs;
  SSAUpdater SSAUpdate(&NewPHIs);
  SSAUpdate.Initialize(LI->getType(), LI->getName());

  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
    const AvailableValueInBlock &AV = ValuesPerBlock[i];
    BasicBlock *BB = AV.BB;

    if (SSAUpdate.HasValueForBlock(BB))
      continue;

    SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LI, gvn));
  }

  // Perform PHI construction.
  Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());

  // If new PHI nodes were created, notify alias analysis.
  if (V->getType()->getScalarType()->isPointerTy()) {
    AliasAnalysis *AA = gvn.getAliasAnalysis();

    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
      AA->copyValue(LI, NewPHIs[i]);

    // Now that we've copied information to the new PHIs, scan through
    // them again and inform alias analysis that we've added potentially
    // escaping uses to any values that are operands to these PHIs.
    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) {
      PHINode *P = NewPHIs[i];
      for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) {
        unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
        AA->addEscapingUse(P->getOperandUse(jj));
      }
    }
  }

  return V;
}

Value *AvailableValueInBlock::MaterializeAdjustedValue(LoadInst *LI,
                                                       GVN &gvn) const {
  Value *Res;
  Type *LoadTy = LI->getType();
  const DataLayout &DL = LI->getModule()->getDataLayout();
  if (isSimpleValue()) {
    Res = getSimpleValue();
    if (Res->getType() != LoadTy) {
      Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), DL);

      DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
                   << *getSimpleValue() << '\n'
                   << *Res << '\n' << "\n\n\n");
    }
  } else if (isCoercedLoadValue()) {
    LoadInst *Load = getCoercedLoadValue();
    if (Load->getType() == LoadTy && Offset == 0) {
      Res = Load;
    } else {
      Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
                                gvn);

      DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
                   << *getCoercedLoadValue() << '\n'
                   << *Res << '\n' << "\n\n\n");
    }
  } else if (isMemIntrinValue()) {
    Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy,
                                 BB->getTerminator(), DL);
    DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
                 << " " << *getMemIntrinValue() << '\n'
                 << *Res << '\n' << "\n\n\n");
  } else {
    assert(isUndefValue() && "Should be UndefVal");
    DEBUG(dbgs() << "GVN COERCED NONLOCAL Undef:\n";);
    return UndefValue::get(LoadTy);
  }
  return Res;
}

static bool isLifetimeStart(const Instruction *Inst) {
  if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
    return II->getIntrinsicID() == Intrinsic::lifetime_start;
  return false;
}

void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
                                  AvailValInBlkVect &ValuesPerBlock,
                                  UnavailBlkVect &UnavailableBlocks) {

  // Filter out useless results (non-locals, etc). Keep track of the blocks
  // where we have a value available in repl, also keep track of whether we see
  // dependencies that produce an unknown value for the load (such as a call
  // that could potentially clobber the load).
  unsigned NumDeps = Deps.size();
  const DataLayout &DL = LI->getModule()->getDataLayout();
  for (unsigned i = 0, e = NumDeps; i != e; ++i) {
    BasicBlock *DepBB = Deps[i].getBB();
    MemDepResult DepInfo = Deps[i].getResult();

    if (DeadBlocks.count(DepBB)) {
      // A dead dependent mem-op is disguised as a load evaluating the same
      // value as the load in question.
      ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB));
      continue;
    }

    if (!DepInfo.isDef() && !DepInfo.isClobber()) {
      UnavailableBlocks.push_back(DepBB);
      continue;
    }

    if (DepInfo.isClobber()) {
      // The address being loaded in this non-local block may not be the same as
      // the pointer operand of the load if PHI translation occurs. Make sure
      // to consider the right address.
      Value *Address = Deps[i].getAddress();

      // If the dependence is to a store that writes to a superset of the bits
      // read by the load, we can extract the bits we need for the load from the
      // stored value.
      if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
        if (Address) {
          int Offset =
              AnalyzeLoadFromClobberingStore(LI->getType(), Address, DepSI);
          if (Offset != -1) {
            ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                       DepSI->getValueOperand(), Offset));
            continue;
          }
        }
      }

      // Check to see if we have something like this:
      //    load i32* P
      //    load i8* (P+1)
      // if we have this, replace the latter with an extraction from the former.
      if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) {
        // If this is a clobber and L is the first instruction in its block, then
        // we have the first instruction in the entry block.
        if (DepLI != LI && Address) {
          int Offset =
              AnalyzeLoadFromClobberingLoad(LI->getType(), Address, DepLI, DL);

          if (Offset != -1) {
            ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB, DepLI,
                                                                    Offset));
            continue;
          }
        }
      }

      // If the clobbering value is a memset/memcpy/memmove, see if we can
      // forward a value on from it.
      if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
        if (Address) {
          int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
                                                        DepMI, DL);
          if (Offset != -1) {
            ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
                                                                  Offset));
            continue;
          }
        }
      }

      UnavailableBlocks.push_back(DepBB);
      continue;
    }

    // DepInfo.isDef() here

    Instruction *DepInst = DepInfo.getInst();

    // Loading the allocation -> undef.
    if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI) ||
        // Loading immediately after lifetime begin -> undef.
        isLifetimeStart(DepInst)) {
      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                             UndefValue::get(LI->getType())));
      continue;
    }

    // Loading from calloc (which zero initializes memory) -> zero
    if (isCallocLikeFn(DepInst, TLI)) {
      ValuesPerBlock.push_back(AvailableValueInBlock::get(
          DepBB, Constant::getNullValue(LI->getType())));
      continue;
    }

    if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
      // Reject loads and stores that are to the same address but are of
      // different types if we have to.
      if (S->getValueOperand()->getType() != LI->getType()) {
        // If the stored value is larger or equal to the loaded value, we can
        // reuse it.
        if (!CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
                                             LI->getType(), DL)) {
          UnavailableBlocks.push_back(DepBB);
          continue;
        }
      }

      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                        S->getValueOperand()));
      continue;
    }

    if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
      // If the types mismatch and we can't handle it, reject reuse of the load.
      if (LD->getType() != LI->getType()) {
        // If the stored value is larger or equal to the loaded value, we can
        // reuse it.
        if (!CanCoerceMustAliasedValueToLoad(LD, LI->getType(), DL)) {
          UnavailableBlocks.push_back(DepBB);
          continue;
        }
      }
      ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB, LD));
      continue;
    }

    UnavailableBlocks.push_back(DepBB);
  }
}

bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
                         UnavailBlkVect &UnavailableBlocks) {
  // Okay, we have *some* definitions of the value.
  // This means that the value is available in some of our (transitive)
  // predecessors. Let's think about doing PRE of this load. This will involve
  // inserting a new load into the predecessor when it's not available. We
  // could do this in general, but prefer to not increase code size. As such,
  // we only do this when we know that we only have to insert *one* load (which
  // means we're basically moving the load, not inserting a new one).

  SmallPtrSet<BasicBlock *, 4> Blockers;
  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
    Blockers.insert(UnavailableBlocks[i]);

  // Let's find the first basic block with more than one predecessor. Walk
  // backwards through predecessors if needed.
  BasicBlock *LoadBB = LI->getParent();
  BasicBlock *TmpBB = LoadBB;

  while (TmpBB->getSinglePredecessor()) {
    TmpBB = TmpBB->getSinglePredecessor();
    if (TmpBB == LoadBB) // Infinite (unreachable) loop.
      return false;
    if (Blockers.count(TmpBB))
      return false;

    // If any of these blocks has more than one successor (i.e. if the edge we
    // just traversed was critical), then there are other paths through this
    // block along which the load may not be anticipated. Hoisting the load
    // above this block would be adding the load to execution paths along
    // which it was not previously executed.
    if (TmpBB->getTerminator()->getNumSuccessors() != 1)
      return false;
  }

  assert(TmpBB);
  LoadBB = TmpBB;

  // Check to see how many predecessors have the loaded value fully
  // available.
  MapVector<BasicBlock *, Value *> PredLoads;
  DenseMap<BasicBlock*, char> FullyAvailableBlocks;
  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
    FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
    FullyAvailableBlocks[UnavailableBlocks[i]] = false;

  SmallVector<BasicBlock *, 4> CriticalEdgePred;
  for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
       PI != E; ++PI) {
    BasicBlock *Pred = *PI;
    if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks, 0)) {
      continue;
    }

    if (Pred->getTerminator()->getNumSuccessors() != 1) {
      if (isa<IndirectBrInst>(Pred->getTerminator())) {
        DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
              << Pred->getName() << "': " << *LI << '\n');
        return false;
      }

      if (LoadBB->isLandingPad()) {
        DEBUG(dbgs()
              << "COULD NOT PRE LOAD BECAUSE OF LANDING PAD CRITICAL EDGE '"
              << Pred->getName() << "': " << *LI << '\n');
        return false;
      }

      CriticalEdgePred.push_back(Pred);
    } else {
      // Only add the predecessors that will not be split for now.
      PredLoads[Pred] = nullptr;
    }
  }

  // Decide whether PRE is profitable for this load.
  unsigned NumUnavailablePreds = PredLoads.size() + CriticalEdgePred.size();
  assert(NumUnavailablePreds != 0 &&
         "Fully available value should already be eliminated!");

  // If this load is unavailable in multiple predecessors, reject it.
  // FIXME: If we could restructure the CFG, we could make a common pred with
  // all the preds that don't have an available LI and insert a new load into
  // that one block.
  if (NumUnavailablePreds != 1)
    return false;

  // Split critical edges, and update the unavailable predecessors accordingly.
  for (BasicBlock *OrigPred : CriticalEdgePred) {
    BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB);
    assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!");
    PredLoads[NewPred] = nullptr;
    DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->"
                 << LoadBB->getName() << '\n');
  }

  // Check if the load can safely be moved to all the unavailable predecessors.
  bool CanDoPRE = true;
  const DataLayout &DL = LI->getModule()->getDataLayout();
  SmallVector<Instruction*, 8> NewInsts;
  for (auto &PredLoad : PredLoads) {
    BasicBlock *UnavailablePred = PredLoad.first;

    // Do PHI translation to get its value in the predecessor if necessary.  The
    // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.

    // If all preds have a single successor, then we know it is safe to insert
    // the load on the pred, so we can insert code to materialize the pointer
    // if it is not available.
    PHITransAddr Address(LI->getPointerOperand(), DL, AC);
    Value *LoadPtr = nullptr;
    LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
                                                *DT, NewInsts);

    // If we couldn't find or insert a computation of this phi translated value,
    // we fail PRE.
    if (!LoadPtr) {
      DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
            << *LI->getPointerOperand() << "\n");
      CanDoPRE = false;
      break;
    }

    PredLoad.second = LoadPtr;
  }

  if (!CanDoPRE) {
    while (!NewInsts.empty()) {
      Instruction *I = NewInsts.pop_back_val();
      if (MD) MD->removeInstruction(I);
      I->eraseFromParent();
    }
    // HINT: Don't revert the edge-splitting, as the following transformations
    // may also need to split these critical edges.
    return !CriticalEdgePred.empty();
  }

  // Okay, we can eliminate this load by inserting a reload in the predecessor
  // and using PHI construction to get the value in the other predecessors;
  // do it.
  DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
  DEBUG(if (!NewInsts.empty())
          dbgs() << "INSERTED " << NewInsts.size() << " INSTS: "
                 << *NewInsts.back() << '\n');

  // Assign value numbers to the new instructions.
  for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
    // FIXME: We really _ought_ to insert these value numbers into their
    // parent's availability map.  However, in doing so, we risk getting into
    // ordering issues.  If a block hasn't been processed yet, we would be
    // marking a value as AVAIL-IN, which isn't what we intend.
    VN.lookup_or_add(NewInsts[i]);
  }

  for (const auto &PredLoad : PredLoads) {
    BasicBlock *UnavailablePred = PredLoad.first;
    Value *LoadPtr = PredLoad.second;

    Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
                                        LI->getAlignment(),
                                        UnavailablePred->getTerminator());

    // Transfer the old load's AA tags to the new load.
    AAMDNodes Tags;
    LI->getAAMetadata(Tags);
    if (Tags)
      NewLoad->setAAMetadata(Tags);

    // Transfer DebugLoc.
    NewLoad->setDebugLoc(LI->getDebugLoc());

    // Add the newly created load.
    ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
                                                        NewLoad));
    MD->invalidateCachedPointerInfo(LoadPtr);
    DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
  }

  // Perform PHI construction.
  Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
  LI->replaceAllUsesWith(V);
  if (isa<PHINode>(V))
    V->takeName(LI);
  if (V->getType()->getScalarType()->isPointerTy())
    MD->invalidateCachedPointerInfo(V);
  markInstructionForDeletion(LI);
  ++NumPRELoad;
  return true;
}

/// Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
bool GVN::processNonLocalLoad(LoadInst *LI) {
  // Step 1: Find the non-local dependencies of the load.
  LoadDepVect Deps;
  MD->getNonLocalPointerDependency(LI, Deps);

  // If we had to process more than one hundred blocks to find the
  // dependencies, this load isn't worth worrying about.  Optimizing
  // it will be too expensive.
  unsigned NumDeps = Deps.size();
  if (NumDeps > 100)
    return false;

  // If we had a phi translation failure, we'll have a single entry in the
  // current block whose dependence is unknown (neither a def nor a clobber).
  // Reject this early.
  if (NumDeps == 1 &&
      !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
    DEBUG(
      dbgs() << "GVN: non-local load ";
      LI->printAsOperand(dbgs());
      dbgs() << " has unknown dependencies\n";
    );
    return false;
  }

  // If this load follows a GEP, see if we can PRE the indices before analyzing.
  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0))) {
    for (GetElementPtrInst::op_iterator OI = GEP->idx_begin(),
                                        OE = GEP->idx_end();
         OI != OE; ++OI)
      if (Instruction *I = dyn_cast<Instruction>(OI->get()))
        performScalarPRE(I);
  }

  // Step 2: Analyze the availability of the load
  AvailValInBlkVect ValuesPerBlock;
  UnavailBlkVect UnavailableBlocks;
  AnalyzeLoadAvailability(LI, Deps, ValuesPerBlock, UnavailableBlocks);

  // If we have no predecessors that produce a known value for this load, exit
  // early.
  if (ValuesPerBlock.empty())
    return false;

  // Step 3: Eliminate full redundancy.
  //
  // If all of the instructions we depend on produce a known value for this
  // load, then it is fully redundant and we can use PHI insertion to compute
  // its value.  Insert PHIs and remove the fully redundant value now.
  if (UnavailableBlocks.empty()) {
    DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');

    // Perform PHI construction.
    Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
    LI->replaceAllUsesWith(V);

    if (isa<PHINode>(V))
      V->takeName(LI);
    if (V->getType()->getScalarType()->isPointerTy())
      MD->invalidateCachedPointerInfo(V);
    markInstructionForDeletion(LI);
    ++NumGVNLoad;
    return true;
  }

  // Step 4: Eliminate partial redundancy.
  if (!EnablePRE || !EnableLoadPRE)
    return false;

  return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks);
}


static void patchReplacementInstruction(Instruction *I, Value *Repl) {
  // Patch the replacement so that it is not more restrictive than the value
  // being replaced.
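  //
  // As a purely illustrative sketch (hypothetical values, not from this file):
  // if the replacement carries flags that the original lacks, e.g.
  //   %orig = add i32 %x, %y
  //   %repl = add nsw i32 %x, %y
  // then 'nsw' must be dropped from %repl before it stands in for %orig,
  // because the uses of %orig were never guaranteed not to wrap.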
  BinaryOperator *Op = dyn_cast<BinaryOperator>(I);
  BinaryOperator *ReplOp = dyn_cast<BinaryOperator>(Repl);
  if (Op && ReplOp && isa<OverflowingBinaryOperator>(Op) &&
      isa<OverflowingBinaryOperator>(ReplOp)) {
    if (ReplOp->hasNoSignedWrap() && !Op->hasNoSignedWrap())
      ReplOp->setHasNoSignedWrap(false);
    if (ReplOp->hasNoUnsignedWrap() && !Op->hasNoUnsignedWrap())
      ReplOp->setHasNoUnsignedWrap(false);
  }
  if (Instruction *ReplInst = dyn_cast<Instruction>(Repl)) {
    // FIXME: If both the original and replacement value are part of the
    // same control-flow region (meaning that the execution of one
    // guarantees the execution of the other), then we can combine the
    // noalias scopes here and do better than the general conservative
    // answer used in combineMetadata().

    // In general, GVN unifies expressions over different control-flow
    // regions, and so we need a conservative combination of the noalias
    // scopes.
    unsigned KnownIDs[] = {
      LLVMContext::MD_tbaa,
      LLVMContext::MD_alias_scope,
      LLVMContext::MD_noalias,
      LLVMContext::MD_range,
      LLVMContext::MD_fpmath,
      LLVMContext::MD_invariant_load,
    };
    combineMetadata(ReplInst, I, KnownIDs);
  }
}

static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
  patchReplacementInstruction(I, Repl);
  I->replaceAllUsesWith(Repl);
}

/// Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
bool GVN::processLoad(LoadInst *L) {
  if (!MD)
    return false;

  if (!L->isSimple())
    return false;

  if (L->use_empty()) {
    markInstructionForDeletion(L);
    return true;
  }

  // ... to a pointer that has been loaded from before...
  MemDepResult Dep = MD->getDependency(L);
  const DataLayout &DL = L->getModule()->getDataLayout();

  // If we have a clobber and target data is around, see if this is a clobber
  // that we can fix up through code synthesis.
  if (Dep.isClobber()) {
    // Check to see if we have something like this:
    //   store i32 123, i32* %P
    //   %A = bitcast i32* %P to i8*
    //   %B = gep i8* %A, i32 1
    //   %C = load i8* %B
    //
    // We could do that by recognizing if the clobber instructions are obviously
    // a common base + constant offset, and if the previous store (or memset)
    // completely covers this load.  This sort of thing can happen in bitfield
    // access code.
    Value *AvailVal = nullptr;
    if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) {
      int Offset = AnalyzeLoadFromClobberingStore(
          L->getType(), L->getPointerOperand(), DepSI);
      if (Offset != -1)
        AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
                                        L->getType(), L, DL);
    }

    // Check to see if we have something like this:
    //    load i32* P
    //    load i8* (P+1)
    // if we have this, replace the latter with an extraction from the former.
    if (LoadInst *DepLI = dyn_cast<LoadInst>(Dep.getInst())) {
      // If this is a clobber and L is the first instruction in its block, then
      // we have the first instruction in the entry block.
1867 if (DepLI == L) 1868 return false; 1869 1870 int Offset = AnalyzeLoadFromClobberingLoad( 1871 L->getType(), L->getPointerOperand(), DepLI, DL); 1872 if (Offset != -1) 1873 AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this); 1874 } 1875 1876 // If the clobbering value is a memset/memcpy/memmove, see if we can forward 1877 // a value on from it. 1878 if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) { 1879 int Offset = AnalyzeLoadFromClobberingMemInst( 1880 L->getType(), L->getPointerOperand(), DepMI, DL); 1881 if (Offset != -1) 1882 AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, DL); 1883 } 1884 1885 if (AvailVal) { 1886 DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n' 1887 << *AvailVal << '\n' << *L << "\n\n\n"); 1888 1889 // Replace the load! 1890 L->replaceAllUsesWith(AvailVal); 1891 if (AvailVal->getType()->getScalarType()->isPointerTy()) 1892 MD->invalidateCachedPointerInfo(AvailVal); 1893 markInstructionForDeletion(L); 1894 ++NumGVNLoad; 1895 return true; 1896 } 1897 } 1898 1899 // If the value isn't available, don't do anything! 1900 if (Dep.isClobber()) { 1901 DEBUG( 1902 // fast print dep, using operator<< on instruction is too slow. 1903 dbgs() << "GVN: load "; 1904 L->printAsOperand(dbgs()); 1905 Instruction *I = Dep.getInst(); 1906 dbgs() << " is clobbered by " << *I << '\n'; 1907 ); 1908 return false; 1909 } 1910 1911 // If it is defined in another block, try harder. 1912 if (Dep.isNonLocal()) 1913 return processNonLocalLoad(L); 1914 1915 if (!Dep.isDef()) { 1916 DEBUG( 1917 // fast print dep, using operator<< on instruction is too slow. 1918 dbgs() << "GVN: load "; 1919 L->printAsOperand(dbgs()); 1920 dbgs() << " has unknown dependence\n"; 1921 ); 1922 return false; 1923 } 1924 1925 Instruction *DepInst = Dep.getInst(); 1926 if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) { 1927 Value *StoredVal = DepSI->getValueOperand(); 1928 1929 // The store and load are to a must-aliased pointer, but they may not 1930 // actually have the same type. See if we know how to reuse the stored 1931 // value (depending on its type). 1932 if (StoredVal->getType() != L->getType()) { 1933 StoredVal = 1934 CoerceAvailableValueToLoadType(StoredVal, L->getType(), L, DL); 1935 if (!StoredVal) 1936 return false; 1937 1938 DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal 1939 << '\n' << *L << "\n\n\n"); 1940 } 1941 1942 // Remove it! 1943 L->replaceAllUsesWith(StoredVal); 1944 if (StoredVal->getType()->getScalarType()->isPointerTy()) 1945 MD->invalidateCachedPointerInfo(StoredVal); 1946 markInstructionForDeletion(L); 1947 ++NumGVNLoad; 1948 return true; 1949 } 1950 1951 if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) { 1952 Value *AvailableVal = DepLI; 1953 1954 // The loads are of a must-aliased pointer, but they may not actually have 1955 // the same type. See if we know how to reuse the previously loaded value 1956 // (depending on its type). 1957 if (DepLI->getType() != L->getType()) { 1958 AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L, DL); 1959 if (!AvailableVal) 1960 return false; 1961 1962 DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal 1963 << "\n" << *L << "\n\n\n"); 1964 } 1965 1966 // Remove it! 
    patchAndReplaceAllUsesWith(L, AvailableVal);
    if (DepLI->getType()->getScalarType()->isPointerTy())
      MD->invalidateCachedPointerInfo(DepLI);
    markInstructionForDeletion(L);
    ++NumGVNLoad;
    return true;
  }

  // If this load really doesn't depend on anything, then we must be loading an
  // undef value.  This can happen when loading from a fresh allocation with no
  // intervening stores, for example.
  if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI)) {
    L->replaceAllUsesWith(UndefValue::get(L->getType()));
    markInstructionForDeletion(L);
    ++NumGVNLoad;
    return true;
  }

  // If this load occurs right after a lifetime begin,
  // then the loaded value is undefined.
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) {
    if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
      L->replaceAllUsesWith(UndefValue::get(L->getType()));
      markInstructionForDeletion(L);
      ++NumGVNLoad;
      return true;
    }
  }

  // If this load follows a calloc (which zero initializes memory),
  // then the loaded value is zero.
  if (isCallocLikeFn(DepInst, TLI)) {
    L->replaceAllUsesWith(Constant::getNullValue(L->getType()));
    markInstructionForDeletion(L);
    ++NumGVNLoad;
    return true;
  }

  return false;
}

// In order to find a leader for a given value number at a
// specific basic block, we first obtain the list of all Values for that number,
// and then scan the list to find one whose block dominates the block in
// question.  This is fast because dominator tree queries consist of only
// a few comparisons of DFS numbers.
Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) {
  LeaderTableEntry Vals = LeaderTable[num];
  if (!Vals.Val) return nullptr;

  Value *Val = nullptr;
  if (DT->dominates(Vals.BB, BB)) {
    Val = Vals.Val;
    if (isa<Constant>(Val)) return Val;
  }

  LeaderTableEntry* Next = Vals.Next;
  while (Next) {
    if (DT->dominates(Next->BB, BB)) {
      if (isa<Constant>(Next->Val)) return Next->Val;
      if (!Val) Val = Next->Val;
    }

    Next = Next->Next;
  }

  return Val;
}

/// Replace all uses of 'From' with 'To' if the use is dominated by the given
/// edge.  Returns the number of uses that were replaced.
unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To,
                                          const BasicBlockEdge &Root) {
  unsigned Count = 0;
  for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
       UI != UE; ) {
    Use &U = *UI++;

    if (DT->dominates(Root, U)) {
      U.set(To);
      ++Count;
    }
  }
  return Count;
}

/// There is an edge from 'Src' to 'Dst'.  Return
/// true if every path from the entry block to 'Dst' passes via this edge.  In
/// particular 'Dst' must not be reachable via another edge from 'Src'.
static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
                                       DominatorTree *DT) {
  // While in theory it is interesting to consider the case in which Dst has
  // more than one predecessor, because Dst might be part of a loop which is
  // only reachable from Src, in practice it is pointless since at the time
  // GVN runs all such loops have preheaders, which means that Dst will have
  // been changed to have only one predecessor, namely Src.
  const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
  const BasicBlock *Src = E.getStart();
  assert((!Pred || Pred == Src) && "No edge between these basic blocks!");
  (void)Src;
  return Pred != nullptr;
}

/// The given values are known to be equal in every block
/// dominated by 'Root'.  Exploit this, for example by replacing 'LHS' with
/// 'RHS' everywhere in the scope.  Returns whether a change was made.
bool GVN::propagateEquality(Value *LHS, Value *RHS,
                            const BasicBlockEdge &Root) {
  SmallVector<std::pair<Value*, Value*>, 4> Worklist;
  Worklist.push_back(std::make_pair(LHS, RHS));
  bool Changed = false;
  // For speed, compute a conservative fast approximation to
  // DT->dominates(Root, Root.getEnd());
  bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT);

  while (!Worklist.empty()) {
    std::pair<Value*, Value*> Item = Worklist.pop_back_val();
    LHS = Item.first; RHS = Item.second;

    if (LHS == RHS) continue;
    assert(LHS->getType() == RHS->getType() && "Equality but unequal types!");

    // Don't try to propagate equalities between constants.
    if (isa<Constant>(LHS) && isa<Constant>(RHS)) continue;

    // Prefer a constant on the right-hand side, or an Argument if no constants.
    if (isa<Constant>(LHS) || (isa<Argument>(LHS) && !isa<Constant>(RHS)))
      std::swap(LHS, RHS);
    assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!");

    // If there is no obvious reason to prefer the left-hand side over the
    // right-hand side, ensure the longest lived term is on the right-hand side,
    // so the shortest lived term will be replaced by the longest lived.
    // This tends to expose more simplifications.
    uint32_t LVN = VN.lookup_or_add(LHS);
    if ((isa<Argument>(LHS) && isa<Argument>(RHS)) ||
        (isa<Instruction>(LHS) && isa<Instruction>(RHS))) {
      // Move the 'oldest' value to the right-hand side, using the value number
      // as a proxy for age.
      uint32_t RVN = VN.lookup_or_add(RHS);
      if (LVN < RVN) {
        std::swap(LHS, RHS);
        LVN = RVN;
      }
    }

    // If value numbering later sees that an instruction in the scope is equal
    // to 'LHS' then ensure it will be turned into 'RHS'.  In order to preserve
    // the invariant that instructions only occur in the leader table for their
    // own value number (this is used by removeFromLeaderTable), do not do this
    // if RHS is an instruction (if an instruction in the scope is morphed into
    // LHS then it will be turned into RHS by the next GVN iteration anyway, so
    // using the leader table is about compiling faster, not optimizing better).
    // The leader table only tracks basic blocks, not edges.  Only add to it if
    // we have the simple case where the edge dominates the end.
    if (RootDominatesEnd && !isa<Instruction>(RHS))
      addToLeaderTable(LVN, RHS, Root.getEnd());

    // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope.  As
    // LHS always has at least one use that is not dominated by Root, this will
    // never do anything if LHS has only one use.
    if (!LHS->hasOneUse()) {
      unsigned NumReplacements = replaceAllDominatedUsesWith(LHS, RHS, Root);
      Changed |= NumReplacements > 0;
      NumGVNEqProp += NumReplacements;
    }

    // Now try to deduce additional equalities from this one.
For example, if 2135 // the known equality was "(A != B)" == "false" then it follows that A and B 2136 // are equal in the scope. Only boolean equalities with an explicit true or 2137 // false RHS are currently supported. 2138 if (!RHS->getType()->isIntegerTy(1)) 2139 // Not a boolean equality - bail out. 2140 continue; 2141 ConstantInt *CI = dyn_cast<ConstantInt>(RHS); 2142 if (!CI) 2143 // RHS neither 'true' nor 'false' - bail out. 2144 continue; 2145 // Whether RHS equals 'true'. Otherwise it equals 'false'. 2146 bool isKnownTrue = CI->isAllOnesValue(); 2147 bool isKnownFalse = !isKnownTrue; 2148 2149 // If "A && B" is known true then both A and B are known true. If "A || B" 2150 // is known false then both A and B are known false. 2151 Value *A, *B; 2152 if ((isKnownTrue && match(LHS, m_And(m_Value(A), m_Value(B)))) || 2153 (isKnownFalse && match(LHS, m_Or(m_Value(A), m_Value(B))))) { 2154 Worklist.push_back(std::make_pair(A, RHS)); 2155 Worklist.push_back(std::make_pair(B, RHS)); 2156 continue; 2157 } 2158 2159 // If we are propagating an equality like "(A == B)" == "true" then also 2160 // propagate the equality A == B. When propagating a comparison such as 2161 // "(A >= B)" == "true", replace all instances of "A < B" with "false". 2162 if (CmpInst *Cmp = dyn_cast<CmpInst>(LHS)) { 2163 Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1); 2164 2165 // If "A == B" is known true, or "A != B" is known false, then replace 2166 // A with B everywhere in the scope. 2167 if ((isKnownTrue && Cmp->getPredicate() == CmpInst::ICMP_EQ) || 2168 (isKnownFalse && Cmp->getPredicate() == CmpInst::ICMP_NE)) 2169 Worklist.push_back(std::make_pair(Op0, Op1)); 2170 2171 // Handle the floating point versions of equality comparisons too. 2172 if ((isKnownTrue && Cmp->getPredicate() == CmpInst::FCMP_OEQ) || 2173 (isKnownFalse && Cmp->getPredicate() == CmpInst::FCMP_UNE)) { 2174 2175 // Floating point -0.0 and 0.0 compare equal, so we can only 2176 // propagate values if we know that we have a constant and that 2177 // its value is non-zero. 2178 2179 // FIXME: We should do this optimization if 'no signed zeros' is 2180 // applicable via an instruction-level fast-math-flag or some other 2181 // indicator that relaxed FP semantics are being used. 2182 2183 if (isa<ConstantFP>(Op1) && !cast<ConstantFP>(Op1)->isZero()) 2184 Worklist.push_back(std::make_pair(Op0, Op1)); 2185 } 2186 2187 // If "A >= B" is known true, replace "A < B" with false everywhere. 2188 CmpInst::Predicate NotPred = Cmp->getInversePredicate(); 2189 Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse); 2190 // Since we don't have the instruction "A < B" immediately to hand, work 2191 // out the value number that it would have and use that to find an 2192 // appropriate instruction (if any). 2193 uint32_t NextNum = VN.getNextUnusedValueNumber(); 2194 uint32_t Num = VN.lookup_or_add_cmp(Cmp->getOpcode(), NotPred, Op0, Op1); 2195 // If the number we were assigned was brand new then there is no point in 2196 // looking for an instruction realizing it: there cannot be one! 2197 if (Num < NextNum) { 2198 Value *NotCmp = findLeader(Root.getEnd(), Num); 2199 if (NotCmp && isa<Instruction>(NotCmp)) { 2200 unsigned NumReplacements = 2201 replaceAllDominatedUsesWith(NotCmp, NotVal, Root); 2202 Changed |= NumReplacements > 0; 2203 NumGVNEqProp += NumReplacements; 2204 } 2205 } 2206 // Ensure that any instruction in scope that gets the "A < B" value number 2207 // is replaced with false. 
      // The leader table only tracks basic blocks, not edges.  Only add to it
      // if we have the simple case where the edge dominates the end.
      if (RootDominatesEnd)
        addToLeaderTable(Num, NotVal, Root.getEnd());

      continue;
    }
  }

  return Changed;
}

/// When calculating availability, handle an instruction
/// by inserting it into the appropriate sets.
bool GVN::processInstruction(Instruction *I) {
  // Ignore dbg info intrinsics.
  if (isa<DbgInfoIntrinsic>(I))
    return false;

  // If the instruction can be easily simplified then do so now in preference
  // to value numbering it.  Value numbering often exposes redundancies, for
  // example if it determines that %y is equal to %x then the instruction
  // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
  const DataLayout &DL = I->getModule()->getDataLayout();
  if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) {
    I->replaceAllUsesWith(V);
    if (MD && V->getType()->getScalarType()->isPointerTy())
      MD->invalidateCachedPointerInfo(V);
    markInstructionForDeletion(I);
    ++NumGVNSimpl;
    return true;
  }

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    if (processLoad(LI))
      return true;

    unsigned Num = VN.lookup_or_add(LI);
    addToLeaderTable(Num, LI, LI->getParent());
    return false;
  }

  // For conditional branches, we can perform simple conditional propagation on
  // the condition value itself.
  if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
    if (!BI->isConditional())
      return false;

    if (isa<Constant>(BI->getCondition()))
      return processFoldableCondBr(BI);

    Value *BranchCond = BI->getCondition();
    BasicBlock *TrueSucc = BI->getSuccessor(0);
    BasicBlock *FalseSucc = BI->getSuccessor(1);
    // Avoid duplicate edges early (both successors are the same block).
    if (TrueSucc == FalseSucc)
      return false;

    BasicBlock *Parent = BI->getParent();
    bool Changed = false;

    Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext());
    BasicBlockEdge TrueE(Parent, TrueSucc);
    Changed |= propagateEquality(BranchCond, TrueVal, TrueE);

    Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext());
    BasicBlockEdge FalseE(Parent, FalseSucc);
    Changed |= propagateEquality(BranchCond, FalseVal, FalseE);

    return Changed;
  }

  // For switches, propagate the case values into the case destinations.
  if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
    Value *SwitchCond = SI->getCondition();
    BasicBlock *Parent = SI->getParent();
    bool Changed = false;

    // Remember how many outgoing edges there are to every successor.
    SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
    for (unsigned i = 0, n = SI->getNumSuccessors(); i != n; ++i)
      ++SwitchEdges[SI->getSuccessor(i)];

    for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
         i != e; ++i) {
      BasicBlock *Dst = i.getCaseSuccessor();
      // If there is only a single edge, propagate the case value into it.
      if (SwitchEdges.lookup(Dst) == 1) {
        BasicBlockEdge E(Parent, Dst);
        Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E);
      }
    }
    return Changed;
  }

  // Instructions with void type don't return a value, so there's
  // no point in trying to find redundancies in them.
2305 if (I->getType()->isVoidTy()) return false; 2306 2307 uint32_t NextNum = VN.getNextUnusedValueNumber(); 2308 unsigned Num = VN.lookup_or_add(I); 2309 2310 // Allocations are always uniquely numbered, so we can save time and memory 2311 // by fast failing them. 2312 if (isa<AllocaInst>(I) || isa<TerminatorInst>(I) || isa<PHINode>(I)) { 2313 addToLeaderTable(Num, I, I->getParent()); 2314 return false; 2315 } 2316 2317 // If the number we were assigned was a brand new VN, then we don't 2318 // need to do a lookup to see if the number already exists 2319 // somewhere in the domtree: it can't! 2320 if (Num >= NextNum) { 2321 addToLeaderTable(Num, I, I->getParent()); 2322 return false; 2323 } 2324 2325 // Perform fast-path value-number based elimination of values inherited from 2326 // dominators. 2327 Value *repl = findLeader(I->getParent(), Num); 2328 if (!repl) { 2329 // Failure, just remember this instance for future use. 2330 addToLeaderTable(Num, I, I->getParent()); 2331 return false; 2332 } 2333 2334 // Remove it! 2335 patchAndReplaceAllUsesWith(I, repl); 2336 if (MD && repl->getType()->getScalarType()->isPointerTy()) 2337 MD->invalidateCachedPointerInfo(repl); 2338 markInstructionForDeletion(I); 2339 return true; 2340 } 2341 2342 /// runOnFunction - This is the main transformation entry point for a function. 2343 bool GVN::runOnFunction(Function& F) { 2344 if (skipOptnoneFunction(F)) 2345 return false; 2346 2347 if (!NoLoads) 2348 MD = &getAnalysis<MemoryDependenceAnalysis>(); 2349 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2350 AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); 2351 TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); 2352 VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>()); 2353 VN.setMemDep(MD); 2354 VN.setDomTree(DT); 2355 2356 bool Changed = false; 2357 bool ShouldContinue = true; 2358 2359 // Merge unconditional branches, allowing PRE to catch more 2360 // optimization opportunities. 2361 for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) { 2362 BasicBlock *BB = FI++; 2363 2364 bool removedBlock = MergeBlockIntoPredecessor( 2365 BB, DT, /* LoopInfo */ nullptr, VN.getAliasAnalysis(), MD); 2366 if (removedBlock) ++NumGVNBlocks; 2367 2368 Changed |= removedBlock; 2369 } 2370 2371 unsigned Iteration = 0; 2372 while (ShouldContinue) { 2373 DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n"); 2374 ShouldContinue = iterateOnFunction(F); 2375 Changed |= ShouldContinue; 2376 ++Iteration; 2377 } 2378 2379 if (EnablePRE) { 2380 // Fabricate val-num for dead-code in order to suppress assertion in 2381 // performPRE(). 2382 assignValNumForDeadCode(); 2383 bool PREChanged = true; 2384 while (PREChanged) { 2385 PREChanged = performPRE(F); 2386 Changed |= PREChanged; 2387 } 2388 } 2389 2390 // FIXME: Should perform GVN again after PRE does something. PRE can move 2391 // computations into blocks where they become fully redundant. Note that 2392 // we can't do this until PRE's critical edge splitting updates memdep. 2393 // Actually, when this happens, we should just fully integrate PRE into GVN. 2394 2395 cleanupGlobalSets(); 2396 // Do not cleanup DeadBlocks in cleanupGlobalSets() as it's called for each 2397 // iteration. 2398 DeadBlocks.clear(); 2399 2400 return Changed; 2401 } 2402 2403 2404 bool GVN::processBlock(BasicBlock *BB) { 2405 // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function 2406 // (and incrementing BI before processing an instruction). 
2407 assert(InstrsToErase.empty() && 2408 "We expect InstrsToErase to be empty across iterations"); 2409 if (DeadBlocks.count(BB)) 2410 return false; 2411 2412 bool ChangedFunction = false; 2413 2414 for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); 2415 BI != BE;) { 2416 ChangedFunction |= processInstruction(BI); 2417 if (InstrsToErase.empty()) { 2418 ++BI; 2419 continue; 2420 } 2421 2422 // If we need some instructions deleted, do it now. 2423 NumGVNInstr += InstrsToErase.size(); 2424 2425 // Avoid iterator invalidation. 2426 bool AtStart = BI == BB->begin(); 2427 if (!AtStart) 2428 --BI; 2429 2430 for (SmallVectorImpl<Instruction *>::iterator I = InstrsToErase.begin(), 2431 E = InstrsToErase.end(); I != E; ++I) { 2432 DEBUG(dbgs() << "GVN removed: " << **I << '\n'); 2433 if (MD) MD->removeInstruction(*I); 2434 DEBUG(verifyRemoved(*I)); 2435 (*I)->eraseFromParent(); 2436 } 2437 InstrsToErase.clear(); 2438 2439 if (AtStart) 2440 BI = BB->begin(); 2441 else 2442 ++BI; 2443 } 2444 2445 return ChangedFunction; 2446 } 2447 2448 // Instantiate an expression in a predecessor that lacked it. 2449 bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, 2450 unsigned int ValNo) { 2451 // Because we are going top-down through the block, all value numbers 2452 // will be available in the predecessor by the time we need them. Any 2453 // that weren't originally present will have been instantiated earlier 2454 // in this loop. 2455 bool success = true; 2456 for (unsigned i = 0, e = Instr->getNumOperands(); i != e; ++i) { 2457 Value *Op = Instr->getOperand(i); 2458 if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op)) 2459 continue; 2460 2461 if (Value *V = findLeader(Pred, VN.lookup(Op))) { 2462 Instr->setOperand(i, V); 2463 } else { 2464 success = false; 2465 break; 2466 } 2467 } 2468 2469 // Fail out if we encounter an operand that is not available in 2470 // the PRE predecessor. This is typically because of loads which 2471 // are not value numbered precisely. 2472 if (!success) 2473 return false; 2474 2475 Instr->insertBefore(Pred->getTerminator()); 2476 Instr->setName(Instr->getName() + ".pre"); 2477 Instr->setDebugLoc(Instr->getDebugLoc()); 2478 VN.add(Instr, ValNo); 2479 2480 // Update the availability map to include the new instruction. 2481 addToLeaderTable(ValNo, Instr, Pred); 2482 return true; 2483 } 2484 2485 bool GVN::performScalarPRE(Instruction *CurInst) { 2486 SmallVector<std::pair<Value*, BasicBlock*>, 8> predMap; 2487 2488 if (isa<AllocaInst>(CurInst) || isa<TerminatorInst>(CurInst) || 2489 isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() || 2490 CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || 2491 isa<DbgInfoIntrinsic>(CurInst)) 2492 return false; 2493 2494 // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from 2495 // sinking the compare again, and it would force the code generator to 2496 // move the i1 from processor flags or predicate registers into a general 2497 // purpose register. 2498 if (isa<CmpInst>(CurInst)) 2499 return false; 2500 2501 // We don't currently value number ANY inline asm calls. 2502 if (CallInst *CallI = dyn_cast<CallInst>(CurInst)) 2503 if (CallI->isInlineAsm()) 2504 return false; 2505 2506 uint32_t ValNo = VN.lookup(CurInst); 2507 2508 // Look for the predecessors for PRE opportunities. We're 2509 // only trying to solve the basic diamond case, where 2510 // a value is computed in the successor and one predecessor, 2511 // but not the other. 
We also explicitly disallow cases 2512 // where the successor is its own predecessor, because they're 2513 // more complicated to get right. 2514 unsigned NumWith = 0; 2515 unsigned NumWithout = 0; 2516 BasicBlock *PREPred = nullptr; 2517 BasicBlock *CurrentBlock = CurInst->getParent(); 2518 predMap.clear(); 2519 2520 for (pred_iterator PI = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock); 2521 PI != PE; ++PI) { 2522 BasicBlock *P = *PI; 2523 // We're not interested in PRE where the block is its 2524 // own predecessor, or in blocks with predecessors 2525 // that are not reachable. 2526 if (P == CurrentBlock) { 2527 NumWithout = 2; 2528 break; 2529 } else if (!DT->isReachableFromEntry(P)) { 2530 NumWithout = 2; 2531 break; 2532 } 2533 2534 Value *predV = findLeader(P, ValNo); 2535 if (!predV) { 2536 predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P)); 2537 PREPred = P; 2538 ++NumWithout; 2539 } else if (predV == CurInst) { 2540 /* CurInst dominates this predecessor. */ 2541 NumWithout = 2; 2542 break; 2543 } else { 2544 predMap.push_back(std::make_pair(predV, P)); 2545 ++NumWith; 2546 } 2547 } 2548 2549 // Don't do PRE when it might increase code size, i.e. when 2550 // we would need to insert instructions in more than one pred. 2551 if (NumWithout > 1 || NumWith == 0) 2552 return false; 2553 2554 // We may have a case where all predecessors have the instruction, 2555 // and we just need to insert a phi node. Otherwise, perform 2556 // insertion. 2557 Instruction *PREInstr = nullptr; 2558 2559 if (NumWithout != 0) { 2560 // Don't do PRE across indirect branch. 2561 if (isa<IndirectBrInst>(PREPred->getTerminator())) 2562 return false; 2563 2564 // We can't do PRE safely on a critical edge, so instead we schedule 2565 // the edge to be split and perform the PRE the next time we iterate 2566 // on the function. 2567 unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock); 2568 if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) { 2569 toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum)); 2570 return false; 2571 } 2572 // We need to insert somewhere, so let's give it a shot 2573 PREInstr = CurInst->clone(); 2574 if (!performScalarPREInsertion(PREInstr, PREPred, ValNo)) { 2575 // If we failed insertion, make sure we remove the instruction. 2576 DEBUG(verifyRemoved(PREInstr)); 2577 delete PREInstr; 2578 return false; 2579 } 2580 } 2581 2582 // Either we should have filled in the PRE instruction, or we should 2583 // not have needed insertions. 2584 assert (PREInstr != nullptr || NumWithout == 0); 2585 2586 ++NumGVNPRE; 2587 2588 // Create a PHI to make the value available in this block. 2589 PHINode *Phi = 2590 PHINode::Create(CurInst->getType(), predMap.size(), 2591 CurInst->getName() + ".pre-phi", CurrentBlock->begin()); 2592 for (unsigned i = 0, e = predMap.size(); i != e; ++i) { 2593 if (Value *V = predMap[i].first) 2594 Phi->addIncoming(V, predMap[i].second); 2595 else 2596 Phi->addIncoming(PREInstr, PREPred); 2597 } 2598 2599 VN.add(Phi, ValNo); 2600 addToLeaderTable(ValNo, Phi, CurrentBlock); 2601 Phi->setDebugLoc(CurInst->getDebugLoc()); 2602 CurInst->replaceAllUsesWith(Phi); 2603 if (Phi->getType()->getScalarType()->isPointerTy()) { 2604 // Because we have added a PHI-use of the pointer value, it has now 2605 // "escaped" from alias analysis' perspective. We need to inform 2606 // AA of this. 
    for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; ++ii) {
      unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
      VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj));
    }

    if (MD)
      MD->invalidateCachedPointerInfo(Phi);
  }
  VN.erase(CurInst);
  removeFromLeaderTable(ValNo, CurInst, CurrentBlock);

  DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
  if (MD)
    MD->removeInstruction(CurInst);
  DEBUG(verifyRemoved(CurInst));
  CurInst->eraseFromParent();
  ++NumGVNInstr;

  return true;
}

/// Perform a purely local form of PRE that looks for diamond
/// control flow patterns and attempts to perform simple PRE at the join point.
bool GVN::performPRE(Function &F) {
  bool Changed = false;
  for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) {
    // Nothing to PRE in the entry block.
    if (CurrentBlock == &F.getEntryBlock())
      continue;

    // Don't perform PRE on a landing pad.
    if (CurrentBlock->isLandingPad())
      continue;

    for (BasicBlock::iterator BI = CurrentBlock->begin(),
                              BE = CurrentBlock->end();
         BI != BE;) {
      Instruction *CurInst = BI++;
      Changed = performScalarPRE(CurInst);
    }
  }

  if (splitCriticalEdges())
    Changed = true;

  return Changed;
}

/// Split the critical edge connecting the given two blocks, and return
/// the block inserted on the critical edge.
BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
  BasicBlock *BB = SplitCriticalEdge(
      Pred, Succ, CriticalEdgeSplittingOptions(getAliasAnalysis(), DT));
  if (MD)
    MD->invalidateCachedPredecessors();
  return BB;
}

/// Split critical edges found during the previous
/// iteration that may enable further optimization.
bool GVN::splitCriticalEdges() {
  if (toSplit.empty())
    return false;
  do {
    std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
    SplitCriticalEdge(Edge.first, Edge.second,
                      CriticalEdgeSplittingOptions(getAliasAnalysis(), DT));
  } while (!toSplit.empty());
  if (MD) MD->invalidateCachedPredecessors();
  return true;
}

/// Executes one iteration of GVN.
bool GVN::iterateOnFunction(Function &F) {
  cleanupGlobalSets();

  // Top-down walk of the dominator tree
  bool Changed = false;
  // Save the blocks this function has before transformation begins.  GVN may
  // split critical edges, and hence may invalidate the RPO/DT iterator.
  //
  std::vector<BasicBlock *> BBVect;
  BBVect.reserve(256);
  // Needed for value numbering with phi construction to work.
  ReversePostOrderTraversal<Function *> RPOT(&F);
  for (ReversePostOrderTraversal<Function *>::rpo_iterator RI = RPOT.begin(),
                                                           RE = RPOT.end();
       RI != RE; ++RI)
    BBVect.push_back(*RI);

  for (std::vector<BasicBlock *>::iterator I = BBVect.begin(), E = BBVect.end();
       I != E; I++)
    Changed |= processBlock(*I);

  return Changed;
}

void GVN::cleanupGlobalSets() {
  VN.clear();
  LeaderTable.clear();
  TableAllocator.Reset();
}

/// Verify that the specified instruction does not occur in our
/// internal data structures.
void GVN::verifyRemoved(const Instruction *Inst) const {
  VN.verifyRemoved(Inst);

  // Walk through the value number scope to make sure the instruction isn't
  // ferreted away in it.
  for (DenseMap<uint32_t, LeaderTableEntry>::const_iterator
       I = LeaderTable.begin(), E = LeaderTable.end(); I != E; ++I) {
    const LeaderTableEntry *Node = &I->second;
    assert(Node->Val != Inst && "Inst still in value numbering scope!");

    while (Node->Next) {
      Node = Node->Next;
      assert(Node->Val != Inst && "Inst still in value numbering scope!");
    }
  }
}

/// BB is declared dead, which implies other blocks become dead as well. This
/// function is to add all these blocks to "DeadBlocks". For the dead blocks'
/// live successors, update their phi nodes by replacing the operands
/// corresponding to dead blocks with UndefVal.
void GVN::addDeadBlock(BasicBlock *BB) {
  SmallVector<BasicBlock *, 4> NewDead;
  SmallSetVector<BasicBlock *, 4> DF;

  NewDead.push_back(BB);
  while (!NewDead.empty()) {
    BasicBlock *D = NewDead.pop_back_val();
    if (DeadBlocks.count(D))
      continue;

    // All blocks dominated by D are dead.
    SmallVector<BasicBlock *, 8> Dom;
    DT->getDescendants(D, Dom);
    DeadBlocks.insert(Dom.begin(), Dom.end());

    // Figure out the dominance-frontier(D).
    for (SmallVectorImpl<BasicBlock *>::iterator I = Dom.begin(),
           E = Dom.end(); I != E; I++) {
      BasicBlock *B = *I;
      for (succ_iterator SI = succ_begin(B), SE = succ_end(B); SI != SE; SI++) {
        BasicBlock *S = *SI;
        if (DeadBlocks.count(S))
          continue;

        bool AllPredDead = true;
        for (pred_iterator PI = pred_begin(S), PE = pred_end(S); PI != PE; PI++)
          if (!DeadBlocks.count(*PI)) {
            AllPredDead = false;
            break;
          }

        if (!AllPredDead) {
          // S could be proved dead later on. That is why we don't update phi
          // operands at this moment.
          DF.insert(S);
        } else {
          // Although S is not dominated by D, it is dead by now. This can
          // happen if S already had a dead predecessor before D was declared
          // dead.
          NewDead.push_back(S);
        }
      }
    }
  }

  // For the dead blocks' live successors, update their phi nodes by replacing
  // the operands corresponding to dead blocks with UndefVal.
  for (SmallSetVector<BasicBlock *, 4>::iterator I = DF.begin(), E = DF.end();
       I != E; I++) {
    BasicBlock *B = *I;
    if (DeadBlocks.count(B))
      continue;

    SmallVector<BasicBlock *, 4> Preds(pred_begin(B), pred_end(B));
    for (SmallVectorImpl<BasicBlock *>::iterator PI = Preds.begin(),
           PE = Preds.end(); PI != PE; PI++) {
      BasicBlock *P = *PI;

      if (!DeadBlocks.count(P))
        continue;

      if (isCriticalEdge(P->getTerminator(), GetSuccessorNumber(P, B))) {
        if (BasicBlock *S = splitCriticalEdges(P, B))
          DeadBlocks.insert(P = S);
      }

      for (BasicBlock::iterator II = B->begin(); isa<PHINode>(II); ++II) {
        PHINode &Phi = cast<PHINode>(*II);
        Phi.setIncomingValue(Phi.getBasicBlockIndex(P),
                             UndefValue::get(Phi.getType()));
      }
    }
  }
}
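
// Purely illustrative example (hypothetical blocks and values, not drawn from
// this file): once processFoldableCondBr below folds a branch such as
//   br i1 false, label %never, label %live
// the never-taken target %never and every block it dominates become dead, and
// a PHI in a still-live successor such as
//   %x = phi i32 [ %a, %live ], [ %b, %never ]
// gets its incoming value for %never rewritten to undef.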

// If the given branch is recognized as a foldable branch (i.e. conditional
// branch with constant condition), it will perform the following analyses and
// transformation.
//  1) If the dead outgoing edge is a critical edge, split it.  Let R be the
//     target of the dead outgoing edge.
//  2) Identify the set of dead blocks implied by the branch's dead outgoing
//     edge.  The result of this step will be {X| X is dominated by R}.
//  3) Identify those blocks which have at least one dead predecessor.  The
//     result of this step will be dominance-frontier(R).
//  4) Update the PHIs in DF(R) by replacing the operands corresponding to
//     dead blocks with "UndefVal" in the hope that these PHIs will be
//     optimized away.
//
// Return true iff *NEW* dead code is found.
bool GVN::processFoldableCondBr(BranchInst *BI) {
  if (!BI || BI->isUnconditional())
    return false;

  ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
  if (!Cond)
    return false;

  BasicBlock *DeadRoot = Cond->getZExtValue() ?
                         BI->getSuccessor(1) : BI->getSuccessor(0);
  if (DeadBlocks.count(DeadRoot))
    return false;

  if (!DeadRoot->getSinglePredecessor())
    DeadRoot = splitCriticalEdges(BI->getParent(), DeadRoot);

  addDeadBlock(DeadRoot);
  return true;
}

// performPRE() will trigger an assert if it comes across an instruction
// without an associated val-num.  As it normally has far more live
// instructions than dead instructions, it makes more sense just to "fabricate"
// a val-number for the dead code than to check whether the instruction
// involved is dead or not.
void GVN::assignValNumForDeadCode() {
  for (SetVector<BasicBlock *>::iterator I = DeadBlocks.begin(),
         E = DeadBlocks.end(); I != E; I++) {
    BasicBlock *BB = *I;
    for (BasicBlock::iterator II = BB->begin(), EE = BB->end();
         II != EE; II++) {
      Instruction *Inst = &*II;
      unsigned ValNum = VN.lookup_or_add(Inst);
      addToLeaderTable(ValNum, Inst, BB);
    }
  }
}