//===- GVN.cpp - Eliminate redundant values and loads --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs global value numbering to eliminate fully redundant
// instructions. It also performs simple dead load elimination.
//
// Note that this pass does the value numbering itself; it does not use the
// ValueNumbering analysis passes.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "gvn"
#include "llvm/Transforms/Scalar.h"
#include "llvm/GlobalVariable.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;

STATISTIC(NumGVNInstr,  "Number of instructions deleted");
STATISTIC(NumGVNLoad,   "Number of loads deleted");
STATISTIC(NumGVNPRE,    "Number of instructions PRE'd");
STATISTIC(NumGVNBlocks, "Number of blocks merged");
STATISTIC(NumGVNSimpl,  "Number of instructions simplified");
STATISTIC(NumGVNEqProp, "Number of equalities propagated");
STATISTIC(NumPRELoad,   "Number of loads PRE'd");

static cl::opt<bool> EnablePRE("enable-pre",
                               cl::init(true), cl::Hidden);
static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));

//===----------------------------------------------------------------------===//
//                         ValueTable Class
//===----------------------------------------------------------------------===//

/// This class holds the mapping between values and value numbers. It is used
/// as an efficient mechanism to determine the expression-wise equivalence of
/// two values.
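/// Expressions are keyed on their opcode, type, and the value numbers of
/// their operands, so two instructions receive the same value number when
/// all of those match. Numbering starts at 1.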
namespace {
struct Expression {
  uint32_t opcode;
  Type *type;
  SmallVector<uint32_t, 4> varargs;

  Expression(uint32_t o = ~2U) : opcode(o) { }

  bool operator==(const Expression &other) const {
    if (opcode != other.opcode)
      return false;
    if (opcode == ~0U || opcode == ~1U)
      return true;
    if (type != other.type)
      return false;
    if (varargs != other.varargs)
      return false;
    return true;
  }
};

class ValueTable {
  DenseMap<Value*, uint32_t> valueNumbering;
  DenseMap<Expression, uint32_t> expressionNumbering;
  AliasAnalysis *AA;
  MemoryDependenceAnalysis *MD;
  DominatorTree *DT;

  uint32_t nextValueNumber;

  Expression create_expression(Instruction* I);
  Expression create_extractvalue_expression(ExtractValueInst* EI);
  uint32_t lookup_or_add_call(CallInst* C);
public:
  ValueTable() : nextValueNumber(1) { }
  uint32_t lookup_or_add(Value *V);
  uint32_t lookup(Value *V) const;
  void add(Value *V, uint32_t num);
  void clear();
  void erase(Value *v);
  void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
  AliasAnalysis *getAliasAnalysis() const { return AA; }
  void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
  void setDomTree(DominatorTree* D) { DT = D; }
  uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
  void verifyRemoved(const Value *) const;
};
}

namespace llvm {
template <> struct DenseMapInfo<Expression> {
  static inline Expression getEmptyKey() {
    return ~0U;
  }

  static inline Expression getTombstoneKey() {
    return ~1U;
  }

  static unsigned getHashValue(const Expression e) {
    unsigned hash = e.opcode;

    hash ^= (unsigned)((uintptr_t)e.type >> 4) ^
            (unsigned)((uintptr_t)e.type >> 9);

    for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
         E = e.varargs.end(); I != E; ++I)
      hash = *I + hash * 37;

    return hash;
  }
  static bool isEqual(const Expression &LHS, const Expression &RHS) {
    return LHS == RHS;
  }
};

}

//===----------------------------------------------------------------------===//
//                     ValueTable Internal Functions
//===----------------------------------------------------------------------===//

Expression ValueTable::create_expression(Instruction *I) {
  Expression e;
  e.type = I->getType();
  e.opcode = I->getOpcode();
  for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
       OI != OE; ++OI)
    e.varargs.push_back(lookup_or_add(*OI));

  if (CmpInst *C = dyn_cast<CmpInst>(I)) {
    e.opcode = (C->getOpcode() << 8) | C->getPredicate();
  } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
    for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
         II != IE; ++II)
      e.varargs.push_back(*II);
  }

  return e;
}

Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) {
  assert(EI != 0 && "Not an ExtractValueInst?");
  Expression e;
  e.type = EI->getType();
  e.opcode = 0;

  IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
  if (I != 0 && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) {
    // EI might be an extract from one of our recognised intrinsics. If it
    // is, we'll synthesize a semantically equivalent expression instead of
    // an extractvalue expression.
    switch (I->getIntrinsicID()) {
      case Intrinsic::sadd_with_overflow:
      case Intrinsic::uadd_with_overflow:
        e.opcode = Instruction::Add;
        break;
      case Intrinsic::ssub_with_overflow:
      case Intrinsic::usub_with_overflow:
        e.opcode = Instruction::Sub;
        break;
      case Intrinsic::smul_with_overflow:
      case Intrinsic::umul_with_overflow:
        e.opcode = Instruction::Mul;
        break;
      default:
        break;
    }

    if (e.opcode != 0) {
      // Intrinsic recognized. Grab its args to finish building the expression.
      assert(I->getNumArgOperands() == 2 &&
             "Expect two args for recognised intrinsics.");
      e.varargs.push_back(lookup_or_add(I->getArgOperand(0)));
      e.varargs.push_back(lookup_or_add(I->getArgOperand(1)));
      return e;
    }
  }

  // Not a recognised intrinsic. Fall back to producing an extract value
  // expression.
  e.opcode = EI->getOpcode();
  for (Instruction::op_iterator OI = EI->op_begin(), OE = EI->op_end();
       OI != OE; ++OI)
    e.varargs.push_back(lookup_or_add(*OI));

  for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end();
       II != IE; ++II)
    e.varargs.push_back(*II);

  return e;
}

//===----------------------------------------------------------------------===//
//                     ValueTable External Functions
//===----------------------------------------------------------------------===//

/// add - Insert a value into the table with a specified value number.
void ValueTable::add(Value *V, uint32_t num) {
  valueNumbering.insert(std::make_pair(V, num));
}

uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
  if (AA->doesNotAccessMemory(C)) {
    Expression exp = create_expression(C);
    uint32_t& e = expressionNumbering[exp];
    if (!e) e = nextValueNumber++;
    valueNumbering[C] = e;
    return e;
  } else if (AA->onlyReadsMemory(C)) {
    Expression exp = create_expression(C);
    uint32_t& e = expressionNumbering[exp];
    if (!e) {
      e = nextValueNumber++;
      valueNumbering[C] = e;
      return e;
    }
    if (!MD) {
      e = nextValueNumber++;
      valueNumbering[C] = e;
      return e;
    }

    MemDepResult local_dep = MD->getDependency(C);

    if (!local_dep.isDef() && !local_dep.isNonLocal()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (local_dep.isDef()) {
      CallInst* local_cdep = cast<CallInst>(local_dep.getInst());

      if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }

      for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
        uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
        uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
        if (c_vn != cd_vn) {
          valueNumbering[C] = nextValueNumber;
          return nextValueNumber++;
        }
      }

      uint32_t v = lookup_or_add(local_cdep);
      valueNumbering[C] = v;
      return v;
    }

    // Non-local case.
    const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
      MD->getNonLocalCallDependency(CallSite(C));
    // FIXME: Move the checking logic to MemDep!
    CallInst* cdep = 0;

    // Check to see if we have a single dominating call instruction that is
    // identical to C.
    for (unsigned i = 0, e = deps.size(); i != e; ++i) {
      const NonLocalDepEntry *I = &deps[i];
      if (I->getResult().isNonLocal())
        continue;

      // We don't handle non-definitions.
      // If we already have a call, reject instruction dependencies.
      if (!I->getResult().isDef() || cdep != 0) {
        cdep = 0;
        break;
      }

      CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
      // FIXME: All duplicated with non-local case.
      if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){
        cdep = NonLocalDepCall;
        continue;
      }

      cdep = 0;
      break;
    }

    if (!cdep) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }
    for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
      uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
      uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
      if (c_vn != cd_vn) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }
    }

    uint32_t v = lookup_or_add(cdep);
    valueNumbering[C] = v;
    return v;

  } else {
    valueNumbering[C] = nextValueNumber;
    return nextValueNumber++;
  }
}

/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
uint32_t ValueTable::lookup_or_add(Value *V) {
  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
  if (VI != valueNumbering.end())
    return VI->second;

  if (!isa<Instruction>(V)) {
    valueNumbering[V] = nextValueNumber;
    return nextValueNumber++;
  }

  Instruction* I = cast<Instruction>(V);
  Expression exp;
  switch (I->getOpcode()) {
    case Instruction::Call:
      return lookup_or_add_call(cast<CallInst>(I));
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
    case Instruction::Select:
    case Instruction::ExtractElement:
    case Instruction::InsertElement:
    case Instruction::ShuffleVector:
    case Instruction::InsertValue:
    case Instruction::GetElementPtr:
      exp = create_expression(I);
      break;
    case Instruction::ExtractValue:
      exp = create_extractvalue_expression(cast<ExtractValueInst>(I));
      break;
    default:
      valueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
  }

  uint32_t& e = expressionNumbering[exp];
  if (!e) e = nextValueNumber++;
  valueNumbering[V] = e;
  return e;
}

/// lookup - Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
uint32_t ValueTable::lookup(Value *V) const {
  DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
  assert(VI != valueNumbering.end() && "Value not numbered?");
  return VI->second;
}

/// clear - Remove all entries from the ValueTable.
void ValueTable::clear() {
  valueNumbering.clear();
  expressionNumbering.clear();
  nextValueNumber = 1;
}

/// erase - Remove a value from the value numbering.
void ValueTable::erase(Value *V) {
  valueNumbering.erase(V);
}

/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
void ValueTable::verifyRemoved(const Value *V) const {
  for (DenseMap<Value*, uint32_t>::const_iterator
         I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
    assert(I->first != V && "Inst still occurs in value numbering map!");
  }
}

//===----------------------------------------------------------------------===//
//                                GVN Pass
//===----------------------------------------------------------------------===//

namespace {

class GVN : public FunctionPass {
  bool NoLoads;
  MemoryDependenceAnalysis *MD;
  DominatorTree *DT;
  const TargetData *TD;

  ValueTable VN;

  /// LeaderTable - A mapping from value numbers to lists of Value*'s that
  /// have that value number. Use findLeader to query it.
  struct LeaderTableEntry {
    Value *Val;
    BasicBlock *BB;
    LeaderTableEntry *Next;
  };
  DenseMap<uint32_t, LeaderTableEntry> LeaderTable;
  BumpPtrAllocator TableAllocator;

  SmallVector<Instruction*, 8> InstrsToErase;
public:
  static char ID; // Pass identification, replacement for typeid
  explicit GVN(bool noloads = false)
    : FunctionPass(ID), NoLoads(noloads), MD(0) {
    initializeGVNPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F);

  /// markInstructionForDeletion - This removes the specified instruction from
  /// our various maps and marks it for deletion.
  void markInstructionForDeletion(Instruction *I) {
    VN.erase(I);
    InstrsToErase.push_back(I);
  }

  const TargetData *getTargetData() const { return TD; }
  DominatorTree &getDominatorTree() const { return *DT; }
  AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); }
  MemoryDependenceAnalysis &getMemDep() const { return *MD; }
private:
  /// addToLeaderTable - Push a new Value to the LeaderTable onto the list for
  /// its value number.
  void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
    LeaderTableEntry &Curr = LeaderTable[N];
    if (!Curr.Val) {
      Curr.Val = V;
      Curr.BB = BB;
      return;
    }

    LeaderTableEntry *Node = TableAllocator.Allocate<LeaderTableEntry>();
    Node->Val = V;
    Node->BB = BB;
    Node->Next = Curr.Next;
    Curr.Next = Node;
  }

  /// removeFromLeaderTable - Scan the list of values corresponding to a given
  /// value number, and remove the given value if encountered.
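  /// Note that the head of each list is stored inline in the DenseMap, so if
  /// the head itself is the entry being removed, its contents are overwritten
  /// with those of the next node (or cleared entirely if it has no successor)
  /// rather than being unlinked.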
  void removeFromLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
    LeaderTableEntry* Prev = 0;
    LeaderTableEntry* Curr = &LeaderTable[N];

    while (Curr->Val != V || Curr->BB != BB) {
      Prev = Curr;
      Curr = Curr->Next;
    }

    if (Prev) {
      Prev->Next = Curr->Next;
    } else {
      if (!Curr->Next) {
        Curr->Val = 0;
        Curr->BB = 0;
      } else {
        LeaderTableEntry* Next = Curr->Next;
        Curr->Val = Next->Val;
        Curr->BB = Next->BB;
        Curr->Next = Next->Next;
      }
    }
  }

  // List of critical edges to be split between iterations.
  SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;

  // This transformation requires dominator info.
  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequired<DominatorTree>();
    if (!NoLoads)
      AU.addRequired<MemoryDependenceAnalysis>();
    AU.addRequired<AliasAnalysis>();

    AU.addPreserved<DominatorTree>();
    AU.addPreserved<AliasAnalysis>();
  }


  // Helper functions.
  // FIXME: eliminate or document these better
  bool processLoad(LoadInst *L);
  bool processInstruction(Instruction *I);
  bool processNonLocalLoad(LoadInst *L);
  bool processBlock(BasicBlock *BB);
  void dump(DenseMap<uint32_t, Value*> &d);
  bool iterateOnFunction(Function &F);
  bool performPRE(Function &F);
  Value *findLeader(BasicBlock *BB, uint32_t num);
  void cleanupGlobalSets();
  void verifyRemoved(const Instruction *I) const;
  bool splitCriticalEdges();
  unsigned replaceAllDominatedUsesWith(Value *From, Value *To,
                                       BasicBlock *Root);
  bool propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root);
};

char GVN::ID = 0;
}

// createGVNPass - The public interface to this file...
FunctionPass *llvm::createGVNPass(bool NoLoads) {
  return new GVN(NoLoads);
}

INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)

void GVN::dump(DenseMap<uint32_t, Value*>& d) {
  errs() << "{\n";
  for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
       E = d.end(); I != E; ++I) {
    errs() << I->first << "\n";
    I->second->dump();
  }
  errs() << "}\n";
}

/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
/// we're analyzing is fully available in the specified block. As we go, keep
/// track of which blocks we know are fully alive in FullyAvailableBlocks. This
/// map is actually a tri-state map with the following values:
///   0) we know the block *is not* fully available.
///   1) we know the block *is* fully available.
///   2) we do not know whether the block is fully available or not, but we are
///      currently speculating that it will be.
///   3) we are speculating for this block and have used that to speculate for
///      other blocks.
static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
                            DenseMap<BasicBlock*, char> &FullyAvailableBlocks) {
  // Optimistically assume that the block is fully available and check to see
  // if we already know about this block in one lookup.
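  // Seeding the entry with '2' (speculatively available) before recursing
  // means a query that reaches this block again, e.g. around a loop back-edge,
  // treats it as available instead of recursing forever; the speculation is
  // undone below if it turns out to be wrong.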
  std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
    FullyAvailableBlocks.insert(std::make_pair(BB, 2));

  // If the entry already existed for this block, return the precomputed value.
  if (!IV.second) {
    // If this is a speculative "available" value, mark it as being used for
    // speculation of other blocks.
    if (IV.first->second == 2)
      IV.first->second = 3;
    return IV.first->second != 0;
  }

  // Otherwise, see if it is fully available in all predecessors.
  pred_iterator PI = pred_begin(BB), PE = pred_end(BB);

  // If this block has no predecessors, it isn't live-in here.
  if (PI == PE)
    goto SpeculationFailure;

  for (; PI != PE; ++PI)
    // If the value isn't fully available in one of our predecessors, then it
    // isn't fully available in this block either. Undo our previous
    // optimistic assumption and bail out.
    if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
      goto SpeculationFailure;

  return true;

// SpeculationFailure - If we get here, we found out that this is not, after
// all, a fully-available block. We have a problem if we speculated on this and
// used the speculation to mark other blocks as available.
SpeculationFailure:
  char &BBVal = FullyAvailableBlocks[BB];

  // If we didn't speculate on this, just return with it set to false.
  if (BBVal == 2) {
    BBVal = 0;
    return false;
  }

  // If we did speculate on this value, we could have blocks set to 1 that are
  // incorrect. Walk the (transitive) successors of this block and mark them as
  // 0 if set to one.
  SmallVector<BasicBlock*, 32> BBWorklist;
  BBWorklist.push_back(BB);

  do {
    BasicBlock *Entry = BBWorklist.pop_back_val();
    // Note that this sets blocks to 0 (unavailable) if they happen to not
    // already be in FullyAvailableBlocks. This is safe.
    char &EntryVal = FullyAvailableBlocks[Entry];
    if (EntryVal == 0) continue; // Already unavailable.

    // Mark as unavailable.
    EntryVal = 0;

    for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
      BBWorklist.push_back(*I);
  } while (!BBWorklist.empty());

  return false;
}


/// CanCoerceMustAliasedValueToLoad - Return true if
/// CoerceAvailableValueToLoadType will succeed.
static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
                                            Type *LoadTy,
                                            const TargetData &TD) {
  // If the loaded or stored value is a first class array or struct, don't try
  // to transform them. We need to be able to bitcast to integer.
  if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
      StoredVal->getType()->isStructTy() ||
      StoredVal->getType()->isArrayTy())
    return false;

  // The store has to be at least as big as the load.
  if (TD.getTypeSizeInBits(StoredVal->getType()) <
      TD.getTypeSizeInBits(LoadTy))
    return false;

  return true;
}


/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
/// then a load from a must-aliased pointer of a different type, try to coerce
/// the stored value. LoadedTy is the type of the load we want to replace and
/// InsertPt is the place to insert new instructions.
///
/// If we can't do it, return null.
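///
/// For example, if an i64 was stored and an i32 is loaded from the same
/// address, the stored value is shifted down (on big-endian targets) and
/// truncated to produce the i32; pointer-typed values are first converted
/// through the target's integer pointer type.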
static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
                                             Type *LoadedTy,
                                             Instruction *InsertPt,
                                             const TargetData &TD) {
  if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
    return 0;

  // If this is already the right type, just return it.
  Type *StoredValTy = StoredVal->getType();

  uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy);
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);

  // If the store and reload are the same size, we can always reuse it.
  if (StoreSize == LoadSize) {
    // Pointer to Pointer -> use bitcast.
    if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy())
      return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);

    // Convert source pointers to integers, which can be bitcast.
    if (StoredValTy->isPointerTy()) {
      StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
      StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
    }

    Type *TypeToCastTo = LoadedTy;
    if (TypeToCastTo->isPointerTy())
      TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());

    if (StoredValTy != TypeToCastTo)
      StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);

    // Cast to pointer if the load needs a pointer type.
    if (LoadedTy->isPointerTy())
      StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);

    return StoredVal;
  }

  // If the loaded value is smaller than the available value, then we can
  // extract out a piece from it. If the available value is too small, then we
  // can't do anything.
  assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");

  // Convert source pointers to integers, which can be manipulated.
  if (StoredValTy->isPointerTy()) {
    StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
    StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
  }

  // Convert vectors and fp to integer, which can be manipulated.
  if (!StoredValTy->isIntegerTy()) {
    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
    StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
  }

  // If this is a big-endian system, we need to shift the value down to the low
  // bits so that a truncate will work.
  if (TD.isBigEndian()) {
    Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
    StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
  }

  // Truncate the integer to the right size now.
  Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
  StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);

  if (LoadedTy == NewIntTy)
    return StoredVal;

  // If the result is a pointer, inttoptr.
  if (LoadedTy->isPointerTy())
    return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);

  // Otherwise, bitcast.
  return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
}

/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
/// memdep query of a load that ends up being a clobbering memory write (store,
/// memset, memcpy, memmove). This means that the write *may* provide bits used
/// by the load but we can't be sure because the pointers don't mustalias.
///
/// Check this case to see if there is anything more we can do before we give
/// up.  This returns -1 if we have to give up, or a byte number in the stored
/// value of the piece that feeds the load.
static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
                                          Value *WritePtr,
                                          uint64_t WriteSizeInBits,
                                          const TargetData &TD) {
  // If the loaded or stored value is a first class array or struct, don't try
  // to transform them. We need to be able to bitcast to integer.
  if (LoadTy->isStructTy() || LoadTy->isArrayTy())
    return -1;

  int64_t StoreOffset = 0, LoadOffset = 0;
  Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD);
  Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
  if (StoreBase != LoadBase)
    return -1;

  // If the load and store are to the exact same address, they should have been
  // a must alias. AA must have gotten confused.
  // FIXME: Study to see if/when this happens. One case is forwarding a memset
  // to a load from the base of the memset.
#if 0
  if (LoadOffset == StoreOffset) {
    dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
           << "Base = " << *StoreBase << "\n"
           << "Store Ptr = " << *WritePtr << "\n"
           << "Store Offs = " << StoreOffset << "\n"
           << "Load Ptr = " << *LoadPtr << "\n";
    abort();
  }
#endif

  // If the load and store don't overlap at all, the store doesn't provide
  // anything to the load. In this case, they really don't alias at all, AA
  // must have gotten confused.
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);

  if ((WriteSizeInBits & 7) | (LoadSize & 7))
    return -1;
  uint64_t StoreSize = WriteSizeInBits >> 3;  // Convert to bytes.
  LoadSize >>= 3;


  bool isAAFailure = false;
  if (StoreOffset < LoadOffset)
    isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
  else
    isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;

  if (isAAFailure) {
#if 0
    dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n"
           << "Base = " << *StoreBase << "\n"
           << "Store Ptr = " << *WritePtr << "\n"
           << "Store Offs = " << StoreOffset << "\n"
           << "Load Ptr = " << *LoadPtr << "\n";
    abort();
#endif
    return -1;
  }

  // If the Load isn't completely contained within the stored bits, we don't
  // have all the bits to feed it. We could do something crazy in the future
  // (issue a smaller load and then merge the bits in) but this seems unlikely
  // to be valuable.
  if (StoreOffset > LoadOffset ||
      StoreOffset+StoreSize < LoadOffset+LoadSize)
    return -1;

  // Okay, we can do this transformation. Return the number of bytes into the
  // store that the load is.
  return LoadOffset-StoreOffset;
}

/// AnalyzeLoadFromClobberingStore - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
                                          StoreInst *DepSI,
                                          const TargetData &TD) {
  // Cannot handle reading from store of first-class aggregate yet.
  if (DepSI->getValueOperand()->getType()->isStructTy() ||
      DepSI->getValueOperand()->getType()->isArrayTy())
    return -1;

  Value *StorePtr = DepSI->getPointerOperand();
  uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getValueOperand()->getType());
  return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
                                        StorePtr, StoreSize, TD);
}

/// AnalyzeLoadFromClobberingLoad - This function is called when we have a
/// memdep query of a load that ends up being clobbered by another load. See if
/// the other load can feed into the second load.
static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
                                         LoadInst *DepLI, const TargetData &TD){
  // Cannot handle reading from store of first-class aggregate yet.
  if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
    return -1;

  Value *DepPtr = DepLI->getPointerOperand();
  uint64_t DepSize = TD.getTypeSizeInBits(DepLI->getType());
  int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, TD);
  if (R != -1) return R;

  // If we have a load/load clobber and DepLI can be widened to cover this
  // load, then we should widen it!
  int64_t LoadOffs = 0;
  const Value *LoadBase =
    GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, TD);
  unsigned LoadSize = TD.getTypeStoreSize(LoadTy);

  unsigned Size = MemoryDependenceAnalysis::
    getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, TD);
  if (Size == 0) return -1;

  return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, TD);
}



static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
                                            MemIntrinsic *MI,
                                            const TargetData &TD) {
  // If the mem operation is a non-constant size, we can't handle it.
  ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
  if (SizeCst == 0) return -1;
  uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;

  // If this is memset, we just need to see if the offset is valid in the size
  // of the memset.
  if (MI->getIntrinsicID() == Intrinsic::memset)
    return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
                                          MemSizeInBits, TD);

  // If we have a memcpy/memmove, the only case we can handle is if this is a
  // copy from constant memory. In that case, we can read directly from the
  // constant memory.
  MemTransferInst *MTI = cast<MemTransferInst>(MI);

  Constant *Src = dyn_cast<Constant>(MTI->getSource());
  if (Src == 0) return -1;

  GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &TD));
  if (GV == 0 || !GV->isConstant()) return -1;

  // See if the access is within the bounds of the transfer.
  int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
                                              MI->getDest(), MemSizeInBits, TD);
  if (Offset == -1)
    return Offset;

  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate.
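  // That is, check whether a load of LoadTy from (i8*)Src + Offset folds to a
  // constant; if it does, the same folding is performed for real when the
  // value is materialized in GetMemInstValueForLoad.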
  Src = ConstantExpr::getBitCast(Src,
                                 llvm::Type::getInt8PtrTy(Src->getContext()));
  Constant *OffsetCst =
    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
  Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
  if (ConstantFoldLoadFromConstPtr(Src, &TD))
    return Offset;
  return -1;
}


/// GetStoreValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store. This means
/// that the store provides bits used by the load but the pointers don't
/// mustalias. Check this case to see if there is anything more we can do
/// before we give up.
static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
                                   Type *LoadTy,
                                   Instruction *InsertPt, const TargetData &TD){
  LLVMContext &Ctx = SrcVal->getType()->getContext();

  uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
  uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8;

  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);

  // Compute which bits of the stored value are being used by the load. Convert
  // to an integer type to start with.
  if (SrcVal->getType()->isPointerTy())
    SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx));
  if (!SrcVal->getType()->isIntegerTy())
    SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8));

  // Shift the bits to the least significant depending on endianness.
  unsigned ShiftAmt;
  if (TD.isLittleEndian())
    ShiftAmt = Offset*8;
  else
    ShiftAmt = (StoreSize-LoadSize-Offset)*8;

  if (ShiftAmt)
    SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt);

  if (LoadSize != StoreSize)
    SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8));

  return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
}

/// GetLoadValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering load. This means
/// that the earlier load *may* provide bits used by this load but we can't be
/// sure because the pointers don't mustalias. Check this case to see if there
/// is anything more we can do before we give up.
static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
                                  Type *LoadTy, Instruction *InsertPt,
                                  GVN &gvn) {
  const TargetData &TD = *gvn.getTargetData();
  // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
  // widen SrcVal out to a larger load.
  unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType());
  unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
  if (Offset+LoadSize > SrcValSize) {
    assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
    assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
    // If we have a load/load clobber and DepLI can be widened to cover this
    // load, then we should widen it to the next power of 2 size big enough!
    unsigned NewLoadSize = Offset+LoadSize;
    if (!isPowerOf2_32(NewLoadSize))
      NewLoadSize = NextPowerOf2(NewLoadSize);

    Value *PtrVal = SrcVal->getPointerOperand();

    // Insert the new load after the old load. This ensures that subsequent
    // memdep queries will find the new load. We can't easily remove the old
    // load completely because it is already in the value numbering table.
    IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
    Type *DestPTy =
      IntegerType::get(LoadTy->getContext(), NewLoadSize*8);
    DestPTy = PointerType::get(DestPTy,
                      cast<PointerType>(PtrVal->getType())->getAddressSpace());
    Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
    PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
    LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
    NewLoad->takeName(SrcVal);
    NewLoad->setAlignment(SrcVal->getAlignment());

    DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
    DEBUG(dbgs() << "TO: " << *NewLoad << "\n");

    // Replace uses of the original load with the wider load. On a big endian
    // system, we need to shift down to get the relevant bits.
    Value *RV = NewLoad;
    if (TD.isBigEndian())
      RV = Builder.CreateLShr(RV,
                    NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits());
    RV = Builder.CreateTrunc(RV, SrcVal->getType());
    SrcVal->replaceAllUsesWith(RV);

    // We would like to use gvn.markInstructionForDeletion here, but we can't
    // because the load is already memoized into the leader map table that GVN
    // tracks. It is potentially possible to remove the load from the table,
    // but then all of the operations based on it would need to be rehashed.
    // Just leave the dead load around.
    gvn.getMemDep().removeInstruction(SrcVal);
    SrcVal = NewLoad;
  }

  return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, TD);
}


/// GetMemInstValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering mem intrinsic.
static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
                                     Type *LoadTy, Instruction *InsertPt,
                                     const TargetData &TD){
  LLVMContext &Ctx = LoadTy->getContext();
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;

  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);

  // We know that this method is only called when the mem transfer fully
  // provides the bits for the load.
  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
    // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
    // independently of what the offset is.
    Value *Val = MSI->getValue();
    if (LoadSize != 1)
      Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));

    Value *OneElt = Val;

    // Splat the value out to the right number of bits.
    for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
      // If we can double the number of bytes set, do it.
      if (NumBytesSet*2 <= LoadSize) {
        Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
        Val = Builder.CreateOr(Val, ShVal);
        NumBytesSet <<= 1;
        continue;
      }

      // Otherwise insert one byte at a time.
      Value *ShVal = Builder.CreateShl(Val, 1*8);
      Val = Builder.CreateOr(OneElt, ShVal);
      ++NumBytesSet;
    }

    return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
  }

  // Otherwise, this is a memcpy/memmove from a constant global.
  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
  Constant *Src = cast<Constant>(MTI->getSource());

  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate.
  Src = ConstantExpr::getBitCast(Src,
                                 llvm::Type::getInt8PtrTy(Src->getContext()));
  Constant *OffsetCst =
    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
  Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
  return ConstantFoldLoadFromConstPtr(Src, &TD);
}

namespace {

struct AvailableValueInBlock {
  /// BB - The basic block in question.
  BasicBlock *BB;
  enum ValType {
    SimpleVal,  // A simple offsetted value that is accessed.
    LoadVal,    // A value produced by a load.
    MemIntrin   // A memory intrinsic which is loaded from.
  };

  /// V - The value that is live out of the block.
  PointerIntPair<Value *, 2, ValType> Val;

  /// Offset - The byte offset in Val that is interesting for the load query.
  unsigned Offset;

  static AvailableValueInBlock get(BasicBlock *BB, Value *V,
                                   unsigned Offset = 0) {
    AvailableValueInBlock Res;
    Res.BB = BB;
    Res.Val.setPointer(V);
    Res.Val.setInt(SimpleVal);
    Res.Offset = Offset;
    return Res;
  }

  static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
                                     unsigned Offset = 0) {
    AvailableValueInBlock Res;
    Res.BB = BB;
    Res.Val.setPointer(MI);
    Res.Val.setInt(MemIntrin);
    Res.Offset = Offset;
    return Res;
  }

  static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
                                       unsigned Offset = 0) {
    AvailableValueInBlock Res;
    Res.BB = BB;
    Res.Val.setPointer(LI);
    Res.Val.setInt(LoadVal);
    Res.Offset = Offset;
    return Res;
  }

  bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
  bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
  bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }

  Value *getSimpleValue() const {
    assert(isSimpleValue() && "Wrong accessor");
    return Val.getPointer();
  }

  LoadInst *getCoercedLoadValue() const {
    assert(isCoercedLoadValue() && "Wrong accessor");
    return cast<LoadInst>(Val.getPointer());
  }

  MemIntrinsic *getMemIntrinValue() const {
    assert(isMemIntrinValue() && "Wrong accessor");
    return cast<MemIntrinsic>(Val.getPointer());
  }

  /// MaterializeAdjustedValue - Emit code into this block to adjust the value
  /// defined here to the specified type. This handles various coercion cases.
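  /// The emitted code is placed immediately before this block's terminator;
  /// e.g. a value stored as a wider integer is shifted and truncated down to
  /// LoadTy via GetStoreValueForLoad, and a mem intrinsic source is expanded
  /// via GetMemInstValueForLoad.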
  Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const {
    Value *Res;
    if (isSimpleValue()) {
      Res = getSimpleValue();
      if (Res->getType() != LoadTy) {
        const TargetData *TD = gvn.getTargetData();
        assert(TD && "Need target data to handle type mismatch case");
        Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
                                   *TD);

        DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
                     << *getSimpleValue() << '\n'
                     << *Res << '\n' << "\n\n\n");
      }
    } else if (isCoercedLoadValue()) {
      LoadInst *Load = getCoercedLoadValue();
      if (Load->getType() == LoadTy && Offset == 0) {
        Res = Load;
      } else {
        Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
                                  gvn);

        DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
                     << *getCoercedLoadValue() << '\n'
                     << *Res << '\n' << "\n\n\n");
      }
    } else {
      const TargetData *TD = gvn.getTargetData();
      assert(TD && "Need target data to handle type mismatch case");
      Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
                                   LoadTy, BB->getTerminator(), *TD);
      DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
                   << " " << *getMemIntrinValue() << '\n'
                   << *Res << '\n' << "\n\n\n");
    }
    return Res;
  }
};

} // end anonymous namespace

/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI. This returns the value
/// that should be used at LI's definition site.
static Value *ConstructSSAForLoadSet(LoadInst *LI,
                         SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
                                     GVN &gvn) {
  // Check for the fully redundant, dominating load case. In this case, we can
  // just use the dominating value directly.
  if (ValuesPerBlock.size() == 1 &&
      gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
                                               LI->getParent()))
    return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn);

  // Otherwise, we have to construct SSA form.
  SmallVector<PHINode*, 8> NewPHIs;
  SSAUpdater SSAUpdate(&NewPHIs);
  SSAUpdate.Initialize(LI->getType(), LI->getName());

  Type *LoadTy = LI->getType();

  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
    const AvailableValueInBlock &AV = ValuesPerBlock[i];
    BasicBlock *BB = AV.BB;

    if (SSAUpdate.HasValueForBlock(BB))
      continue;

    SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, gvn));
  }

  // Perform PHI construction.
  Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());

  // If new PHI nodes were created, notify alias analysis.
  if (V->getType()->isPointerTy()) {
    AliasAnalysis *AA = gvn.getAliasAnalysis();

    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
      AA->copyValue(LI, NewPHIs[i]);

    // Now that we've copied information to the new PHIs, scan through
    // them again and inform alias analysis that we've added potentially
    // escaping uses to any values that are operands to these PHIs.
    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) {
      PHINode *P = NewPHIs[i];
      for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) {
        unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
        AA->addEscapingUse(P->getOperandUse(jj));
      }
    }
  }

  return V;
}

static bool isLifetimeStart(const Instruction *Inst) {
  if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
    return II->getIntrinsicID() == Intrinsic::lifetime_start;
  return false;
}

/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
bool GVN::processNonLocalLoad(LoadInst *LI) {
  // Find the non-local dependencies of the load.
  SmallVector<NonLocalDepResult, 64> Deps;
  AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
  MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
  //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
  //             << Deps.size() << *LI << '\n');

  // If we had to process more than one hundred blocks to find the
  // dependencies, this load isn't worth worrying about. Optimizing
  // it will be too expensive.
  if (Deps.size() > 100)
    return false;

  // If we had a phi translation failure, we'll have a single entry which is a
  // clobber in the current block. Reject this early.
  if (Deps.size() == 1
      && !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber())
  {
    DEBUG(
      dbgs() << "GVN: non-local load ";
      WriteAsOperand(dbgs(), LI);
      dbgs() << " has unknown dependencies\n";
    );
    return false;
  }

  // Filter out useless results (non-locals, etc). Keep track of the blocks
  // where we have a value available in repl, also keep track of whether we see
  // dependencies that produce an unknown value for the load (such as a call
  // that could potentially clobber the load).
  SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
  SmallVector<BasicBlock*, 16> UnavailableBlocks;

  for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
    BasicBlock *DepBB = Deps[i].getBB();
    MemDepResult DepInfo = Deps[i].getResult();

    if (!DepInfo.isDef() && !DepInfo.isClobber()) {
      UnavailableBlocks.push_back(DepBB);
      continue;
    }

    if (DepInfo.isClobber()) {
      // The address being loaded in this non-local block may not be the same
      // as the pointer operand of the load if PHI translation occurs. Make
      // sure to consider the right address.
      Value *Address = Deps[i].getAddress();

      // If the dependence is to a store that writes to a superset of the bits
      // read by the load, we can extract the bits we need for the load from
      // the stored value.
      if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
        if (TD && Address) {
          int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
                                                      DepSI, *TD);
          if (Offset != -1) {
            ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                      DepSI->getValueOperand(),
                                                                Offset));
            continue;
          }
        }
      }

      // Check to see if we have something like this:
      //    load i32* P
      //    load i8* (P+1)
      // if we have this, replace the latter with an extraction from the former.
      if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) {
        // If this is a clobber and L is the first instruction in its block,
        // then we have the first instruction in the entry block.
        if (DepLI != LI && Address && TD) {
          int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(),
                                                     LI->getPointerOperand(),
                                                     DepLI, *TD);

          if (Offset != -1) {
            ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI,
                                                                    Offset));
            continue;
          }
        }
      }

      // If the clobbering value is a memset/memcpy/memmove, see if we can
      // forward a value on from it.
      if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
        if (TD && Address) {
          int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
                                                        DepMI, *TD);
          if (Offset != -1) {
            ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
                                                                  Offset));
            continue;
          }
        }
      }

      UnavailableBlocks.push_back(DepBB);
      continue;
    }

    // DepInfo.isDef() here

    Instruction *DepInst = DepInfo.getInst();

    // Loading the allocation -> undef.
    if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) ||
        // Loading immediately after lifetime begin -> undef.
        isLifetimeStart(DepInst)) {
      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                             UndefValue::get(LI->getType())));
      continue;
    }

    if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
      // Reject loads and stores that are to the same address but are of
      // different types if we have to.
      if (S->getValueOperand()->getType() != LI->getType()) {
        // If the stored value is larger than or equal to the loaded value, we
        // can reuse it.
        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
                                                        LI->getType(), *TD)) {
          UnavailableBlocks.push_back(DepBB);
          continue;
        }
      }

      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                        S->getValueOperand()));
      continue;
    }

    if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
      // If the types mismatch and we can't handle it, reject reuse of the load.
      if (LD->getType() != LI->getType()) {
        // If the stored value is larger than or equal to the loaded value, we
        // can reuse it.
        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
          UnavailableBlocks.push_back(DepBB);
          continue;
        }
      }
      ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB, LD));
      continue;
    }

    UnavailableBlocks.push_back(DepBB);
    continue;
  }

  // If we have no predecessors that produce a known value for this load, exit
  // early.
  if (ValuesPerBlock.empty()) return false;

  // If all of the instructions we depend on produce a known value for this
  // load, then it is fully redundant and we can use PHI insertion to compute
  // its value. Insert PHIs and remove the fully redundant value now.
  if (UnavailableBlocks.empty()) {
    DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');

    // Perform PHI construction.
    Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
    LI->replaceAllUsesWith(V);

    if (isa<PHINode>(V))
      V->takeName(LI);
    if (V->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(V);
    markInstructionForDeletion(LI);
    ++NumGVNLoad;
    return true;
  }

  if (!EnablePRE || !EnableLoadPRE)
    return false;

  // Okay, we have *some* definitions of the value. This means that the value
  // is available in some of our (transitive) predecessors. Let's think about
  // doing PRE of this load. This will involve inserting a new load into the
  // predecessor when it's not available. We could do this in general, but
  // prefer to not increase code size. As such, we only do this when we know
  // that we only have to insert *one* load (which means we're basically moving
  // the load, not inserting a new one).

  SmallPtrSet<BasicBlock *, 4> Blockers;
  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
    Blockers.insert(UnavailableBlocks[i]);

  // Let's find the first basic block with more than one predecessor. Walk
  // backwards through predecessors if needed.
  BasicBlock *LoadBB = LI->getParent();
  BasicBlock *TmpBB = LoadBB;

  bool isSinglePred = false;
  bool allSingleSucc = true;
  while (TmpBB->getSinglePredecessor()) {
    isSinglePred = true;
    TmpBB = TmpBB->getSinglePredecessor();
    if (TmpBB == LoadBB) // Infinite (unreachable) loop.
      return false;
    if (Blockers.count(TmpBB))
      return false;

    // If any of these blocks has more than one successor (i.e. if the edge we
    // just traversed was critical), then there are other paths through this
    // block along which the load may not be anticipated. Hoisting the load
    // above this block would be adding the load to execution paths along
    // which it was not previously executed.
    if (TmpBB->getTerminator()->getNumSuccessors() != 1)
      return false;
  }

  assert(TmpBB);
  LoadBB = TmpBB;

  // FIXME: It is extremely unclear what this loop is doing, other than
  // artificially restricting loadpre.
  if (isSinglePred) {
    bool isHot = false;
    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
      const AvailableValueInBlock &AV = ValuesPerBlock[i];
      if (AV.isSimpleValue())
        // "Hot" Instruction is in some loop (because it dominates its dep.
        // instruction).
        if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
          if (DT->dominates(LI, I)) {
            isHot = true;
            break;
          }
    }

    // We are interested only in "hot" instructions. We don't want to do any
    // mis-optimizations here.
    if (!isHot)
      return false;
  }

  // Check to see how many predecessors have the loaded value fully
  // available.
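  // Seed the tri-state availability map: blocks that supplied a value are
  // fully available, blocks with an unknown or unusable dependency are not.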
  DenseMap<BasicBlock*, Value*> PredLoads;
  DenseMap<BasicBlock*, char> FullyAvailableBlocks;
  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
    FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
    FullyAvailableBlocks[UnavailableBlocks[i]] = false;

  SmallVector<std::pair<TerminatorInst*, unsigned>, 4> NeedToSplit;
  for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
       PI != E; ++PI) {
    BasicBlock *Pred = *PI;
    if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
      continue;
    }
    PredLoads[Pred] = 0;

    if (Pred->getTerminator()->getNumSuccessors() != 1) {
      if (isa<IndirectBrInst>(Pred->getTerminator())) {
        DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
              << Pred->getName() << "': " << *LI << '\n');
        return false;
      }

      if (LoadBB->isLandingPad()) {
        DEBUG(dbgs()
              << "COULD NOT PRE LOAD BECAUSE OF LANDING PAD CRITICAL EDGE '"
              << Pred->getName() << "': " << *LI << '\n');
        return false;
      }

      unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
      NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
    }
  }

  if (!NeedToSplit.empty()) {
    toSplit.append(NeedToSplit.begin(), NeedToSplit.end());
    return false;
  }

  // Decide whether PRE is profitable for this load.
  unsigned NumUnavailablePreds = PredLoads.size();
  assert(NumUnavailablePreds != 0 &&
         "Fully available value should be eliminated above!");

  // If this load is unavailable in multiple predecessors, reject it.
  // FIXME: If we could restructure the CFG, we could make a common pred with
  // all the preds that don't have an available LI and insert a new load into
  // that one block.
  if (NumUnavailablePreds != 1)
    return false;

  // Check if the load can safely be moved to all the unavailable predecessors.
  bool CanDoPRE = true;
  SmallVector<Instruction*, 8> NewInsts;
  for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
       E = PredLoads.end(); I != E; ++I) {
    BasicBlock *UnavailablePred = I->first;

    // Do PHI translation to get its value in the predecessor if necessary. The
    // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.

    // If all preds have a single successor, then we know it is safe to insert
    // the load on the pred (?!?), so we can insert code to materialize the
    // pointer if it is not available.
    PHITransAddr Address(LI->getPointerOperand(), TD);
    Value *LoadPtr = 0;
    if (allSingleSucc) {
      LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
                                                  *DT, NewInsts);
    } else {
      Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
      LoadPtr = Address.getAddr();
    }

    // If we couldn't find or insert a computation of this phi translated
    // value, we fail PRE.
    if (LoadPtr == 0) {
      DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
            << *LI->getPointerOperand() << "\n");
      CanDoPRE = false;
      break;
    }

    // Make sure it is valid to move this load here. We have to watch out for:
    //  @1 = getelementptr (i8* p, ...
    //  test p and branch if == 0
    //  load @1
    // It is valid to have the getelementptr before the test, even if p can
    // be 0, as getelementptr only does address arithmetic.
1597 // If we are not pushing the value through any multiple-successor blocks
1598 // we do not have this case. Otherwise, check that the load is safe to
1599 // put anywhere; this can be improved, but should be conservatively safe.
1600 if (!allSingleSucc &&
1601 // FIXME: REEVALUATE THIS.
1602 !isSafeToLoadUnconditionally(LoadPtr,
1603 UnavailablePred->getTerminator(),
1604 LI->getAlignment(), TD)) {
1605 CanDoPRE = false;
1606 break;
1607 }
1608
1609 I->second = LoadPtr;
1610 }
1611
1612 if (!CanDoPRE) {
1613 while (!NewInsts.empty()) {
1614 Instruction *I = NewInsts.pop_back_val();
1615 if (MD) MD->removeInstruction(I);
1616 I->eraseFromParent();
1617 }
1618 return false;
1619 }
1620
1621 // Okay, we can eliminate this load by inserting a reload in the predecessor
1622 // and using PHI construction to get the value in the other predecessors.
1623 // Do it.
1624 DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
1625 DEBUG(if (!NewInsts.empty())
1626 dbgs() << "INSERTED " << NewInsts.size() << " INSTS: "
1627 << *NewInsts.back() << '\n');
1628
1629 // Assign value numbers to the new instructions.
1630 for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
1631 // FIXME: We really _ought_ to insert these value numbers into their
1632 // parent's availability map. However, in doing so, we risk getting into
1633 // ordering issues. If a block hasn't been processed yet, we would be
1634 // marking a value as AVAIL-IN, which isn't what we intend.
1635 VN.lookup_or_add(NewInsts[i]);
1636 }
1637
1638 for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
1639 E = PredLoads.end(); I != E; ++I) {
1640 BasicBlock *UnavailablePred = I->first;
1641 Value *LoadPtr = I->second;
1642
1643 Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
1644 LI->getAlignment(),
1645 UnavailablePred->getTerminator());
1646
1647 // Transfer the old load's TBAA tag to the new load.
1648 if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa))
1649 NewLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
1650
1651 // Transfer DebugLoc.
1652 NewLoad->setDebugLoc(LI->getDebugLoc());
1653
1654 // Add the newly created load.
1655 ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
1656 NewLoad));
1657 MD->invalidateCachedPointerInfo(LoadPtr);
1658 DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
1659 }
1660
1661 // Perform PHI construction.
1662 Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
1663 LI->replaceAllUsesWith(V);
1664 if (isa<PHINode>(V))
1665 V->takeName(LI);
1666 if (V->getType()->isPointerTy())
1667 MD->invalidateCachedPointerInfo(V);
1668 markInstructionForDeletion(LI);
1669 ++NumPRELoad;
1670 return true;
1671 }
1672
1673 /// processLoad - Attempt to eliminate a load, first by eliminating it
1674 /// locally, and then attempting non-local elimination if that fails.
1675 bool GVN::processLoad(LoadInst *L) {
1676 if (!MD)
1677 return false;
1678
1679 if (!L->isSimple())
1680 return false;
1681
1682 if (L->use_empty()) {
1683 markInstructionForDeletion(L);
1684 return true;
1685 }
1686
1687 // Figure out what this load depends on (e.g. an earlier store or load).
1688 MemDepResult Dep = MD->getDependency(L);
1689
1690 // If we have a clobber and target data is around, see if this is a clobber
1691 // that we can fix up through code synthesis.
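// A sketch of what "code synthesis" means here: once one of the
// AnalyzeLoadFromClobbering* helpers below proves that the clobbering
// store/load/mem-intrinsic fully covers this load at a known byte offset,
// the Get*ValueForLoad helpers materialize the loaded bits from the wider
// value instead of re-reading memory, conceptually something like
// (hypothetical IR, byte layout per TargetData):
//
//   %shifted = lshr i32 %wide_value, <8 * ByteOffset>
//   %narrow  = trunc i32 %shifted to i8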
1692 if (Dep.isClobber() && TD) { 1693 // Check to see if we have something like this: 1694 // store i32 123, i32* %P 1695 // %A = bitcast i32* %P to i8* 1696 // %B = gep i8* %A, i32 1 1697 // %C = load i8* %B 1698 // 1699 // We could do that by recognizing if the clobber instructions are obviously 1700 // a common base + constant offset, and if the previous store (or memset) 1701 // completely covers this load. This sort of thing can happen in bitfield 1702 // access code. 1703 Value *AvailVal = 0; 1704 if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) { 1705 int Offset = AnalyzeLoadFromClobberingStore(L->getType(), 1706 L->getPointerOperand(), 1707 DepSI, *TD); 1708 if (Offset != -1) 1709 AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset, 1710 L->getType(), L, *TD); 1711 } 1712 1713 // Check to see if we have something like this: 1714 // load i32* P 1715 // load i8* (P+1) 1716 // if we have this, replace the later with an extraction from the former. 1717 if (LoadInst *DepLI = dyn_cast<LoadInst>(Dep.getInst())) { 1718 // If this is a clobber and L is the first instruction in its block, then 1719 // we have the first instruction in the entry block. 1720 if (DepLI == L) 1721 return false; 1722 1723 int Offset = AnalyzeLoadFromClobberingLoad(L->getType(), 1724 L->getPointerOperand(), 1725 DepLI, *TD); 1726 if (Offset != -1) 1727 AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this); 1728 } 1729 1730 // If the clobbering value is a memset/memcpy/memmove, see if we can forward 1731 // a value on from it. 1732 if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) { 1733 int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(), 1734 L->getPointerOperand(), 1735 DepMI, *TD); 1736 if (Offset != -1) 1737 AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *TD); 1738 } 1739 1740 if (AvailVal) { 1741 DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n' 1742 << *AvailVal << '\n' << *L << "\n\n\n"); 1743 1744 // Replace the load! 1745 L->replaceAllUsesWith(AvailVal); 1746 if (AvailVal->getType()->isPointerTy()) 1747 MD->invalidateCachedPointerInfo(AvailVal); 1748 markInstructionForDeletion(L); 1749 ++NumGVNLoad; 1750 return true; 1751 } 1752 } 1753 1754 // If the value isn't available, don't do anything! 1755 if (Dep.isClobber()) { 1756 DEBUG( 1757 // fast print dep, using operator<< on instruction is too slow. 1758 dbgs() << "GVN: load "; 1759 WriteAsOperand(dbgs(), L); 1760 Instruction *I = Dep.getInst(); 1761 dbgs() << " is clobbered by " << *I << '\n'; 1762 ); 1763 return false; 1764 } 1765 1766 // If it is defined in another block, try harder. 1767 if (Dep.isNonLocal()) 1768 return processNonLocalLoad(L); 1769 1770 if (!Dep.isDef()) { 1771 DEBUG( 1772 // fast print dep, using operator<< on instruction is too slow. 1773 dbgs() << "GVN: load "; 1774 WriteAsOperand(dbgs(), L); 1775 dbgs() << " has unknown dependence\n"; 1776 ); 1777 return false; 1778 } 1779 1780 Instruction *DepInst = Dep.getInst(); 1781 if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) { 1782 Value *StoredVal = DepSI->getValueOperand(); 1783 1784 // The store and load are to a must-aliased pointer, but they may not 1785 // actually have the same type. See if we know how to reuse the stored 1786 // value (depending on its type). 
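// For example (hypothetical IR), a load that reads back a same-sized value
// under a different type:
//
//   store float %f, float* %p
//   %q = bitcast float* %p to i32*
//   %x = load i32* %q
//
// can reuse %f as "bitcast float %f to i32" instead of performing the load,
// provided TargetData confirms the bit widths line up;
// CoerceAvailableValueToLoadType performs that check and conversion.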
1787 if (StoredVal->getType() != L->getType()) { 1788 if (TD) { 1789 StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(), 1790 L, *TD); 1791 if (StoredVal == 0) 1792 return false; 1793 1794 DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal 1795 << '\n' << *L << "\n\n\n"); 1796 } 1797 else 1798 return false; 1799 } 1800 1801 // Remove it! 1802 L->replaceAllUsesWith(StoredVal); 1803 if (StoredVal->getType()->isPointerTy()) 1804 MD->invalidateCachedPointerInfo(StoredVal); 1805 markInstructionForDeletion(L); 1806 ++NumGVNLoad; 1807 return true; 1808 } 1809 1810 if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) { 1811 Value *AvailableVal = DepLI; 1812 1813 // The loads are of a must-aliased pointer, but they may not actually have 1814 // the same type. See if we know how to reuse the previously loaded value 1815 // (depending on its type). 1816 if (DepLI->getType() != L->getType()) { 1817 if (TD) { 1818 AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), 1819 L, *TD); 1820 if (AvailableVal == 0) 1821 return false; 1822 1823 DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal 1824 << "\n" << *L << "\n\n\n"); 1825 } 1826 else 1827 return false; 1828 } 1829 1830 // Remove it! 1831 L->replaceAllUsesWith(AvailableVal); 1832 if (DepLI->getType()->isPointerTy()) 1833 MD->invalidateCachedPointerInfo(DepLI); 1834 markInstructionForDeletion(L); 1835 ++NumGVNLoad; 1836 return true; 1837 } 1838 1839 // If this load really doesn't depend on anything, then we must be loading an 1840 // undef value. This can happen when loading for a fresh allocation with no 1841 // intervening stores, for example. 1842 if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) { 1843 L->replaceAllUsesWith(UndefValue::get(L->getType())); 1844 markInstructionForDeletion(L); 1845 ++NumGVNLoad; 1846 return true; 1847 } 1848 1849 // If this load occurs either right after a lifetime begin, 1850 // then the loaded value is undefined. 1851 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) { 1852 if (II->getIntrinsicID() == Intrinsic::lifetime_start) { 1853 L->replaceAllUsesWith(UndefValue::get(L->getType())); 1854 markInstructionForDeletion(L); 1855 ++NumGVNLoad; 1856 return true; 1857 } 1858 } 1859 1860 return false; 1861 } 1862 1863 // findLeader - In order to find a leader for a given value number at a 1864 // specific basic block, we first obtain the list of all Values for that number, 1865 // and then scan the list to find one whose block dominates the block in 1866 // question. This is fast because dominator tree queries consist of only 1867 // a few comparisons of DFS numbers. 1868 Value *GVN::findLeader(BasicBlock *BB, uint32_t num) { 1869 LeaderTableEntry Vals = LeaderTable[num]; 1870 if (!Vals.Val) return 0; 1871 1872 Value *Val = 0; 1873 if (DT->dominates(Vals.BB, BB)) { 1874 Val = Vals.Val; 1875 if (isa<Constant>(Val)) return Val; 1876 } 1877 1878 LeaderTableEntry* Next = Vals.Next; 1879 while (Next) { 1880 if (DT->dominates(Next->BB, BB)) { 1881 if (isa<Constant>(Next->Val)) return Next->Val; 1882 if (!Val) Val = Next->Val; 1883 } 1884 1885 Next = Next->Next; 1886 } 1887 1888 return Val; 1889 } 1890 1891 /// replaceAllDominatedUsesWith - Replace all uses of 'From' with 'To' if the 1892 /// use is dominated by the given basic block. Returns the number of uses that 1893 /// were replaced. 
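/// For example, after propagateEquality learns on some edge that "%x == 0",
/// a call such as replaceAllDominatedUsesWith(%x, 0, Root) rewrites every use
/// of %x whose parent block is dominated by Root (the values here are
/// hypothetical).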
1894 unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To, 1895 BasicBlock *Root) { 1896 unsigned Count = 0; 1897 for (Value::use_iterator UI = From->use_begin(), UE = From->use_end(); 1898 UI != UE; ) { 1899 Instruction *User = cast<Instruction>(*UI); 1900 unsigned OpNum = UI.getOperandNo(); 1901 ++UI; 1902 1903 if (DT->dominates(Root, User->getParent())) { 1904 User->setOperand(OpNum, To); 1905 ++Count; 1906 } 1907 } 1908 return Count; 1909 } 1910 1911 /// propagateEquality - The given values are known to be equal in every block 1912 /// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with 1913 /// 'RHS' everywhere in the scope. Returns whether a change was made. 1914 bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { 1915 if (LHS == RHS) return false; 1916 assert(LHS->getType() == RHS->getType() && "Equal but types differ!"); 1917 1918 // Don't try to propagate equalities between constants. 1919 if (isa<Constant>(LHS) && isa<Constant>(RHS)) 1920 return false; 1921 1922 // Make sure that any constants are on the right-hand side. In general the 1923 // best results are obtained by placing the longest lived value on the RHS. 1924 if (isa<Constant>(LHS)) 1925 std::swap(LHS, RHS); 1926 1927 // If neither term is constant then bail out. This is not for correctness, 1928 // it's just that the non-constant case is much less useful: it occurs just 1929 // as often as the constant case but handling it hardly ever results in an 1930 // improvement. 1931 if (!isa<Constant>(RHS)) 1932 return false; 1933 1934 // If value numbering later deduces that an instruction in the scope is equal 1935 // to 'LHS' then ensure it will be turned into 'RHS'. 1936 addToLeaderTable(VN.lookup_or_add(LHS), RHS, Root); 1937 1938 // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As 1939 // LHS always has at least one use that is not dominated by Root, this will 1940 // never do anything if LHS has only one use. 1941 bool Changed = false; 1942 if (!LHS->hasOneUse()) { 1943 unsigned NumReplacements = replaceAllDominatedUsesWith(LHS, RHS, Root); 1944 Changed |= NumReplacements > 0; 1945 NumGVNEqProp += NumReplacements; 1946 } 1947 1948 // Now try to deduce additional equalities from this one. For example, if the 1949 // known equality was "(A != B)" == "false" then it follows that A and B are 1950 // equal in the scope. Only boolean equalities with an explicit true or false 1951 // RHS are currently supported. 1952 if (!RHS->getType()->isIntegerTy(1)) 1953 // Not a boolean equality - bail out. 1954 return Changed; 1955 ConstantInt *CI = dyn_cast<ConstantInt>(RHS); 1956 if (!CI) 1957 // RHS neither 'true' nor 'false' - bail out. 1958 return Changed; 1959 // Whether RHS equals 'true'. Otherwise it equals 'false'. 1960 bool isKnownTrue = CI->isAllOnesValue(); 1961 bool isKnownFalse = !isKnownTrue; 1962 1963 // If "A && B" is known true then both A and B are known true. If "A || B" 1964 // is known false then both A and B are known false. 1965 Value *A, *B; 1966 if ((isKnownTrue && match(LHS, m_And(m_Value(A), m_Value(B)))) || 1967 (isKnownFalse && match(LHS, m_Or(m_Value(A), m_Value(B))))) { 1968 Changed |= propagateEquality(A, RHS, Root); 1969 Changed |= propagateEquality(B, RHS, Root); 1970 return Changed; 1971 } 1972 1973 // If we are propagating an equality like "(A == B)" == "true" then also 1974 // propagate the equality A == B. 1975 if (ICmpInst *Cmp = dyn_cast<ICmpInst>(LHS)) { 1976 // Only equality comparisons are supported. 
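// For example (hypothetical IR), if "%c = icmp eq i32 %a, %b" is known to be
// true on this edge, then %a == %b holds throughout the scope and the
// recursive call below propagates it. A true "icmp ne" only tells us the
// operands differ, which is not an equality we can exploit, so it is skipped.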
1977 if ((isKnownTrue && Cmp->getPredicate() == CmpInst::ICMP_EQ) || 1978 (isKnownFalse && Cmp->getPredicate() == CmpInst::ICMP_NE)) { 1979 Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1); 1980 Changed |= propagateEquality(Op0, Op1, Root); 1981 } 1982 return Changed; 1983 } 1984 1985 return Changed; 1986 } 1987 1988 /// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return 1989 /// true if every path from the entry block to 'Dst' passes via this edge. In 1990 /// particular 'Dst' must not be reachable via another edge from 'Src'. 1991 static bool isOnlyReachableViaThisEdge(BasicBlock *Src, BasicBlock *Dst, 1992 DominatorTree *DT) { 1993 // First off, there must not be more than one edge from Src to Dst, there 1994 // should be exactly one. So keep track of the number of times Src occurs 1995 // as a predecessor of Dst and fail if it's more than once. Secondly, any 1996 // other predecessors of Dst should be dominated by Dst (see logic below). 1997 bool SawEdgeFromSrc = false; 1998 for (pred_iterator PI = pred_begin(Dst), PE = pred_end(Dst); PI != PE; ++PI) { 1999 BasicBlock *Pred = *PI; 2000 if (Pred == Src) { 2001 // An edge from Src to Dst. 2002 if (SawEdgeFromSrc) 2003 // There are multiple edges from Src to Dst - fail. 2004 return false; 2005 SawEdgeFromSrc = true; 2006 continue; 2007 } 2008 // If the predecessor is not dominated by Dst, then it must be possible to 2009 // reach it either without passing through Src (and thus not via the edge) 2010 // or by passing through Src but taking a different edge out of Src. Either 2011 // way it is possible to reach Dst without passing via the edge, so fail. 2012 if (!DT->dominates(Dst, *PI)) 2013 return false; 2014 } 2015 assert(SawEdgeFromSrc && "No edge between these basic blocks!"); 2016 2017 // Every path from the entry block to Dst must at some point pass to Dst from 2018 // a predecessor that is not dominated by Dst. This predecessor can only be 2019 // Src, since all others are dominated by Dst. As there is only one edge from 2020 // Src to Dst, the path passes by this edge. 2021 return true; 2022 } 2023 2024 /// processInstruction - When calculating availability, handle an instruction 2025 /// by inserting it into the appropriate sets 2026 bool GVN::processInstruction(Instruction *I) { 2027 // Ignore dbg info intrinsics. 2028 if (isa<DbgInfoIntrinsic>(I)) 2029 return false; 2030 2031 // If the instruction can be easily simplified then do so now in preference 2032 // to value numbering it. Value numbering often exposes redundancies, for 2033 // example if it determines that %y is equal to %x then the instruction 2034 // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify. 2035 if (Value *V = SimplifyInstruction(I, TD, DT)) { 2036 I->replaceAllUsesWith(V); 2037 if (MD && V->getType()->isPointerTy()) 2038 MD->invalidateCachedPointerInfo(V); 2039 markInstructionForDeletion(I); 2040 ++NumGVNSimpl; 2041 return true; 2042 } 2043 2044 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 2045 if (processLoad(LI)) 2046 return true; 2047 2048 unsigned Num = VN.lookup_or_add(LI); 2049 addToLeaderTable(Num, LI, LI->getParent()); 2050 return false; 2051 } 2052 2053 // For conditional branches, we can perform simple conditional propagation on 2054 // the condition value itself. 
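// For instance (hypothetical IR), given "br i1 %cond, label %then, label
// %else", uses of %cond dominated by %then can be folded to true and uses
// dominated by %else to false, provided each successor is reachable only via
// its edge from this block; propagateEquality then looks through and/or and
// icmp eq/ne conditions for further equalities.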
2055 if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
2056 if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
2057 return false;
2058
2059 Value *BranchCond = BI->getCondition();
2060
2061 BasicBlock *TrueSucc = BI->getSuccessor(0);
2062 BasicBlock *FalseSucc = BI->getSuccessor(1);
2063 BasicBlock *Parent = BI->getParent();
2064 bool Changed = false;
2065
2066 if (isOnlyReachableViaThisEdge(Parent, TrueSucc, DT))
2067 Changed |= propagateEquality(BranchCond,
2068 ConstantInt::getTrue(TrueSucc->getContext()),
2069 TrueSucc);
2070
2071 if (isOnlyReachableViaThisEdge(Parent, FalseSucc, DT))
2072 Changed |= propagateEquality(BranchCond,
2073 ConstantInt::getFalse(FalseSucc->getContext()),
2074 FalseSucc);
2075
2076 return Changed;
2077 }
2078
2079 // For switches, propagate the case values into the case destinations.
2080 if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
2081 Value *SwitchCond = SI->getCondition();
2082 BasicBlock *Parent = SI->getParent();
2083 bool Changed = false;
2084 for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) {
2085 BasicBlock *Dst = SI->getSuccessor(i);
2086 if (isOnlyReachableViaThisEdge(Parent, Dst, DT))
2087 Changed |= propagateEquality(SwitchCond, SI->getCaseValue(i), Dst);
2088 }
2089 return Changed;
2090 }
2091
2092 // Instructions with void type don't return a value, so there's
2093 // no point in trying to find redundancies in them.
2094 if (I->getType()->isVoidTy()) return false;
2095
2096 uint32_t NextNum = VN.getNextUnusedValueNumber();
2097 unsigned Num = VN.lookup_or_add(I);
2098
2099 // Allocations are always uniquely numbered, so we can save time and memory
2100 // by fast failing them.
2101 if (isa<AllocaInst>(I) || isa<TerminatorInst>(I) || isa<PHINode>(I)) {
2102 addToLeaderTable(Num, I, I->getParent());
2103 return false;
2104 }
2105
2106 // If the number we were assigned was a brand new VN, then we don't
2107 // need to do a lookup to see if the number already exists
2108 // somewhere in the domtree: it can't!
2109 if (Num == NextNum) {
2110 addToLeaderTable(Num, I, I->getParent());
2111 return false;
2112 }
2113
2114 // Perform fast-path value-number based elimination of values inherited from
2115 // dominators.
2116 Value *repl = findLeader(I->getParent(), Num);
2117 if (repl == 0) {
2118 // Failure, just remember this instance for future use.
2119 addToLeaderTable(Num, I, I->getParent());
2120 return false;
2121 }
2122
2123 // Remove it!
2124 I->replaceAllUsesWith(repl);
2125 if (MD && repl->getType()->isPointerTy())
2126 MD->invalidateCachedPointerInfo(repl);
2127 markInstructionForDeletion(I);
2128 return true;
2129 }
2130
2131 /// runOnFunction - This is the main transformation entry point for a function.
2132 bool GVN::runOnFunction(Function& F) {
2133 if (!NoLoads)
2134 MD = &getAnalysis<MemoryDependenceAnalysis>();
2135 DT = &getAnalysis<DominatorTree>();
2136 TD = getAnalysisIfAvailable<TargetData>();
2137 VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
2138 VN.setMemDep(MD);
2139 VN.setDomTree(DT);
2140
2141 bool Changed = false;
2142 bool ShouldContinue = true;
2143
2144 // Merge unconditional branches, allowing PRE to catch more
2145 // optimization opportunities.
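// That is, a trivial chain such as (hypothetical IR)
//
//   bb1:
//     br label %bb2     ; bb2 has no other predecessors
//   bb2:
//     ...
//
// is collapsed into a single block by MergeBlockIntoPredecessor below, which
// both shortens the CFG walks that follow and exposes straight-line
// redundancies.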
2146 for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) { 2147 BasicBlock *BB = FI++; 2148 2149 bool removedBlock = MergeBlockIntoPredecessor(BB, this); 2150 if (removedBlock) ++NumGVNBlocks; 2151 2152 Changed |= removedBlock; 2153 } 2154 2155 unsigned Iteration = 0; 2156 while (ShouldContinue) { 2157 DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n"); 2158 ShouldContinue = iterateOnFunction(F); 2159 if (splitCriticalEdges()) 2160 ShouldContinue = true; 2161 Changed |= ShouldContinue; 2162 ++Iteration; 2163 } 2164 2165 if (EnablePRE) { 2166 bool PREChanged = true; 2167 while (PREChanged) { 2168 PREChanged = performPRE(F); 2169 Changed |= PREChanged; 2170 } 2171 } 2172 // FIXME: Should perform GVN again after PRE does something. PRE can move 2173 // computations into blocks where they become fully redundant. Note that 2174 // we can't do this until PRE's critical edge splitting updates memdep. 2175 // Actually, when this happens, we should just fully integrate PRE into GVN. 2176 2177 cleanupGlobalSets(); 2178 2179 return Changed; 2180 } 2181 2182 2183 bool GVN::processBlock(BasicBlock *BB) { 2184 // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function 2185 // (and incrementing BI before processing an instruction). 2186 assert(InstrsToErase.empty() && 2187 "We expect InstrsToErase to be empty across iterations"); 2188 bool ChangedFunction = false; 2189 2190 for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); 2191 BI != BE;) { 2192 ChangedFunction |= processInstruction(BI); 2193 if (InstrsToErase.empty()) { 2194 ++BI; 2195 continue; 2196 } 2197 2198 // If we need some instructions deleted, do it now. 2199 NumGVNInstr += InstrsToErase.size(); 2200 2201 // Avoid iterator invalidation. 2202 bool AtStart = BI == BB->begin(); 2203 if (!AtStart) 2204 --BI; 2205 2206 for (SmallVector<Instruction*, 4>::iterator I = InstrsToErase.begin(), 2207 E = InstrsToErase.end(); I != E; ++I) { 2208 DEBUG(dbgs() << "GVN removed: " << **I << '\n'); 2209 if (MD) MD->removeInstruction(*I); 2210 (*I)->eraseFromParent(); 2211 DEBUG(verifyRemoved(*I)); 2212 } 2213 InstrsToErase.clear(); 2214 2215 if (AtStart) 2216 BI = BB->begin(); 2217 else 2218 ++BI; 2219 } 2220 2221 return ChangedFunction; 2222 } 2223 2224 /// performPRE - Perform a purely local form of PRE that looks for diamond 2225 /// control flow patterns and attempts to perform simple PRE at the join point. 2226 bool GVN::performPRE(Function &F) { 2227 bool Changed = false; 2228 DenseMap<BasicBlock*, Value*> predMap; 2229 for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()), 2230 DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) { 2231 BasicBlock *CurrentBlock = *DI; 2232 2233 // Nothing to PRE in the entry block. 2234 if (CurrentBlock == &F.getEntryBlock()) continue; 2235 2236 // Don't perform PRE on a landing pad. 2237 if (CurrentBlock->isLandingPad()) continue; 2238 2239 for (BasicBlock::iterator BI = CurrentBlock->begin(), 2240 BE = CurrentBlock->end(); BI != BE; ) { 2241 Instruction *CurInst = BI++; 2242 2243 if (isa<AllocaInst>(CurInst) || 2244 isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) || 2245 CurInst->getType()->isVoidTy() || 2246 CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || 2247 isa<DbgInfoIntrinsic>(CurInst)) 2248 continue; 2249 2250 // We don't currently value number ANY inline asm calls. 
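// (A call such as "call void asm ..." is opaque to GVN's value numbering,
// so it is simply skipped here.)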
2251 if (CallInst *CallI = dyn_cast<CallInst>(CurInst)) 2252 if (CallI->isInlineAsm()) 2253 continue; 2254 2255 uint32_t ValNo = VN.lookup(CurInst); 2256 2257 // Look for the predecessors for PRE opportunities. We're 2258 // only trying to solve the basic diamond case, where 2259 // a value is computed in the successor and one predecessor, 2260 // but not the other. We also explicitly disallow cases 2261 // where the successor is its own predecessor, because they're 2262 // more complicated to get right. 2263 unsigned NumWith = 0; 2264 unsigned NumWithout = 0; 2265 BasicBlock *PREPred = 0; 2266 predMap.clear(); 2267 2268 for (pred_iterator PI = pred_begin(CurrentBlock), 2269 PE = pred_end(CurrentBlock); PI != PE; ++PI) { 2270 BasicBlock *P = *PI; 2271 // We're not interested in PRE where the block is its 2272 // own predecessor, or in blocks with predecessors 2273 // that are not reachable. 2274 if (P == CurrentBlock) { 2275 NumWithout = 2; 2276 break; 2277 } else if (!DT->dominates(&F.getEntryBlock(), P)) { 2278 NumWithout = 2; 2279 break; 2280 } 2281 2282 Value* predV = findLeader(P, ValNo); 2283 if (predV == 0) { 2284 PREPred = P; 2285 ++NumWithout; 2286 } else if (predV == CurInst) { 2287 NumWithout = 2; 2288 } else { 2289 predMap[P] = predV; 2290 ++NumWith; 2291 } 2292 } 2293 2294 // Don't do PRE when it might increase code size, i.e. when 2295 // we would need to insert instructions in more than one pred. 2296 if (NumWithout != 1 || NumWith == 0) 2297 continue; 2298 2299 // Don't do PRE across indirect branch. 2300 if (isa<IndirectBrInst>(PREPred->getTerminator())) 2301 continue; 2302 2303 // We can't do PRE safely on a critical edge, so instead we schedule 2304 // the edge to be split and perform the PRE the next time we iterate 2305 // on the function. 2306 unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock); 2307 if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) { 2308 toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum)); 2309 continue; 2310 } 2311 2312 // Instantiate the expression in the predecessor that lacked it. 2313 // Because we are going top-down through the block, all value numbers 2314 // will be available in the predecessor by the time we need them. Any 2315 // that weren't originally present will have been instantiated earlier 2316 // in this loop. 2317 Instruction *PREInstr = CurInst->clone(); 2318 bool success = true; 2319 for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) { 2320 Value *Op = PREInstr->getOperand(i); 2321 if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op)) 2322 continue; 2323 2324 if (Value *V = findLeader(PREPred, VN.lookup(Op))) { 2325 PREInstr->setOperand(i, V); 2326 } else { 2327 success = false; 2328 break; 2329 } 2330 } 2331 2332 // Fail out if we encounter an operand that is not available in 2333 // the PRE predecessor. This is typically because of loads which 2334 // are not value numbered precisely. 2335 if (!success) { 2336 delete PREInstr; 2337 DEBUG(verifyRemoved(PREInstr)); 2338 continue; 2339 } 2340 2341 PREInstr->insertBefore(PREPred->getTerminator()); 2342 PREInstr->setName(CurInst->getName() + ".pre"); 2343 PREInstr->setDebugLoc(CurInst->getDebugLoc()); 2344 predMap[PREPred] = PREInstr; 2345 VN.add(PREInstr, ValNo); 2346 ++NumGVNPRE; 2347 2348 // Update the availability map to include the new instruction. 2349 addToLeaderTable(ValNo, PREInstr, PREPred); 2350 2351 // Create a PHI to make the value available in this block. 
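// With two predecessors this ends up as, e.g. (hypothetical IR):
//
//   %x.pre-phi = phi i32 [ %x.leader, %pred.avail ], [ %x.pre, %pred.inserted ]
//
// where %x.leader came from predMap and %x.pre is the clone inserted just
// above; all uses of the original instruction are then redirected to the phi.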
2352 pred_iterator PB = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock); 2353 PHINode* Phi = PHINode::Create(CurInst->getType(), std::distance(PB, PE), 2354 CurInst->getName() + ".pre-phi", 2355 CurrentBlock->begin()); 2356 for (pred_iterator PI = PB; PI != PE; ++PI) { 2357 BasicBlock *P = *PI; 2358 Phi->addIncoming(predMap[P], P); 2359 } 2360 2361 VN.add(Phi, ValNo); 2362 addToLeaderTable(ValNo, Phi, CurrentBlock); 2363 Phi->setDebugLoc(CurInst->getDebugLoc()); 2364 CurInst->replaceAllUsesWith(Phi); 2365 if (Phi->getType()->isPointerTy()) { 2366 // Because we have added a PHI-use of the pointer value, it has now 2367 // "escaped" from alias analysis' perspective. We need to inform 2368 // AA of this. 2369 for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; 2370 ++ii) { 2371 unsigned jj = PHINode::getOperandNumForIncomingValue(ii); 2372 VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj)); 2373 } 2374 2375 if (MD) 2376 MD->invalidateCachedPointerInfo(Phi); 2377 } 2378 VN.erase(CurInst); 2379 removeFromLeaderTable(ValNo, CurInst, CurrentBlock); 2380 2381 DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n'); 2382 if (MD) MD->removeInstruction(CurInst); 2383 CurInst->eraseFromParent(); 2384 DEBUG(verifyRemoved(CurInst)); 2385 Changed = true; 2386 } 2387 } 2388 2389 if (splitCriticalEdges()) 2390 Changed = true; 2391 2392 return Changed; 2393 } 2394 2395 /// splitCriticalEdges - Split critical edges found during the previous 2396 /// iteration that may enable further optimization. 2397 bool GVN::splitCriticalEdges() { 2398 if (toSplit.empty()) 2399 return false; 2400 do { 2401 std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val(); 2402 SplitCriticalEdge(Edge.first, Edge.second, this); 2403 } while (!toSplit.empty()); 2404 if (MD) MD->invalidateCachedPredecessors(); 2405 return true; 2406 } 2407 2408 /// iterateOnFunction - Executes one iteration of GVN 2409 bool GVN::iterateOnFunction(Function &F) { 2410 cleanupGlobalSets(); 2411 2412 // Top-down walk of the dominator tree 2413 bool Changed = false; 2414 #if 0 2415 // Needed for value numbering with phi construction to work. 2416 ReversePostOrderTraversal<Function*> RPOT(&F); 2417 for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(), 2418 RE = RPOT.end(); RI != RE; ++RI) 2419 Changed |= processBlock(*RI); 2420 #else 2421 for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()), 2422 DE = df_end(DT->getRootNode()); DI != DE; ++DI) 2423 Changed |= processBlock(DI->getBlock()); 2424 #endif 2425 2426 return Changed; 2427 } 2428 2429 void GVN::cleanupGlobalSets() { 2430 VN.clear(); 2431 LeaderTable.clear(); 2432 TableAllocator.Reset(); 2433 } 2434 2435 /// verifyRemoved - Verify that the specified instruction does not occur in our 2436 /// internal data structures. 2437 void GVN::verifyRemoved(const Instruction *Inst) const { 2438 VN.verifyRemoved(Inst); 2439 2440 // Walk through the value number scope to make sure the instruction isn't 2441 // ferreted away in it. 2442 for (DenseMap<uint32_t, LeaderTableEntry>::const_iterator 2443 I = LeaderTable.begin(), E = LeaderTable.end(); I != E; ++I) { 2444 const LeaderTableEntry *Node = &I->second; 2445 assert(Node->Val != Inst && "Inst still in value numbering scope!"); 2446 2447 while (Node->Next) { 2448 Node = Node->Next; 2449 assert(Node->Val != Inst && "Inst still in value numbering scope!"); 2450 } 2451 } 2452 } 2453