//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation --*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements an analysis that determines, for a given memory
// operation, what preceding memory operations it depends on.  It builds on
// alias analysis information, and tries to provide a lazy, caching interface
// to a common kind of alias information query.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "memdep"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Function.h"
#include "llvm/LLVMContext.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/PredIteratorCache.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetData.h"
using namespace llvm;

STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");

STATISTIC(NumCacheNonLocalPtr,
          "Number of fully cached non-local ptr responses");
STATISTIC(NumCacheDirtyNonLocalPtr,
          "Number of cached, but dirty, non-local ptr responses");
STATISTIC(NumUncacheNonLocalPtr,
          "Number of uncached non-local ptr responses");
STATISTIC(NumCacheCompleteNonLocalPtr,
          "Number of block queries that were completely cached");

// Limit for the number of instructions to scan in a block.
// FIXME: Figure out what a sane value is for this.
//        (500 is relatively insane.)
static const int BlockScanLimit = 500;

char MemoryDependenceAnalysis::ID = 0;

// Register this pass...
INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep",
                      "Memory Dependence Analysis", false, true)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep",
                    "Memory Dependence Analysis", false, true)

MemoryDependenceAnalysis::MemoryDependenceAnalysis()
  : FunctionPass(ID), PredCache(0) {
  initializeMemoryDependenceAnalysisPass(*PassRegistry::getPassRegistry());
}
MemoryDependenceAnalysis::~MemoryDependenceAnalysis() {
}

/// Clean up memory in between runs.
void MemoryDependenceAnalysis::releaseMemory() {
  LocalDeps.clear();
  NonLocalDeps.clear();
  NonLocalPointerDeps.clear();
  ReverseLocalDeps.clear();
  ReverseNonLocalDeps.clear();
  ReverseNonLocalPtrDeps.clear();
  PredCache->clear();
}

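// Illustrative sketch (not part of the pass itself): a client pass would
// typically require and query this analysis as follows, assuming a
// hypothetical FunctionPass named 'MyPass':
//
//   void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
//     AU.addRequired<MemoryDependenceAnalysis>();
//   }
//   bool MyPass::runOnFunction(Function &F) {
//     MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
//     ...
//   }
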
/// getAnalysisUsage - Does not modify anything.  It uses Alias Analysis.
///
void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequiredTransitive<AliasAnalysis>();
}

bool MemoryDependenceAnalysis::runOnFunction(Function &) {
  AA = &getAnalysis<AliasAnalysis>();
  TD = getAnalysisIfAvailable<TargetData>();
  DT = getAnalysisIfAvailable<DominatorTree>();
  if (PredCache == 0)
    PredCache.reset(new PredIteratorCache());
  return false;
}

/// RemoveFromReverseMap - This is a helper function that removes Val from
/// 'Inst's set in ReverseMap.  If the set becomes empty, remove Inst's entry.
template <typename KeyTy>
static void RemoveFromReverseMap(DenseMap<Instruction*,
                                 SmallPtrSet<KeyTy, 4> > &ReverseMap,
                                 Instruction *Inst, KeyTy Val) {
  typename DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> >::iterator
  InstIt = ReverseMap.find(Inst);
  assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
  bool Found = InstIt->second.erase(Val);
  assert(Found && "Invalid reverse map!"); (void)Found;
  if (InstIt->second.empty())
    ReverseMap.erase(InstIt);
}

/// GetLocation - If the given instruction references a specific memory
/// location, fill in Loc with the details, otherwise set Loc.Ptr to null.
/// Return a ModRefInfo value describing the general behavior of the
/// instruction.
static
AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
                                        AliasAnalysis::Location &Loc,
                                        AliasAnalysis *AA) {
  if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    if (LI->isUnordered()) {
      Loc = AA->getLocation(LI);
      return AliasAnalysis::Ref;
    } else if (LI->getOrdering() == Monotonic) {
      Loc = AA->getLocation(LI);
      return AliasAnalysis::ModRef;
    }
    Loc = AliasAnalysis::Location();
    return AliasAnalysis::ModRef;
  }

  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    if (SI->isUnordered()) {
      Loc = AA->getLocation(SI);
      return AliasAnalysis::Mod;
    } else if (SI->getOrdering() == Monotonic) {
      Loc = AA->getLocation(SI);
      return AliasAnalysis::ModRef;
    }
    Loc = AliasAnalysis::Location();
    return AliasAnalysis::ModRef;
  }

  if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
    Loc = AA->getLocation(V);
    return AliasAnalysis::ModRef;
  }

  if (const CallInst *CI = isFreeCall(Inst)) {
    // calls to free() deallocate the entire structure
    Loc = AliasAnalysis::Location(CI->getArgOperand(0));
    return AliasAnalysis::Mod;
  }

  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
    switch (II->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::invariant_start:
      Loc = AliasAnalysis::Location(II->getArgOperand(1),
                                    cast<ConstantInt>(II->getArgOperand(0))
                                      ->getZExtValue(),
                                    II->getMetadata(LLVMContext::MD_tbaa));
      // These intrinsics don't really modify the memory, but returning Mod
      // will allow them to be handled conservatively.
      return AliasAnalysis::Mod;
    case Intrinsic::invariant_end:
      Loc = AliasAnalysis::Location(II->getArgOperand(2),
                                    cast<ConstantInt>(II->getArgOperand(1))
                                      ->getZExtValue(),
                                    II->getMetadata(LLVMContext::MD_tbaa));
      // These intrinsics don't really modify the memory, but returning Mod
      // will allow them to be handled conservatively.
      return AliasAnalysis::Mod;
    default:
      break;
    }

  // Otherwise, just do the coarse-grained thing that always works.
  if (Inst->mayWriteToMemory())
    return AliasAnalysis::ModRef;
  if (Inst->mayReadFromMemory())
    return AliasAnalysis::Ref;
  return AliasAnalysis::NoModRef;
}

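// Behavior summary (illustrative, following the cases above): for a simple
// 'load i32* %p', GetLocation fills Loc from AA->getLocation() and returns
// Ref; for a simple 'store i32 %v, i32* %p' it returns Mod.  Atomic accesses
// ordered more strongly than Monotonic fall back to a null location and a
// conservative ModRef answer.
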
/// getCallSiteDependencyFrom - Private helper for finding the local
/// dependencies of a call site.
MemDepResult MemoryDependenceAnalysis::
getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
                          BasicBlock::iterator ScanIt, BasicBlock *BB) {
  unsigned Limit = BlockScanLimit;

  // Walk backwards through the block, looking for dependencies.
  while (ScanIt != BB->begin()) {
    // Limit the amount of scanning we do so we don't end up with quadratic
    // running time on extreme testcases.
    --Limit;
    if (!Limit)
      return MemDepResult::getUnknown();

    Instruction *Inst = --ScanIt;

    // If this inst is a memory op, get the pointer it accessed.
    AliasAnalysis::Location Loc;
    AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA);
    if (Loc.Ptr) {
      // A simple instruction.
      if (AA->getModRefInfo(CS, Loc) != AliasAnalysis::NoModRef)
        return MemDepResult::getClobber(Inst);
      continue;
    }

    if (CallSite InstCS = cast<Value>(Inst)) {
      // Debug intrinsics don't cause dependences.
      if (isa<DbgInfoIntrinsic>(Inst)) continue;
      // If these two calls do not interfere, look past it.
      switch (AA->getModRefInfo(CS, InstCS)) {
      case AliasAnalysis::NoModRef:
        // If the two calls are the same, return InstCS as a Def, so that
        // CS can be found redundant and eliminated.
        if (isReadOnlyCall && !(MR & AliasAnalysis::Mod) &&
            CS.getInstruction()->isIdenticalToWhenDefined(Inst))
          return MemDepResult::getDef(Inst);

        // Otherwise if the two calls don't interact (e.g. InstCS is readnone)
        // keep scanning.
        break;
      default:
        return MemDepResult::getClobber(Inst);
      }
    }
  }

  // No dependence found.  If this is the entry block of the function, it is
  // unknown, otherwise it is non-local.
  if (BB != &BB->getParent()->getEntryBlock())
    return MemDepResult::getNonLocal();
  return MemDepResult::getNonFuncLocal();
}

/// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that
/// would fully overlap MemLoc if done as a wider legal integer load.
///
/// MemLocBase, MemLocOffs are lazily computed here the first time the
/// base/offs of memloc is needed.
static bool
isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc,
                                       const Value *&MemLocBase,
                                       int64_t &MemLocOffs,
                                       const LoadInst *LI,
                                       const TargetData *TD) {
  // If we have no target data, we can't do this.
  if (TD == 0) return false;

  // If we haven't already computed the base/offset of MemLoc, do so now.
  if (MemLocBase == 0)
    MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, *TD);

  unsigned Size = MemoryDependenceAnalysis::
    getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size,
                                    LI, *TD);
  return Size != 0;
}

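// Worked example (illustrative): with a query of 1 byte at P+3 and LI being
// 'load i8* P, align 4', the two accesses don't alias, but widening LI to a
// 4-byte i32 load would cover bytes [P, P+4) and therefore the queried byte,
// so this helper returns true (assuming i32 is a legal integer type on the
// target).
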
/// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that
/// looks at a memory location for a load (specified by MemLocBase, Offs,
/// and Size) and compares it against a load.  If the specified load could
/// be safely widened to a larger integer load that is 1) still efficient,
/// 2) safe for the target, and 3) would provide the specified memory
/// location value, then this function returns the size in bytes of the
/// load width to use.  If not, this returns zero.
unsigned MemoryDependenceAnalysis::
getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
                                unsigned MemLocSize, const LoadInst *LI,
                                const TargetData &TD) {
  // We can only extend simple integer loads.
  if (!isa<IntegerType>(LI->getType()) || !LI->isSimple()) return 0;

  // Get the base of this load.
  int64_t LIOffs = 0;
  const Value *LIBase =
    GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, TD);

  // If the two pointers are not based on the same pointer, we can't tell that
  // they are related.
  if (LIBase != MemLocBase) return 0;

  // Okay, the two values are based on the same pointer, but returned as
  // no-alias.  This happens when we have things like two byte loads at "P+1"
  // and "P+3".  Check to see if increasing the size of the "LI" load up to its
  // alignment (or the largest native integer type) will allow us to load all
  // the bits required by MemLoc.

  // If MemLoc is before LI, then no widening of LI will help us out.
  if (MemLocOffs < LIOffs) return 0;

  // Get the alignment of the load in bytes.  We assume that it is safe to load
  // any legal integer up to this size without a problem.  For example, if we're
  // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
  // widen it up to an i32 load.  If it is known 2-byte aligned, we can widen it
  // to i16.
  unsigned LoadAlign = LI->getAlignment();

  int64_t MemLocEnd = MemLocOffs+MemLocSize;

  // If no amount of rounding up will let MemLoc fit into LI, then bail out.
  if (LIOffs+LoadAlign < MemLocEnd) return 0;

  // This is the size of the load to try.  Start with the next larger power of
  // two.
  unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits()/8U;
  NewLoadByteSize = NextPowerOf2(NewLoadByteSize);

  while (1) {
    // If this load size is bigger than our known alignment or would not fit
    // into a native integer register, then we fail.
    if (NewLoadByteSize > LoadAlign ||
        !TD.fitsInLegalInteger(NewLoadByteSize*8))
      return 0;

    if (LIOffs+NewLoadByteSize > MemLocEnd &&
        LI->getParent()->getParent()->hasFnAttr(Attribute::AddressSafety)) {
      // We will be reading past the location accessed by the original program.
      // While this is safe in a regular build, Address Safety analysis tools
      // may start reporting false warnings.  So, don't do widening.
      return 0;
    }

    // If a load of this width would include all of MemLoc, then we succeed.
    if (LIOffs+NewLoadByteSize >= MemLocEnd)
      return NewLoadByteSize;

    NewLoadByteSize <<= 1;
  }
}

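// Note on the loop above: NewLoadByteSize starts at the next power of two
// strictly above the load's current byte width and doubles until it either
// covers MemLocEnd (success) or exceeds the alignment / largest legal integer
// (failure), so only a handful of iterations ever run.
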
namespace {
  /// Only find pointer captures which happen before the given instruction. Uses
  /// the dominator tree to determine whether one instruction is before another.
  struct CapturesBefore : public CaptureTracker {
    CapturesBefore(const Instruction *I, DominatorTree *DT)
      : BeforeHere(I), DT(DT), Captured(false) {}

    void tooManyUses() { Captured = true; }

    bool shouldExplore(Use *U) {
      Instruction *I = cast<Instruction>(U->getUser());
      BasicBlock *BB = I->getParent();
      if (BeforeHere != I &&
          (!DT->isReachableFromEntry(BB) || DT->dominates(BeforeHere, I)))
        return false;
      return true;
    }

    bool captured(Use *U) {
      Instruction *I = cast<Instruction>(U->getUser());
      BasicBlock *BB = I->getParent();
      if (BeforeHere != I &&
          (!DT->isReachableFromEntry(BB) || DT->dominates(BeforeHere, I)))
        return false;
      Captured = true;
      return true;
    }

    const Instruction *BeforeHere;
    DominatorTree *DT;

    bool Captured;
  };
}

AliasAnalysis::ModRefResult
MemoryDependenceAnalysis::getModRefInfo(const Instruction *Inst,
                                        const AliasAnalysis::Location &MemLoc) {
  AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc);
  if (MR != AliasAnalysis::ModRef) return MR;

  // FIXME: this is really just shoring-up a deficiency in alias analysis.
  // BasicAA isn't willing to spend linear time determining whether an alloca
  // was captured before or after this particular call, while we are.  However,
  // with a smarter AA in place, this test is just wasting compile time.
  if (!DT) return AliasAnalysis::ModRef;
  const Value *Object = GetUnderlyingObject(MemLoc.Ptr, TD);
  if (!isIdentifiedObject(Object) || isa<GlobalValue>(Object))
    return AliasAnalysis::ModRef;
  ImmutableCallSite CS(Inst);
  if (!CS.getInstruction()) return AliasAnalysis::ModRef;

  CapturesBefore CB(Inst, DT);
  llvm::PointerMayBeCaptured(Object, &CB);

  if (isa<Constant>(Object) || CS.getInstruction() == Object || CB.Captured)
    return AliasAnalysis::ModRef;

  unsigned ArgNo = 0;
  for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
       CI != CE; ++CI, ++ArgNo) {
    // Only look at the no-capture or byval pointer arguments.  If this
    // pointer were passed to arguments that were neither of these, then it
    // couldn't be no-capture.
    if (!(*CI)->getType()->isPointerTy() ||
        (!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo)))
      continue;

    // If this is a no-capture pointer argument, see if we can tell that it
    // is impossible to alias the pointer we're checking.  If not, we have to
    // assume that the call could touch the pointer, even though it doesn't
    // escape.
    if (!AA->isNoAlias(AliasAnalysis::Location(*CI),
                       AliasAnalysis::Location(Object))) {
      return AliasAnalysis::ModRef;
    }
  }
  return AliasAnalysis::NoModRef;
}

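// Example (illustrative; 'f' and '%q' are hypothetical): given
// 'call void @f(i8* nocapture %q)' and a query on an alloca that has not been
// captured before the call, the loop above returns NoModRef when AA proves %q
// and the alloca are no-alias, even though plain AA->getModRefInfo() answered
// ModRef.
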
/// getPointerDependencyFrom - Return the instruction on which a memory
/// location depends.  If isLoad is true, this routine ignores may-aliases with
/// read-only operations.  If isLoad is false, this routine ignores may-aliases
/// with reads from read-only locations.
MemDepResult MemoryDependenceAnalysis::
getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
                         BasicBlock::iterator ScanIt, BasicBlock *BB) {

  const Value *MemLocBase = 0;
  int64_t MemLocOffset = 0;

  unsigned Limit = BlockScanLimit;

  // Walk backwards through the basic block, looking for dependencies.
  while (ScanIt != BB->begin()) {
    // Limit the amount of scanning we do so we don't end up with quadratic
    // running time on extreme testcases.
    --Limit;
    if (!Limit)
      return MemDepResult::getUnknown();

    Instruction *Inst = --ScanIt;

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
      // Debug intrinsics don't (and can't) cause dependences.
      if (isa<DbgInfoIntrinsic>(II)) continue;

      // If we reach a lifetime begin or end marker, then the query ends here
      // because the value is undefined.
      if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
        // FIXME: This only considers queries directly on the invariant-tagged
        // pointer, not on query pointers that are indexed off of them.  It'd
        // be nice to handle that at some point (the right approach is to use
        // GetPointerBaseWithConstantOffset).
        if (AA->isMustAlias(AliasAnalysis::Location(II->getArgOperand(1)),
                            MemLoc))
          return MemDepResult::getDef(II);
        continue;
      }
    }

    // Values depend on loads if the pointers are must aliased.  This means that
    // a load depends on another must aliased load from the same value.
    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
      // Atomic loads have complications involved.
      // FIXME: This is overly conservative.
      if (!LI->isUnordered())
        return MemDepResult::getClobber(LI);

      AliasAnalysis::Location LoadLoc = AA->getLocation(LI);

      // If we found a pointer, check if it could be the same as our pointer.
      AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc);

      if (isLoad) {
        if (R == AliasAnalysis::NoAlias) {
          // If this is an over-aligned integer load (for example,
          // "load i8* %P, align 4") see if it would obviously overlap with the
          // queried location if widened to a larger load (e.g. if the queried
          // location is 1 byte at P+1).  If so, return it as a load/load
          // clobber result, allowing the client to decide to widen the load if
          // it wants to.
          if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType()))
            if (LI->getAlignment()*8 > ITy->getPrimitiveSizeInBits() &&
                isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
                                                       MemLocOffset, LI, TD))
              return MemDepResult::getClobber(Inst);

          continue;
        }

        // Must aliased loads are defs of each other.
        if (R == AliasAnalysis::MustAlias)
          return MemDepResult::getDef(Inst);

#if 0   // FIXME: Temporarily disabled.  GVN is cleverly rewriting loads
        // in terms of clobbering loads, but since it does this by looking
        // at the clobbering load directly, it doesn't know about any
        // phi translation that may have happened along the way.

        // If we have a partial alias, then return this as a clobber for the
        // client to handle.
        if (R == AliasAnalysis::PartialAlias)
          return MemDepResult::getClobber(Inst);
#endif

        // Loads that merely may-alias don't impose a dependence on each
        // other; keep scanning.
        continue;
      }

      // Stores don't depend on other no-aliased accesses.
      if (R == AliasAnalysis::NoAlias)
        continue;

      // Stores don't alias loads from read-only memory.
      if (AA->pointsToConstantMemory(LoadLoc))
        continue;

      // Stores depend on may/must aliased loads.
      return MemDepResult::getDef(Inst);
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      // Atomic stores have complications involved.
      // FIXME: This is overly conservative.
      if (!SI->isUnordered())
        return MemDepResult::getClobber(SI);

      // If alias analysis can tell that this store is guaranteed to not modify
      // the query pointer, ignore it.  Use getModRefInfo to handle cases where
      // the query pointer points to constant memory etc.
      if (AA->getModRefInfo(SI, MemLoc) == AliasAnalysis::NoModRef)
        continue;

      // Ok, this store might clobber the query pointer.  Check to see if it is
      // a must alias: in this case, we want to return this as a def.
      AliasAnalysis::Location StoreLoc = AA->getLocation(SI);

      // If we found a pointer, check if it could be the same as our pointer.
      AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc);

      if (R == AliasAnalysis::NoAlias)
        continue;
      if (R == AliasAnalysis::MustAlias)
        return MemDepResult::getDef(Inst);
      return MemDepResult::getClobber(Inst);
    }

    // If this is an allocation, and if we know that the accessed pointer is to
    // the allocation, return Def.  This means that there is no dependence and
    // the access can be optimized based on that.  For example, a load could
    // turn into undef.
    // Note: Only determine this to be a malloc if Inst is the malloc call, not
    // a subsequent bitcast of the malloc call result.  There can be stores to
    // the malloced memory between the malloc call and its bitcast uses, and we
    // need to continue scanning until the malloc call.
    if (isa<AllocaInst>(Inst) ||
        (isa<CallInst>(Inst) && extractMallocCall(Inst))) {
      const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD);

      if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr))
        return MemDepResult::getDef(Inst);
      continue;
    }

    // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
    switch (getModRefInfo(Inst, MemLoc)) {
    case AliasAnalysis::NoModRef:
      // If the call has no effect on the queried pointer, just ignore it.
      continue;
    case AliasAnalysis::Mod:
      return MemDepResult::getClobber(Inst);
    case AliasAnalysis::Ref:
      // If the call is known to never store to the pointer, and if this is a
      // load query, we can safely ignore it (scan past it).
      if (isLoad)
        continue;
      // FALL THROUGH.
    default:
      // Otherwise, there is a potential dependence.  Return a clobber.
      return MemDepResult::getClobber(Inst);
    }
  }

  // No dependence found.  If this is the entry block of the function, it is
  // unknown, otherwise it is non-local.
  if (BB != &BB->getParent()->getEntryBlock())
    return MemDepResult::getNonLocal();
  return MemDepResult::getNonFuncLocal();
}

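// Illustrative sketch of a direct query (clients normally go through
// getDependency or getNonLocalPointerDependency instead; 'MD' and 'TheLoad'
// are hypothetical):
//
//   AliasAnalysis::Location Loc = AA->getLocation(TheLoad);
//   MemDepResult Res =
//     MD->getPointerDependencyFrom(Loc, /*isLoad=*/true,
//                                  BasicBlock::iterator(TheLoad),
//                                  TheLoad->getParent());
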
/// getDependency - Return the instruction on which a memory operation
/// depends.
MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
  Instruction *ScanPos = QueryInst;

  // Check for a cached result.
  MemDepResult &LocalCache = LocalDeps[QueryInst];

  // If the cached entry is non-dirty, just return it.  Note that this depends
  // on MemDepResult's default constructing to 'dirty'.
  if (!LocalCache.isDirty())
    return LocalCache;

  // Otherwise, if we have a dirty entry, we know we can start the scan at that
  // instruction, which may save us some work.
  if (Instruction *Inst = LocalCache.getInst()) {
    ScanPos = Inst;

    RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
  }

  BasicBlock *QueryParent = QueryInst->getParent();

  // Do the scan.
  if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
    // No dependence found.  If this is the entry block of the function, it is
    // unknown, otherwise it is non-local.
    if (QueryParent != &QueryParent->getParent()->getEntryBlock())
      LocalCache = MemDepResult::getNonLocal();
    else
      LocalCache = MemDepResult::getNonFuncLocal();
  } else {
    AliasAnalysis::Location MemLoc;
    AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA);
    if (MemLoc.Ptr) {
      // If we can do a pointer scan, make it happen.
      bool isLoad = !(MR & AliasAnalysis::Mod);
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))
        isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;

      LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos,
                                            QueryParent);
    } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
      CallSite QueryCS(QueryInst);
      bool isReadOnly = AA->onlyReadsMemory(QueryCS);
      LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos,
                                             QueryParent);
    } else
      // Non-memory instruction.
      LocalCache = MemDepResult::getUnknown();
  }

  // Remember the result!
  if (Instruction *I = LocalCache.getInst())
    ReverseLocalDeps[I].insert(QueryInst);

  return LocalCache;
}

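// Illustrative use (assuming 'MD' is this analysis and 'I' an instruction;
// 'visit' is a hypothetical client helper):
//
//   MemDepResult Res = MD.getDependency(I);
//   if (Res.isDef() || Res.isClobber())
//     visit(Res.getInst());   // local dependency within I's block
//   else if (Res.isNonLocal())
//     ...                     // fall back to the non-local queries below
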
#ifndef NDEBUG
/// AssertSorted - This method is used when -debug is specified to verify that
/// cache arrays are properly kept sorted.
static void AssertSorted(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
                         int Count = -1) {
  if (Count == -1) Count = Cache.size();
  if (Count == 0) return;

  for (unsigned i = 1; i != unsigned(Count); ++i)
    assert(!(Cache[i] < Cache[i-1]) && "Cache isn't sorted!");
}
#endif

/// getNonLocalCallDependency - Perform a full dependency query for the
/// specified call, returning the set of blocks that the value is
/// potentially live across.  The returned set of results will include a
/// "NonLocal" result for all blocks where the value is live across.
///
/// This method assumes the instruction returns a "NonLocal" dependency
/// within its own block.
///
/// This returns a reference to an internal data structure that may be
/// invalidated on the next non-local query or when an instruction is
/// removed.  Clients must copy this data if they want it around longer than
/// that.
const MemoryDependenceAnalysis::NonLocalDepInfo &
MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
  assert(getDependency(QueryCS.getInstruction()).isNonLocal() &&
 "getNonLocalCallDependency should only be used on calls with non-local deps!");
  PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()];
  NonLocalDepInfo &Cache = CacheP.first;

  /// DirtyBlocks - This is the set of blocks that need to be recomputed.  In
  /// the cached case, this can happen due to instructions being deleted etc. In
  /// the uncached case, this starts out as the set of predecessors we care
  /// about.
  SmallVector<BasicBlock*, 32> DirtyBlocks;

  if (!Cache.empty()) {
    // Okay, we have a cache entry.  If we know it is not dirty, just return it
    // with no computation.
    if (!CacheP.second) {
      ++NumCacheNonLocal;
      return Cache;
    }

    // If we already have a partially computed set of results, scan them to
    // determine what is dirty, seeding our initial DirtyBlocks worklist.
    for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end();
         I != E; ++I)
      if (I->getResult().isDirty())
        DirtyBlocks.push_back(I->getBB());

    // Sort the cache so that we can do fast binary search lookups below.
    std::sort(Cache.begin(), Cache.end());

    ++NumCacheDirtyNonLocal;
    //cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
    //     << Cache.size() << " cached: " << *QueryInst;
  } else {
    // Seed DirtyBlocks with each of the preds of QueryInst's block.
    BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
    for (BasicBlock **PI = PredCache->GetPreds(QueryBB); *PI; ++PI)
      DirtyBlocks.push_back(*PI);
    ++NumUncacheNonLocal;
  }

  // isReadonlyCall - If this is a read-only call, we can be more aggressive.
  bool isReadonlyCall = AA->onlyReadsMemory(QueryCS);

  SmallPtrSet<BasicBlock*, 64> Visited;

  unsigned NumSortedEntries = Cache.size();
  DEBUG(AssertSorted(Cache));

  // Iterate while we still have blocks to update.
  while (!DirtyBlocks.empty()) {
    BasicBlock *DirtyBB = DirtyBlocks.back();
    DirtyBlocks.pop_back();

    // Already processed this block?
    if (!Visited.insert(DirtyBB))
      continue;

    // Do a binary search to see if we already have an entry for this block in
    // the cache set.  If so, find it.
    DEBUG(AssertSorted(Cache, NumSortedEntries));
    NonLocalDepInfo::iterator Entry =
      std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries,
                       NonLocalDepEntry(DirtyBB));
    if (Entry != Cache.begin() && prior(Entry)->getBB() == DirtyBB)
      --Entry;

    NonLocalDepEntry *ExistingResult = 0;
    if (Entry != Cache.begin()+NumSortedEntries &&
        Entry->getBB() == DirtyBB) {
      // If we already have an entry, and if it isn't already dirty, the block
      // is done.
      if (!Entry->getResult().isDirty())
        continue;

      // Otherwise, remember this slot so we can update the value.
      ExistingResult = &*Entry;
    }

    // If the dirty entry has a pointer, start scanning from it so we don't have
    // to rescan the entire block.
    BasicBlock::iterator ScanPos = DirtyBB->end();
    if (ExistingResult) {
      if (Instruction *Inst = ExistingResult->getResult().getInst()) {
        ScanPos = Inst;
        // We're removing QueryInst's use of Inst.
        RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
                             QueryCS.getInstruction());
      }
    }

    // Find out if this block has a local dependency for QueryInst.
    MemDepResult Dep;

    if (ScanPos != DirtyBB->begin()) {
      Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB);
    } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
      // No dependence found.  If this is the entry block of the function, it is
      // unknown, otherwise it is non-local.
      Dep = MemDepResult::getNonLocal();
    } else {
      Dep = MemDepResult::getNonFuncLocal();
    }

    // If we had a dirty entry for the block, update it.  Otherwise, just add
    // a new entry.
    if (ExistingResult)
      ExistingResult->setResult(Dep);
    else
      Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));

    // If the block has a dependency (i.e. it isn't completely transparent to
    // the value), remember the association!
    if (!Dep.isNonLocal()) {
      // Keep the ReverseNonLocalDeps map up to date so we can efficiently
      // update this when we remove instructions.
      if (Instruction *Inst = Dep.getInst())
        ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
    } else {

      // If the block *is* completely transparent to the load, we need to check
      // the predecessors of this block.  Add them to our worklist.
      for (BasicBlock **PI = PredCache->GetPreds(DirtyBB); *PI; ++PI)
        DirtyBlocks.push_back(*PI);
    }
  }

  return Cache;
}

/// getNonLocalPointerDependency - Perform a full dependency query for an
/// access to the specified (non-volatile) memory location, returning the
/// set of instructions that either define or clobber the value.
///
/// This method assumes the pointer has a "NonLocal" dependency within its
/// own block.
///
void MemoryDependenceAnalysis::
getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad,
                             BasicBlock *FromBB,
                             SmallVectorImpl<NonLocalDepResult> &Result) {
  assert(Loc.Ptr->getType()->isPointerTy() &&
         "Can't get pointer deps of a non-pointer!");
  Result.clear();

  PHITransAddr Address(const_cast<Value *>(Loc.Ptr), TD);

  // This is the set of blocks we've inspected, and the pointer we consider in
  // each block.  Because of critical edges, we currently bail out if querying
  // a block with multiple different pointers.  This can happen during PHI
  // translation.
  DenseMap<BasicBlock*, Value*> Visited;
  if (!getNonLocalPointerDepFromBB(Address, Loc, isLoad, FromBB,
                                   Result, Visited, true))
    return;
  Result.clear();
  Result.push_back(NonLocalDepResult(FromBB,
                                     MemDepResult::getUnknown(),
                                     const_cast<Value *>(Loc.Ptr)));
}

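// Illustrative use (assuming 'MD' is this analysis and 'LI' a simple load):
//
//   SmallVector<NonLocalDepResult, 32> Deps;
//   MD.getNonLocalPointerDependency(AA.getLocation(LI), /*isLoad=*/true,
//                                   LI->getParent(), Deps);
//   // Each entry pairs a block with the def/clobber (or unknown) found there.
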
/// GetNonLocalInfoForBlock - Compute the memdep value for BB with
/// Pointer/PointeeSize using either cached information in Cache or by doing a
/// lookup (which may use dirty cache info if available).  If we do a lookup,
/// add the result to the cache.
MemDepResult MemoryDependenceAnalysis::
GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
                        bool isLoad, BasicBlock *BB,
                        NonLocalDepInfo *Cache, unsigned NumSortedEntries) {

  // Do a binary search to see if we already have an entry for this block in
  // the cache set.  If so, find it.
  NonLocalDepInfo::iterator Entry =
    std::upper_bound(Cache->begin(), Cache->begin()+NumSortedEntries,
                     NonLocalDepEntry(BB));
  if (Entry != Cache->begin() && (Entry-1)->getBB() == BB)
    --Entry;

  NonLocalDepEntry *ExistingResult = 0;
  if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB)
    ExistingResult = &*Entry;

  // If we have a cached entry, and it is non-dirty, use it as the value for
  // this dependency.
  if (ExistingResult && !ExistingResult->getResult().isDirty()) {
    ++NumCacheNonLocalPtr;
    return ExistingResult->getResult();
  }

  // Otherwise, we have to scan for the value.  If we have a dirty cache
  // entry, start scanning from its position, otherwise we scan from the end
  // of the block.
  BasicBlock::iterator ScanPos = BB->end();
  if (ExistingResult && ExistingResult->getResult().getInst()) {
    assert(ExistingResult->getResult().getInst()->getParent() == BB &&
           "Instruction invalidated?");
    ++NumCacheDirtyNonLocalPtr;
    ScanPos = ExistingResult->getResult().getInst();

    // Eliminating the dirty entry from 'Cache', so update the reverse info.
    ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
    RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey);
  } else {
    ++NumUncacheNonLocalPtr;
  }

  // Scan the block for the dependency.
  MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB);

  // If we had a dirty entry for the block, update it.  Otherwise, just add
  // a new entry.
  if (ExistingResult)
    ExistingResult->setResult(Dep);
  else
    Cache->push_back(NonLocalDepEntry(BB, Dep));

  // If the block has a dependency (i.e. it isn't completely transparent to
  // the value), remember the reverse association because we just added it
  // to Cache!
  if (!Dep.isDef() && !Dep.isClobber())
    return Dep;

  // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
  // update MemDep when we remove instructions.
  Instruction *Inst = Dep.getInst();
  assert(Inst && "Didn't depend on anything?");
  ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
  ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
  return Dep;
}

/// SortNonLocalDepInfoCache - Sort a NonLocalDepInfo cache, given a certain
/// number of elements in the array that are already properly ordered.  This is
/// optimized for the case when only a few entries are added.
static void
SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
                         unsigned NumSortedEntries) {
  switch (Cache.size() - NumSortedEntries) {
  case 0:
    // done, no new entries.
    break;
  case 2: {
    // Two new entries, insert the last one into place.
    NonLocalDepEntry Val = Cache.back();
    Cache.pop_back();
    MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
      std::upper_bound(Cache.begin(), Cache.end()-1, Val);
    Cache.insert(Entry, Val);
    // FALL THROUGH.
  }
  case 1:
    // One new entry, just insert the new value at the appropriate position.
    if (Cache.size() != 1) {
      NonLocalDepEntry Val = Cache.back();
      Cache.pop_back();
      MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
        std::upper_bound(Cache.begin(), Cache.end(), Val);
      Cache.insert(Entry, Val);
    }
    break;
  default:
    // Added many values, do a full scale sort.
    std::sort(Cache.begin(), Cache.end());
    break;
  }
}

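// Design note: the switch above special-cases zero, one, or two unsorted tail
// entries because almost every query appends only a couple of new blocks; a
// binary-search insertion is cheaper there than re-running std::sort over the
// whole cache.
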
/// getNonLocalPointerDepFromBB - Perform a dependency query based on
/// pointer/pointeesize starting at the end of StartBB.  Add any clobber/def
/// results to the results vector and keep track of which blocks are visited in
/// 'Visited'.
///
/// This has special behavior for the first block queries (when SkipFirstBlock
/// is true).  In this special case, it ignores the contents of the specified
/// block and starts returning dependence info for its predecessors.
///
/// This function returns false on success, or true to indicate that it could
/// not compute dependence information for some reason.  This should be treated
/// as a clobber dependence on the first instruction in the predecessor block.
bool MemoryDependenceAnalysis::
getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
                            const AliasAnalysis::Location &Loc,
                            bool isLoad, BasicBlock *StartBB,
                            SmallVectorImpl<NonLocalDepResult> &Result,
                            DenseMap<BasicBlock*, Value*> &Visited,
                            bool SkipFirstBlock) {

  // Look up the cached info for Pointer.
  ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);

  // Set up a temporary NLPI value.  If the map doesn't yet have an entry for
  // CacheKey, this value will be inserted as the associated value.  Otherwise,
  // it'll be ignored, and we'll have to check to see if the cached size and
  // tbaa tag are consistent with the current query.
  NonLocalPointerInfo InitialNLPI;
  InitialNLPI.Size = Loc.Size;
  InitialNLPI.TBAATag = Loc.TBAATag;

  // Get the NLPI for CacheKey, inserting one into the map if it doesn't
  // already have one.
  std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
    NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
  NonLocalPointerInfo *CacheInfo = &Pair.first->second;

  // If we already have a cache entry for this CacheKey, we may need to do some
  // work to reconcile the cache entry and the current query.
  if (!Pair.second) {
    if (CacheInfo->Size < Loc.Size) {
      // The query's Size is greater than the cached one.  Throw out the
      // cached data and proceed with the query at the greater size.
      CacheInfo->Pair = BBSkipFirstBlockPair();
      CacheInfo->Size = Loc.Size;
      for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
           DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
        if (Instruction *Inst = DI->getResult().getInst())
          RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
      CacheInfo->NonLocalDeps.clear();
    } else if (CacheInfo->Size > Loc.Size) {
      // This query's Size is less than the cached one.  Conservatively restart
      // the query using the greater size.
      return getNonLocalPointerDepFromBB(Pointer,
                                         Loc.getWithNewSize(CacheInfo->Size),
                                         isLoad, StartBB, Result, Visited,
                                         SkipFirstBlock);
    }

    // If the query's TBAATag is inconsistent with the cached one,
    // conservatively throw out the cached data and restart the query with
    // no tag if needed.
    if (CacheInfo->TBAATag != Loc.TBAATag) {
      if (CacheInfo->TBAATag) {
        CacheInfo->Pair = BBSkipFirstBlockPair();
        CacheInfo->TBAATag = 0;
        for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
             DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
          if (Instruction *Inst = DI->getResult().getInst())
            RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
        CacheInfo->NonLocalDeps.clear();
      }
      if (Loc.TBAATag)
        return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutTBAATag(),
                                           isLoad, StartBB, Result, Visited,
                                           SkipFirstBlock);
    }
  }

  NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;

  // If we have valid cached information for exactly the block we are
  // investigating, just return it with no recomputation.
  if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
    // If we have a fully cached result for this query, we can just return the
    // cached results and populate the visited set.  However, we have to verify
    // that we don't already have conflicting results for these blocks.  Check
    // to ensure that if a block in the results set is in the visited set that
    // it was for the same pointer query.
    if (!Visited.empty()) {
      for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
           I != E; ++I) {
        DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->getBB());
        if (VI == Visited.end() || VI->second == Pointer.getAddr())
          continue;

        // We have a pointer mismatch in a block.  Just return clobber, saying
        // that something was clobbered in this result.  We could also do a
        // non-fully cached query, but there is little point in doing this.
        return true;
      }
    }

    Value *Addr = Pointer.getAddr();
    for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
         I != E; ++I) {
      Visited.insert(std::make_pair(I->getBB(), Addr));
      if (!I->getResult().isNonLocal())
        Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), Addr));
    }
    ++NumCacheCompleteNonLocalPtr;
    return false;
  }

  // Otherwise, either this is a new block, a block with an invalid cache
  // pointer or one that we're about to invalidate by putting more info into it
  // than its valid cache info.  If empty, the result will be valid cache info,
  // otherwise it isn't.
  if (Cache->empty())
    CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
  else
    CacheInfo->Pair = BBSkipFirstBlockPair();

  SmallVector<BasicBlock*, 32> Worklist;
  Worklist.push_back(StartBB);

  // PredList used inside loop.
  SmallVector<std::pair<BasicBlock*, PHITransAddr>, 16> PredList;

  // Keep track of the entries that we know are sorted.  Previously cached
  // entries will all be sorted.  The entries we add are only sorted on demand
  // (we don't insert every element into its sorted position).  We know that we
  // won't get any reuse from currently inserted values, because we don't
  // revisit blocks after we insert info for them.
  unsigned NumSortedEntries = Cache->size();
  DEBUG(AssertSorted(*Cache));

  while (!Worklist.empty()) {
    BasicBlock *BB = Worklist.pop_back_val();

    // Skip the first block if we have it.
    if (!SkipFirstBlock) {
      // Analyze the dependency of *Pointer in FromBB.  See if we already have
      // been here.
      assert(Visited.count(BB) && "Should check 'visited' before adding to WL");

      // Get the dependency info for Pointer in BB.  If we have cached
      // information, we will use it, otherwise we compute it.
      DEBUG(AssertSorted(*Cache, NumSortedEntries));
      MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache,
                                                 NumSortedEntries);

      // If we got a Def or Clobber, add this to the list of results.
      if (!Dep.isNonLocal()) {
        Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
        continue;
      }
    }

    // If 'Pointer' is an instruction defined in this block, then we need to do
    // phi translation to change it into a value live in the predecessor block.
    // If not, we just add the predecessors to the worklist and scan them with
    // the same Pointer.
    if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
      SkipFirstBlock = false;
      SmallVector<BasicBlock*, 16> NewBlocks;
      for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
        // Verify that we haven't looked at this block yet.
        std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
          InsertRes = Visited.insert(std::make_pair(*PI, Pointer.getAddr()));
        if (InsertRes.second) {
          // First time we've looked at *PI.
          NewBlocks.push_back(*PI);
          continue;
        }

        // If we have seen this block before, but it was with a different
        // pointer, then we have a phi translation failure and we have to treat
        // this as a clobber.
        if (InsertRes.first->second != Pointer.getAddr()) {
          // Make sure to clean up the Visited map before continuing on to
          // PredTranslationFailure.
          for (unsigned i = 0; i < NewBlocks.size(); i++)
            Visited.erase(NewBlocks[i]);
          goto PredTranslationFailure;
        }
      }
      Worklist.append(NewBlocks.begin(), NewBlocks.end());
      continue;
    }

    // We do need to do phi translation; if we know ahead of time we can't phi
    // translate this value, don't even try.
    if (!Pointer.IsPotentiallyPHITranslatable())
      goto PredTranslationFailure;

    // We may have added values to the cache list before this PHI translation.
    // If so, we haven't done anything to ensure that the cache remains sorted.
    // Sort it now (if needed) so that recursive invocations of
    // getNonLocalPointerDepFromBB and other routines that could reuse the cache
    // value will only see properly sorted cache arrays.
    if (Cache && NumSortedEntries != Cache->size()) {
      SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
      NumSortedEntries = Cache->size();
    }
    Cache = 0;

    PredList.clear();
    for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
      BasicBlock *Pred = *PI;
      PredList.push_back(std::make_pair(Pred, Pointer));

      // Get the PHI translated pointer in this predecessor.  This can fail if
      // not translatable, in which case the getAddr() returns null.
      PHITransAddr &PredPointer = PredList.back().second;
      PredPointer.PHITranslateValue(BB, Pred, 0);

      Value *PredPtrVal = PredPointer.getAddr();

      // Check to see if we have already visited this pred block with another
      // pointer.  If so, we can't do this lookup.  This failure can occur
      // with PHI translation when a critical edge exists and the PHI node in
      // the successor translates to a pointer value different than the
      // pointer the block was first analyzed with.
      std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
        InsertRes = Visited.insert(std::make_pair(Pred, PredPtrVal));

      if (!InsertRes.second) {
        // We found the pred; take it off the list of preds to visit.
        PredList.pop_back();

        // If the predecessor was visited with PredPtr, then we already did
        // the analysis and can ignore it.
        if (InsertRes.first->second == PredPtrVal)
          continue;

        // Otherwise, the block was previously analyzed with a different
        // pointer.  We can't represent the result of this case, so we just
        // treat this as a phi translation failure.

        // Make sure to clean up the Visited map before continuing on to
        // PredTranslationFailure.
        for (unsigned i = 0; i < PredList.size(); i++)
          Visited.erase(PredList[i].first);

        goto PredTranslationFailure;
      }
    }

    // Actually process results here; this needs to be a separate loop to avoid
    // calling getNonLocalPointerDepFromBB for blocks we don't want to return
    // any results for.  (getNonLocalPointerDepFromBB will modify our
    // datastructures in ways the code after the PredTranslationFailure label
    // doesn't expect.)
    for (unsigned i = 0; i < PredList.size(); i++) {
      BasicBlock *Pred = PredList[i].first;
      PHITransAddr &PredPointer = PredList[i].second;
      Value *PredPtrVal = PredPointer.getAddr();

      bool CanTranslate = true;
      // If PHI translation was unable to find an available pointer in this
      // predecessor, then we have to assume that the pointer is clobbered in
      // that predecessor.  We can still do PRE of the load, which would insert
      // a computation of the pointer in this predecessor.
      if (PredPtrVal == 0)
        CanTranslate = false;

      // FIXME: it is entirely possible that PHI translating will end up with
      // the same value.  Consider PHI translating something like:
      // X = phi [x, bb1], [y, bb2].  PHI translating for bb1 doesn't *need*
      // to recurse here, pedantically speaking.

      // If getNonLocalPointerDepFromBB fails here, that means the cached
      // result conflicted with the Visited list; we have to conservatively
      // assume it is unknown, but this also does not block PRE of the load.
      if (!CanTranslate ||
          getNonLocalPointerDepFromBB(PredPointer,
                                      Loc.getWithNewPtr(PredPtrVal),
                                      isLoad, Pred,
                                      Result, Visited)) {
        // Add the entry to the Result list.
        NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
        Result.push_back(Entry);

        // Since we had a phi translation failure, the cache for CacheKey won't
        // include all of the entries that we need to immediately satisfy future
        // queries.  Mark this in NonLocalPointerDeps by setting the
        // BBSkipFirstBlockPair pointer to null.  This requires reuse of the
        // cached value to do more work but not miss the phi trans failure.
        NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
        NLPI.Pair = BBSkipFirstBlockPair();
        continue;
      }
    }

    // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
    CacheInfo = &NonLocalPointerDeps[CacheKey];
    Cache = &CacheInfo->NonLocalDeps;
    NumSortedEntries = Cache->size();

    // Since we did phi translation, the "Cache" set won't contain all of the
    // results for the query.  This is ok (we can still use it to accelerate
    // specific block queries) but we can't do the fastpath "return all
    // results from the set".  Clear out the indicator for this.
    CacheInfo->Pair = BBSkipFirstBlockPair();
    SkipFirstBlock = false;
    continue;

  PredTranslationFailure:
    // The following code is "failure"; we can't produce a sane translation
    // for the given block.  It assumes that we haven't modified any of
    // our datastructures while processing the current block.

    if (Cache == 0) {
      // Refresh the CacheInfo/Cache pointer if it got invalidated.
      CacheInfo = &NonLocalPointerDeps[CacheKey];
      Cache = &CacheInfo->NonLocalDeps;
      NumSortedEntries = Cache->size();
    }

    // Since we failed phi translation, the "Cache" set won't contain all of the
    // results for the query.  This is ok (we can still use it to accelerate
    // specific block queries) but we can't do the fastpath "return all
    // results from the set".  Clear out the indicator for this.
    CacheInfo->Pair = BBSkipFirstBlockPair();

    // If *nothing* works, mark the pointer as unknown.
    //
    // If this is the magic first block, return this as a clobber of the whole
    // incoming value.  Since we can't phi translate to one of the predecessors,
    // we have to bail out.
    if (SkipFirstBlock)
      return true;

    for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) {
      assert(I != Cache->rend() && "Didn't find current block??");
      if (I->getBB() != BB)
        continue;

      assert(I->getResult().isNonLocal() &&
             "Should only be here with transparent block");
      I->setResult(MemDepResult::getUnknown());
      Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(),
                                         Pointer.getAddr()));
      break;
    }
  }

  // Okay, we're done now.  If we added new values to the cache, re-sort it.
  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
  DEBUG(AssertSorted(*Cache));
  return false;
}

/// RemoveCachedNonLocalPointerDependencies - If P exists in
/// CachedNonLocalPointerInfo, remove it.
void MemoryDependenceAnalysis::
RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) {
  CachedNonLocalPointerInfo::iterator It =
    NonLocalPointerDeps.find(P);
  if (It == NonLocalPointerDeps.end()) return;

  // Remove all of the entries in the BB->val map.  This involves removing
  // instructions from the reverse map.
  NonLocalDepInfo &PInfo = It->second.NonLocalDeps;

  for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
    Instruction *Target = PInfo[i].getResult().getInst();
    if (Target == 0) continue;  // Ignore non-local dep results.
    assert(Target->getParent() == PInfo[i].getBB());

    // Eliminating the dirty entry from 'Cache', so update the reverse info.
    RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
  }

  // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
  NonLocalPointerDeps.erase(It);
}


/// invalidateCachedPointerInfo - This method is used to invalidate cached
/// information about the specified pointer, because it may be too
/// conservative in memdep.  This is an optional call that can be used when
/// the client detects an equivalence between the pointer and some other
/// value and replaces the other value with ptr.  This can make Ptr available
/// in more places than the cached info reflects.
void MemoryDependenceAnalysis::invalidateCachedPointerInfo(Value *Ptr) {
  // If Ptr isn't really a pointer, just ignore it.
  if (!Ptr->getType()->isPointerTy()) return;
  // Flush store info for the pointer.
  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
  // Flush load info for the pointer.
  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
}

/// invalidateCachedPredecessors - Clear the PredIteratorCache info.
/// This needs to be done when the CFG changes, e.g., due to splitting
/// critical edges.
void MemoryDependenceAnalysis::invalidateCachedPredecessors() {
  PredCache->clear();
}

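// Illustrative use (assuming 'MD' is this analysis): a transform that splits
// a critical edge would call
//
//   MD.invalidateCachedPredecessors();
//
// and a pass that replaces a pointer value 'P' with an equivalent one would
// call MD.invalidateCachedPointerInfo(P) so stale cached deps are dropped.
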
/// removeInstruction - Remove an instruction from the dependence analysis,
/// updating the dependence of instructions that previously depended on it.
/// This method attempts to keep the cache coherent using the reverse map.
void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
  // Walk through the Non-local dependencies, removing this one as the value
  // for any cached queries.
  NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst);
  if (NLDI != NonLocalDeps.end()) {
    NonLocalDepInfo &BlockMap = NLDI->second.first;
    for (NonLocalDepInfo::iterator DI = BlockMap.begin(), DE = BlockMap.end();
         DI != DE; ++DI)
      if (Instruction *Inst = DI->getResult().getInst())
        RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
    NonLocalDeps.erase(NLDI);
  }

  // If we have a cached local dependence query for this instruction, remove it.
  //
  LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
  if (LocalDepEntry != LocalDeps.end()) {
    // Remove us from DepInst's reverse set now that the local dep info is gone.
    if (Instruction *Inst = LocalDepEntry->second.getInst())
      RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);

    // Remove this local dependency info.
    LocalDeps.erase(LocalDepEntry);
  }

  // If we have any cached pointer dependencies on this instruction, remove
  // them.  If the instruction has non-pointer type, then it can't be a pointer
  // base.

  // Remove it from both the load info and the store info.  The instruction
  // can't be in either of these maps if it is non-pointer.
  if (RemInst->getType()->isPointerTy()) {
    RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
    RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
  }

  // Loop over all of the things that depend on the instruction we're removing.
  //
  SmallVector<std::pair<Instruction*, Instruction*>, 8> ReverseDepsToAdd;

  // If we find RemInst as a clobber or Def in any of the maps for other values,
  // we need to replace its entry with a dirty version of the instruction after
  // it.  If RemInst is a terminator, we use a null dirty value.
  //
  // Using a dirty version of the instruction after RemInst saves having to scan
  // the entire block to get to this point.
  MemDepResult NewDirtyVal;
  if (!RemInst->isTerminator())
    NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst));

  ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
  if (ReverseDepIt != ReverseLocalDeps.end()) {
    SmallPtrSet<Instruction*, 4> &ReverseDeps = ReverseDepIt->second;
    // RemInst can't be the terminator if it has local stuff depending on it.
    assert(!ReverseDeps.empty() && !isa<TerminatorInst>(RemInst) &&
           "Nothing can locally depend on a terminator");

    for (SmallPtrSet<Instruction*, 4>::iterator I = ReverseDeps.begin(),
         E = ReverseDeps.end(); I != E; ++I) {
      Instruction *InstDependingOnRemInst = *I;
      assert(InstDependingOnRemInst != RemInst &&
             "Already removed our local dep info");

      LocalDeps[InstDependingOnRemInst] = NewDirtyVal;

      // Make sure to remember that new things depend on NewDirtyVal's
      // instruction.
      assert(NewDirtyVal.getInst() && "There is no way something else can have "
             "a local dep on this if it is a terminator!");
      ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(),
                                                InstDependingOnRemInst));
    }

    ReverseLocalDeps.erase(ReverseDepIt);

    // Add new reverse deps after scanning the set, to avoid invalidating the
    // 'ReverseDeps' reference.
  ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
  if (ReverseDepIt != ReverseLocalDeps.end()) {
    SmallPtrSet<Instruction*, 4> &ReverseDeps = ReverseDepIt->second;
    // RemInst can't be the terminator if it has local stuff depending on it.
    assert(!ReverseDeps.empty() && !isa<TerminatorInst>(RemInst) &&
           "Nothing can locally depend on a terminator");

    for (SmallPtrSet<Instruction*, 4>::iterator I = ReverseDeps.begin(),
         E = ReverseDeps.end(); I != E; ++I) {
      Instruction *InstDependingOnRemInst = *I;
      assert(InstDependingOnRemInst != RemInst &&
             "Already removed our local dep info");

      LocalDeps[InstDependingOnRemInst] = NewDirtyVal;

      // Make sure to remember that new things depend on the dirty instruction.
      assert(NewDirtyVal.getInst() && "There is no way something else can have "
             "a local dep on this if it is a terminator!");
      ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(),
                                                InstDependingOnRemInst));
    }

    ReverseLocalDeps.erase(ReverseDepIt);

    // Add new reverse deps after scanning the set, to avoid invalidating the
    // 'ReverseDeps' reference.
    while (!ReverseDepsToAdd.empty()) {
      ReverseLocalDeps[ReverseDepsToAdd.back().first]
        .insert(ReverseDepsToAdd.back().second);
      ReverseDepsToAdd.pop_back();
    }
  }

  ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
  if (ReverseDepIt != ReverseNonLocalDeps.end()) {
    SmallPtrSet<Instruction*, 4> &Set = ReverseDepIt->second;
    for (SmallPtrSet<Instruction*, 4>::iterator I = Set.begin(), E = Set.end();
         I != E; ++I) {
      assert(*I != RemInst && "Already removed NonLocalDep info for RemInst");

      PerInstNLInfo &INLD = NonLocalDeps[*I];
      // The information is now dirty!
      INLD.second = true;

      for (NonLocalDepInfo::iterator DI = INLD.first.begin(),
           DE = INLD.first.end(); DI != DE; ++DI) {
        if (DI->getResult().getInst() != RemInst) continue;

        // Convert to a dirty entry for the subsequent instruction.
        DI->setResult(NewDirtyVal);

        if (Instruction *NextI = NewDirtyVal.getInst())
          ReverseDepsToAdd.push_back(std::make_pair(NextI, *I));
      }
    }

    ReverseNonLocalDeps.erase(ReverseDepIt);

    // Add new reverse deps after scanning the set, to avoid invalidating 'Set'.
    while (!ReverseDepsToAdd.empty()) {
      ReverseNonLocalDeps[ReverseDepsToAdd.back().first]
        .insert(ReverseDepsToAdd.back().second);
      ReverseDepsToAdd.pop_back();
    }
  }

  // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
  // value in the NonLocalPointerDeps info.
  ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
    ReverseNonLocalPtrDeps.find(RemInst);
  if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
    SmallPtrSet<ValueIsLoadPair, 4> &Set = ReversePtrDepIt->second;
    SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd;

    for (SmallPtrSet<ValueIsLoadPair, 4>::iterator I = Set.begin(),
         E = Set.end(); I != E; ++I) {
      ValueIsLoadPair P = *I;
      assert(P.getPointer() != RemInst &&
             "Already removed NonLocalPointerDeps info for RemInst");

      NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;

      // The cache is not valid for any specific block anymore.
      NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();

      // Update any entries for RemInst to use the instruction after it.
      for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end();
           DI != DE; ++DI) {
        if (DI->getResult().getInst() != RemInst) continue;

        // Convert to a dirty entry for the subsequent instruction.
        DI->setResult(NewDirtyVal);

        if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
          ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
      }

      // Re-sort the NonLocalDepInfo.  Changing the dirty entry to its
      // subsequent value may invalidate the sort order.
      std::sort(NLPDI.begin(), NLPDI.end());
    }

    ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);

    while (!ReversePtrDepsToAdd.empty()) {
      ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first]
        .insert(ReversePtrDepsToAdd.back().second);
      ReversePtrDepsToAdd.pop_back();
    }
  }

  assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
  AA->deleteValue(RemInst);
  DEBUG(verifyRemoved(RemInst));
}
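// Illustrative only: the expected calling convention for removeInstruction.
// A client must notify memdep *before* erasing the instruction from the IR;
// otherwise the reverse maps would be left pointing at freed memory.  MD here
// is a hypothetical MemoryDependenceAnalysis* held by the client pass:
//
//   if (MD) MD->removeInstruction(I);  // drop cached deps first
//   I->eraseFromParent();              // then actually delete the IR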
/// verifyRemoved - Verify that the specified instruction does not occur
/// in our internal data structures.
void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
  for (LocalDepMapType::const_iterator I = LocalDeps.begin(),
       E = LocalDeps.end(); I != E; ++I) {
    assert(I->first != D && "Inst occurs in data structures");
    assert(I->second.getInst() != D &&
           "Inst occurs in data structures");
  }

  for (CachedNonLocalPointerInfo::const_iterator I =NonLocalPointerDeps.begin(),
       E = NonLocalPointerDeps.end(); I != E; ++I) {
    assert(I->first.getPointer() != D && "Inst occurs in NLPD map key");
    const NonLocalDepInfo &Val = I->second.NonLocalDeps;
    for (NonLocalDepInfo::const_iterator II = Val.begin(), EE = Val.end();
         II != EE; ++II)
      assert(II->getResult().getInst() != D && "Inst occurs as NLPD value");
  }

  for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(),
       E = NonLocalDeps.end(); I != E; ++I) {
    assert(I->first != D && "Inst occurs in data structures");
    const PerInstNLInfo &INLD = I->second;
    for (NonLocalDepInfo::const_iterator II = INLD.first.begin(),
         EE = INLD.first.end(); II != EE; ++II)
      assert(II->getResult().getInst() != D &&
             "Inst occurs in data structures");
  }

  for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(),
       E = ReverseLocalDeps.end(); I != E; ++I) {
    assert(I->first != D && "Inst occurs in data structures");
    for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
         EE = I->second.end(); II != EE; ++II)
      assert(*II != D && "Inst occurs in data structures");
  }

  for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(),
       E = ReverseNonLocalDeps.end(); I != E; ++I) {
    assert(I->first != D && "Inst occurs in data structures");
    for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
         EE = I->second.end(); II != EE; ++II)
      assert(*II != D && "Inst occurs in data structures");
  }

  for (ReverseNonLocalPtrDepTy::const_iterator
       I = ReverseNonLocalPtrDeps.begin(),
       E = ReverseNonLocalPtrDeps.end(); I != E; ++I) {
    assert(I->first != D && "Inst occurs in rev NLPD map");

    for (SmallPtrSet<ValueIsLoadPair, 4>::const_iterator II = I->second.begin(),
         EE = I->second.end(); II != EE; ++II)
      assert(*II != ValueIsLoadPair(D, false) &&
             *II != ValueIsLoadPair(D, true) &&
             "Inst occurs in ReverseNonLocalPtrDeps map");
  }
}
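// Illustrative only: verifyRemoved is reached through the DEBUG() macro at
// the end of removeInstruction, so this exhaustive walk over every cache map
// runs only in asserts-enabled builds when the "memdep" debug type is active
// (e.g. passing -debug-only=memdep to opt); in release builds the call
// compiles away entirely.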