      1 //===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements an analysis that determines, for a given memory
     11 // operation, what preceding memory operations it depends on.  It builds on
     12 // alias analysis information, and tries to provide a lazy, caching interface to
     13 // a common kind of alias information query.
     14 //
     15 //===----------------------------------------------------------------------===//
     16 
     17 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
     18 #include "llvm/ADT/STLExtras.h"
     19 #include "llvm/ADT/Statistic.h"
     20 #include "llvm/Analysis/AliasAnalysis.h"
     21 #include "llvm/Analysis/AssumptionCache.h"
     22 #include "llvm/Analysis/InstructionSimplify.h"
     23 #include "llvm/Analysis/MemoryBuiltins.h"
     24 #include "llvm/Analysis/OrderedBasicBlock.h"
     25 #include "llvm/Analysis/PHITransAddr.h"
     26 #include "llvm/Analysis/TargetLibraryInfo.h"
     27 #include "llvm/Analysis/ValueTracking.h"
     28 #include "llvm/IR/DataLayout.h"
     29 #include "llvm/IR/Dominators.h"
     30 #include "llvm/IR/Function.h"
     31 #include "llvm/IR/Instructions.h"
     32 #include "llvm/IR/IntrinsicInst.h"
     33 #include "llvm/IR/LLVMContext.h"
     34 #include "llvm/IR/PredIteratorCache.h"
     35 #include "llvm/Support/Debug.h"
     36 using namespace llvm;
     37 
     38 #define DEBUG_TYPE "memdep"
     39 
     40 STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
     41 STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
     42 STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
     43 
     44 STATISTIC(NumCacheNonLocalPtr,
     45           "Number of fully cached non-local ptr responses");
     46 STATISTIC(NumCacheDirtyNonLocalPtr,
     47           "Number of cached, but dirty, non-local ptr responses");
     48 STATISTIC(NumUncacheNonLocalPtr,
     49           "Number of uncached non-local ptr responses");
     50 STATISTIC(NumCacheCompleteNonLocalPtr,
     51           "Number of block queries that were completely cached");
     52 
     53 // Limit for the number of instructions to scan in a block.
     54 
     55 static cl::opt<unsigned> BlockScanLimit(
     56     "memdep-block-scan-limit", cl::Hidden, cl::init(100),
     57     cl::desc("The number of instructions to scan in a block in memory "
     58              "dependency analysis (default = 100)"));
     59 
     60 // Limit on the number of memdep results to process.
     61 static const unsigned int NumResultsLimit = 100;
     62 
     63 char MemoryDependenceAnalysis::ID = 0;
     64 
     65 // Register this pass...
     66 INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep",
     67                 "Memory Dependence Analysis", false, true)
     68 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
     69 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
     70 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
     71 INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep",
     72                       "Memory Dependence Analysis", false, true)
     73 
     74 MemoryDependenceAnalysis::MemoryDependenceAnalysis()
     75     : FunctionPass(ID) {
     76   initializeMemoryDependenceAnalysisPass(*PassRegistry::getPassRegistry());
     77 }
     78 MemoryDependenceAnalysis::~MemoryDependenceAnalysis() {
     79 }
     80 
     81 /// Clean up memory in between runs
     82 void MemoryDependenceAnalysis::releaseMemory() {
     83   LocalDeps.clear();
     84   NonLocalDeps.clear();
     85   NonLocalPointerDeps.clear();
     86   ReverseLocalDeps.clear();
     87   ReverseNonLocalDeps.clear();
     88   ReverseNonLocalPtrDeps.clear();
     89   PredCache.clear();
     90 }
     91 
     92 /// getAnalysisUsage - Does not modify anything.  It uses Alias Analysis.
     93 ///
     94 void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
     95   AU.setPreservesAll();
     96   AU.addRequired<AssumptionCacheTracker>();
     97   AU.addRequiredTransitive<AAResultsWrapperPass>();
     98   AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
     99 }
    100 
    101 bool MemoryDependenceAnalysis::runOnFunction(Function &F) {
    102   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
    103   AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    104   DominatorTreeWrapperPass *DTWP =
    105       getAnalysisIfAvailable<DominatorTreeWrapperPass>();
    106   DT = DTWP ? &DTWP->getDomTree() : nullptr;
    107   TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
    108   return false;
    109 }
    110 
    111 /// RemoveFromReverseMap - This is a helper function that removes Val from
    112 /// 'Inst's set in ReverseMap.  If the set becomes empty, remove Inst's entry.
    113 template <typename KeyTy>
    114 static void RemoveFromReverseMap(DenseMap<Instruction*,
    115                                  SmallPtrSet<KeyTy, 4> > &ReverseMap,
    116                                  Instruction *Inst, KeyTy Val) {
    117   typename DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> >::iterator
    118   InstIt = ReverseMap.find(Inst);
    119   assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
    120   bool Found = InstIt->second.erase(Val);
    121   assert(Found && "Invalid reverse map!"); (void)Found;
    122   if (InstIt->second.empty())
    123     ReverseMap.erase(InstIt);
    124 }
    125 
    126 /// GetLocation - If the given instruction references a specific memory
    127 /// location, fill in Loc with the details, otherwise set Loc.Ptr to null.
    128 /// Return a ModRefInfo value describing the general behavior of the
    129 /// instruction.
    130 static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
    131                               const TargetLibraryInfo &TLI) {
    132   if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    133     if (LI->isUnordered()) {
    134       Loc = MemoryLocation::get(LI);
    135       return MRI_Ref;
    136     }
    137     if (LI->getOrdering() == Monotonic) {
    138       Loc = MemoryLocation::get(LI);
    139       return MRI_ModRef;
    140     }
    141     Loc = MemoryLocation();
    142     return MRI_ModRef;
    143   }
    144 
    145   if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    146     if (SI->isUnordered()) {
    147       Loc = MemoryLocation::get(SI);
    148       return MRI_Mod;
    149     }
    150     if (SI->getOrdering() == Monotonic) {
    151       Loc = MemoryLocation::get(SI);
    152       return MRI_ModRef;
    153     }
    154     Loc = MemoryLocation();
    155     return MRI_ModRef;
    156   }
    157 
    158   if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
    159     Loc = MemoryLocation::get(V);
    160     return MRI_ModRef;
    161   }
    162 
    163   if (const CallInst *CI = isFreeCall(Inst, &TLI)) {
    164     // calls to free() deallocate the entire structure
    165     Loc = MemoryLocation(CI->getArgOperand(0));
    166     return MRI_Mod;
    167   }
    168 
    169   if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    170     AAMDNodes AAInfo;
    171 
    172     switch (II->getIntrinsicID()) {
    173     case Intrinsic::lifetime_start:
    174     case Intrinsic::lifetime_end:
    175     case Intrinsic::invariant_start:
    176       II->getAAMetadata(AAInfo);
    177       Loc = MemoryLocation(
    178           II->getArgOperand(1),
    179           cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(), AAInfo);
    180       // These intrinsics don't really modify the memory, but returning Mod
    181       // will allow them to be handled conservatively.
    182       return MRI_Mod;
    183     case Intrinsic::invariant_end:
    184       II->getAAMetadata(AAInfo);
    185       Loc = MemoryLocation(
    186           II->getArgOperand(2),
    187           cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(), AAInfo);
    188       // These intrinsics don't really modify the memory, but returning Mod
    189       // will allow them to be handled conservatively.
    190       return MRI_Mod;
    191     default:
    192       break;
    193     }
    194   }
    195 
    196   // Otherwise, just do the coarse-grained thing that always works.
    197   if (Inst->mayWriteToMemory())
    198     return MRI_ModRef;
    199   if (Inst->mayReadFromMemory())
    200     return MRI_Ref;
    201   return MRI_NoModRef;
    202 }
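
        // For example (illustrative, not from the original source), given
        //   %v = load i32, i32* %p
        // this fills Loc with the 4-byte location at %p and returns MRI_Ref, while a
        // call to free(%p) fills Loc with the location pointed to by %p and returns
        // MRI_Mod.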
    203 
    204 /// getCallSiteDependencyFrom - Private helper for finding the local
    205 /// dependencies of a call site.
    206 MemDepResult MemoryDependenceAnalysis::
    207 getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
    208                           BasicBlock::iterator ScanIt, BasicBlock *BB) {
    209   unsigned Limit = BlockScanLimit;
    210 
    211   // Walk backwards through the block, looking for dependencies
    212   while (ScanIt != BB->begin()) {
    213     // Limit the amount of scanning we do so we don't end up with quadratic
    214     // running time on extreme testcases.
    215     --Limit;
    216     if (!Limit)
    217       return MemDepResult::getUnknown();
    218 
    219     Instruction *Inst = &*--ScanIt;
    220 
    221     // If this inst is a memory op, get the pointer it accessed
    222     MemoryLocation Loc;
    223     ModRefInfo MR = GetLocation(Inst, Loc, *TLI);
    224     if (Loc.Ptr) {
    225       // A simple instruction.
    226       if (AA->getModRefInfo(CS, Loc) != MRI_NoModRef)
    227         return MemDepResult::getClobber(Inst);
    228       continue;
    229     }
    230 
    231     if (auto InstCS = CallSite(Inst)) {
    232       // Debug intrinsics don't cause dependences.
    233       if (isa<DbgInfoIntrinsic>(Inst)) continue;
    234       // If these two calls do not interfere, look past it.
    235       switch (AA->getModRefInfo(CS, InstCS)) {
    236       case MRI_NoModRef:
    237         // If the two calls are the same, return InstCS as a Def, so that
    238         // CS can be found redundant and eliminated.
    239         if (isReadOnlyCall && !(MR & MRI_Mod) &&
    240             CS.getInstruction()->isIdenticalToWhenDefined(Inst))
    241           return MemDepResult::getDef(Inst);
    242 
    243         // Otherwise if the two calls don't interact (e.g. InstCS is readnone)
    244         // keep scanning.
    245         continue;
    246       default:
    247         return MemDepResult::getClobber(Inst);
    248       }
    249     }
    250 
    251     // If we could not obtain a pointer for the instruction and the instruction
    252     // touches memory then assume that this is a dependency.
    253     if (MR != MRI_NoModRef)
    254       return MemDepResult::getClobber(Inst);
    255   }
    256 
    257   // No dependence found.  If this is the entry block of the function, it is
    258   // unknown, otherwise it is non-local.
    259   if (BB != &BB->getParent()->getEntryBlock())
    260     return MemDepResult::getNonLocal();
    261   return MemDepResult::getNonFuncLocal();
    262 }
    263 
    264 /// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that
    265 /// would fully overlap MemLoc if done as a wider legal integer load.
    266 ///
    267 /// MemLocBase and MemLocOffs are lazily computed here the first time the
    268 /// base/offset of MemLoc is needed.
    269 static bool isLoadLoadClobberIfExtendedToFullWidth(const MemoryLocation &MemLoc,
    270                                                    const Value *&MemLocBase,
    271                                                    int64_t &MemLocOffs,
    272                                                    const LoadInst *LI) {
    273   const DataLayout &DL = LI->getModule()->getDataLayout();
    274 
    275   // If we haven't already computed the base/offset of MemLoc, do so now.
    276   if (!MemLocBase)
    277     MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, DL);
    278 
    279   unsigned Size = MemoryDependenceAnalysis::getLoadLoadClobberFullWidthSize(
    280       MemLocBase, MemLocOffs, MemLoc.Size, LI);
    281   return Size != 0;
    282 }
    283 
    284 /// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that
    285 /// looks at a memory location for a load (given by MemLocBase, MemLocOffs, and
    286 /// MemLocSize) and compares it against a load.  If the specified load could
    287 /// be safely widened to a larger integer load that is 1) still efficient,
    288 /// 2) safe for the target, and 3) would provide the specified memory
    289 /// location value, then this function returns the size in bytes of the
    290 /// load width to use.  If not, this returns zero.
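        ///
        /// For example (an illustrative calculation, not from the original source):
        /// for a query of 1 byte at offset 3 from some base, an i8 load of offset 0
        /// from the same base that is 4-byte aligned can be widened to an i32 on a
        /// target where i16 and i32 are legal integers, so this returns 4.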
    291 unsigned MemoryDependenceAnalysis::getLoadLoadClobberFullWidthSize(
    292     const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize,
    293     const LoadInst *LI) {
    294   // We can only extend simple integer loads.
    295   if (!isa<IntegerType>(LI->getType()) || !LI->isSimple()) return 0;
    296 
    297   // Load widening is hostile to ThreadSanitizer: it may cause false positives
    298   // or make the reports more cryptic (access sizes are wrong).
    299   if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
    300     return 0;
    301 
    302   const DataLayout &DL = LI->getModule()->getDataLayout();
    303 
    304   // Get the base of this load.
    305   int64_t LIOffs = 0;
    306   const Value *LIBase =
    307       GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, DL);
    308 
    309   // If the two pointers are not based on the same pointer, we can't tell that
    310   // they are related.
    311   if (LIBase != MemLocBase) return 0;
    312 
    313   // Okay, the two values are based on the same pointer, but returned as
    314   // no-alias.  This happens when we have things like two byte loads at "P+1"
    315   // and "P+3".  Check to see if increasing the size of the "LI" load up to its
    316   // alignment (or the largest native integer type) will allow us to load all
    317   // the bits required by MemLoc.
    318 
    319   // If MemLoc is before LI, then no widening of LI will help us out.
    320   if (MemLocOffs < LIOffs) return 0;
    321 
    322   // Get the alignment of the load in bytes.  We assume that it is safe to load
    323   // any legal integer up to this size without a problem.  For example, if we're
    324   // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
    325   // widen it up to an i32 load.  If it is known 2-byte aligned, we can widen it
    326   // to i16.
    327   unsigned LoadAlign = LI->getAlignment();
    328 
    329   int64_t MemLocEnd = MemLocOffs+MemLocSize;
    330 
    331   // If no amount of rounding up will let MemLoc fit into LI, then bail out.
    332   if (LIOffs+LoadAlign < MemLocEnd) return 0;
    333 
    334   // This is the size of the load to try.  Start with the next larger power of
    335   // two.
    336   unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits()/8U;
    337   NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
    338 
    339   while (true) {
    340     // If this load size is bigger than our known alignment or would not fit
    341     // into a native integer register, then we fail.
    342     if (NewLoadByteSize > LoadAlign ||
    343         !DL.fitsInLegalInteger(NewLoadByteSize*8))
    344       return 0;
    345 
    346     if (LIOffs + NewLoadByteSize > MemLocEnd &&
    347         LI->getParent()->getParent()->hasFnAttribute(
    348             Attribute::SanitizeAddress))
    349       // We will be reading past the location accessed by the original program.
    350       // While this is safe in a regular build, Address Safety analysis tools
    351       // may start reporting false warnings. So, don't do widening.
    352       return 0;
    353 
    354     // If a load of this width would include all of MemLoc, then we succeed.
    355     if (LIOffs+NewLoadByteSize >= MemLocEnd)
    356       return NewLoadByteSize;
    357 
    358     NewLoadByteSize <<= 1;
    359   }
    360 }
    361 
    362 static bool isVolatile(Instruction *Inst) {
    363   if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
    364     return LI->isVolatile();
    365   else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
    366     return SI->isVolatile();
    367   else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(Inst))
    368     return AI->isVolatile();
    369   return false;
    370 }
    371 
    372 
    373 /// getPointerDependencyFrom - Return the instruction on which a memory
    374 /// location depends.  If isLoad is true, this routine ignores may-aliases with
    375 /// read-only operations.  If isLoad is false, this routine ignores may-aliases
    376 /// with reads from read-only locations.  If possible, pass the query
    377 /// instruction as well; this function may take advantage of the metadata
    378 /// annotated to the query instruction to refine the result.
    379 MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom(
    380     const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
    381     BasicBlock *BB, Instruction *QueryInst) {
    382 
    383   if (QueryInst != nullptr) {
    384     if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
    385       MemDepResult invariantGroupDependency =
    386           getInvariantGroupPointerDependency(LI, BB);
    387 
    388       if (invariantGroupDependency.isDef())
    389         return invariantGroupDependency;
    390     }
    391   }
    392   return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst);
    393 }
    394 
    395 MemDepResult
    396 MemoryDependenceAnalysis::getInvariantGroupPointerDependency(LoadInst *LI,
    397                                                              BasicBlock *BB) {
    398   Value *LoadOperand = LI->getPointerOperand();
    399   // It is not safe to walk the use list of a global value, because function
    400   // passes aren't allowed to look outside their functions.
    401   if (isa<GlobalValue>(LoadOperand))
    402     return MemDepResult::getUnknown();
    403 
    404   auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group);
    405   if (!InvariantGroupMD)
    406     return MemDepResult::getUnknown();
    407 
    408   MemDepResult Result = MemDepResult::getUnknown();
    409   llvm::SmallSet<Value *, 14> Seen;
    410   // Queue to process all pointers that are equivalent to load operand.
    411   llvm::SmallVector<Value *, 8> LoadOperandsQueue;
    412   LoadOperandsQueue.push_back(LoadOperand);
    413   while (!LoadOperandsQueue.empty()) {
    414     Value *Ptr = LoadOperandsQueue.pop_back_val();
    415     if (isa<GlobalValue>(Ptr))
    416       continue;
    417 
    418     if (auto *BCI = dyn_cast<BitCastInst>(Ptr)) {
    419       if (!Seen.count(BCI->getOperand(0))) {
    420         LoadOperandsQueue.push_back(BCI->getOperand(0));
    421         Seen.insert(BCI->getOperand(0));
    422       }
    423     }
    424 
    425     for (Use &Us : Ptr->uses()) {
    426       auto *U = dyn_cast<Instruction>(Us.getUser());
    427       if (!U || U == LI || !DT->dominates(U, LI))
    428         continue;
    429 
    430       if (auto *BCI = dyn_cast<BitCastInst>(U)) {
    431         if (!Seen.count(BCI)) {
    432           LoadOperandsQueue.push_back(BCI);
    433           Seen.insert(BCI);
    434         }
    435         continue;
    436       }
    437       // If we hit a load/store with the same invariant.group metadata (and
    438       // the same pointer operand), we can assume that the value pointed to by
    439       // the pointer operand didn't change.
    440       if ((isa<LoadInst>(U) || isa<StoreInst>(U)) && U->getParent() == BB &&
    441           U->getMetadata(LLVMContext::MD_invariant_group) == InvariantGroupMD)
    442         return MemDepResult::getDef(U);
    443     }
    444   }
    445   return Result;
    446 }
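
        // Illustrative IR (a hypothetical example, not from the original source) for
        // which this query returns a Def for the load %v:
        //   store i8 42, i8* %p, !invariant.group !0
        //   %v = load i8, i8* %p, !invariant.group !0
        // The store uses the same pointer operand, dominates the load, lives in the
        // same block, and carries the same !invariant.group metadata.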
    447 
    448 MemDepResult MemoryDependenceAnalysis::getSimplePointerDependencyFrom(
    449     const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
    450     BasicBlock *BB, Instruction *QueryInst) {
    451 
    452   const Value *MemLocBase = nullptr;
    453   int64_t MemLocOffset = 0;
    454   unsigned Limit = BlockScanLimit;
    455   bool isInvariantLoad = false;
    456 
    457   // We must be careful with atomic accesses, as they may allow another thread
    458   //   to touch this location, clobbering it. We are conservative: if the
    459   //   QueryInst is not a simple (non-atomic) memory access, we automatically
    460   //   return getClobber.
    461   // If it is simple, we know based on the results of
    462   // "Compiler testing via a theory of sound optimisations in the C11/C++11
    463   //   memory model" in PLDI 2013, that a non-atomic location can only be
    464   //   clobbered between a pair of a release and an acquire action, with no
    465   //   access to the location in between.
    466   // Here is an example for giving the general intuition behind this rule.
    467   // In the following code:
    468   //   store x 0;
    469   //   release action; [1]
    470   //   acquire action; [4]
    471   //   %val = load x;
    472   // It is unsafe to replace %val by 0 because another thread may be running:
    473   //   acquire action; [2]
    474   //   store x 42;
    475   //   release action; [3]
    476   // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
    477   // being 42. A key property of this program however is that if either
    478   // 1 or 4 were missing, there would be a race between the store of 42
    479   // either the store of 0 or the load (making the whole progam racy).
    480   // The paper mentionned above shows that the same property is respected
    481   // by every program that can detect any optimisation of that kind: either
    482   // it is racy (undefined) or there is a release followed by an acquire
    483   // between the pair of accesses under consideration.
    484 
    485   // If the load is invariant, we "know" that it doesn't alias *any* write. We
    486   // do want to respect mustalias results since defs are useful for value
    487   // forwarding, but any mayalias write can be assumed to be noalias.
    488   // Arguably, this logic should be pushed inside AliasAnalysis itself.
    489   if (isLoad && QueryInst) {
    490     LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
    491     if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
    492       isInvariantLoad = true;
    493   }
    494 
    495   const DataLayout &DL = BB->getModule()->getDataLayout();
    496 
    497   // Create a numbered basic block to lazily compute and cache instruction
    498   // positions inside a BB. This is used to provide fast queries for relative
    499   // position between two instructions in a BB and can be used by
    500   // AliasAnalysis::callCapturesBefore.
    501   OrderedBasicBlock OBB(BB);
    502 
    503   // Walk backwards through the basic block, looking for dependencies.
    504   while (ScanIt != BB->begin()) {
    505     Instruction *Inst = &*--ScanIt;
    506 
    507     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
    508       // Debug intrinsics don't (and can't) cause dependencies.
    509       if (isa<DbgInfoIntrinsic>(II)) continue;
    510 
    511     // Limit the amount of scanning we do so we don't end up with quadratic
    512     // running time on extreme testcases.
    513     --Limit;
    514     if (!Limit)
    515       return MemDepResult::getUnknown();
    516 
    517     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    518       // If we reach a lifetime begin or end marker, then the query ends here
    519       // because the value is undefined.
    520       if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
    521         // FIXME: This only considers queries directly on the invariant-tagged
    522         // pointer, not on query pointers that are indexed off of them.  It'd
    523         // be nice to handle that at some point (the right approach is to use
    524         // GetPointerBaseWithConstantOffset).
    525         if (AA->isMustAlias(MemoryLocation(II->getArgOperand(1)), MemLoc))
    526           return MemDepResult::getDef(II);
    527         continue;
    528       }
    529     }
    530 
    531     // Values depend on loads if the pointers are must aliased.  This means that
    532     // a load depends on another must aliased load from the same value.
    533     // One exception is atomic loads: a value can depend on an atomic load that it
    534     // does not alias with when this atomic load indicates that another thread may
    535     // be accessing the location.
    536     if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    537 
    538       // While volatile accesses cannot be eliminated, they do not have to
    539       // clobber non-aliasing locations; normal accesses, for example, can be
    540       // safely reordered with volatile accesses.
    541       if (LI->isVolatile()) {
    542         if (!QueryInst)
    543           // Original QueryInst *may* be volatile
    544           return MemDepResult::getClobber(LI);
    545         if (isVolatile(QueryInst))
    546           // Ordering required if QueryInst is itself volatile
    547           return MemDepResult::getClobber(LI);
    548         // Otherwise, volatile doesn't imply any special ordering
    549       }
    550 
    551       // Atomic loads complicate matters. A Monotonic load is OK if the query
    552       // inst is itself not atomic; stronger orderings are treated as clobbers.
    553       // FIXME: This is overly conservative.
    554       if (LI->isAtomic() && LI->getOrdering() > Unordered) {
    555         if (!QueryInst)
    556           return MemDepResult::getClobber(LI);
    557         if (LI->getOrdering() != Monotonic)
    558           return MemDepResult::getClobber(LI);
    559         if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) {
    560           if (!QueryLI->isSimple())
    561             return MemDepResult::getClobber(LI);
    562         } else if (auto *QuerySI = dyn_cast<StoreInst>(QueryInst)) {
    563           if (!QuerySI->isSimple())
    564             return MemDepResult::getClobber(LI);
    565         } else if (QueryInst->mayReadOrWriteMemory()) {
    566           return MemDepResult::getClobber(LI);
    567         }
    568       }
    569 
    570       MemoryLocation LoadLoc = MemoryLocation::get(LI);
    571 
    572       // If we found a pointer, check if it could be the same as our pointer.
    573       AliasResult R = AA->alias(LoadLoc, MemLoc);
    574 
    575       if (isLoad) {
    576         if (R == NoAlias) {
    577           // If this is an over-aligned integer load (for example,
    578           // "load i8* %P, align 4") see if it would obviously overlap with the
    579           // queried location if widened to a larger load (e.g. if the queried
    580           // location is 1 byte at P+1).  If so, return it as a load/load
    581           // clobber result, allowing the client to decide to widen the load if
    582           // it wants to.
    583           if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
    584             if (LI->getAlignment() * 8 > ITy->getPrimitiveSizeInBits() &&
    585                 isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
    586                                                        MemLocOffset, LI))
    587               return MemDepResult::getClobber(Inst);
    588           }
    589           continue;
    590         }
    591 
    592         // Must aliased loads are defs of each other.
    593         if (R == MustAlias)
    594           return MemDepResult::getDef(Inst);
    595 
    596 #if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads
    597       // in terms of clobbering loads, but since it does this by looking
    598       // at the clobbering load directly, it doesn't know about any
    599       // phi translation that may have happened along the way.
    600 
    601         // If we have a partial alias, then return this as a clobber for the
    602         // client to handle.
    603         if (R == PartialAlias)
    604           return MemDepResult::getClobber(Inst);
    605 #endif
    606 
    607         // Other may-alias loads don't impose a dependence on this load; keep
    608         // scanning.
    609         continue;
    610       }
    611 
    612       // Stores don't depend on other no-aliased accesses.
    613       if (R == NoAlias)
    614         continue;
    615 
    616       // Stores don't alias loads from read-only memory.
    617       if (AA->pointsToConstantMemory(LoadLoc))
    618         continue;
    619 
    620       // Stores depend on may/must aliased loads.
    621       return MemDepResult::getDef(Inst);
    622     }
    623 
    624     if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    625       // Atomic stores complicate matters.
    626       // A Monotonic store is OK if the query inst is itself not atomic.
    627       // FIXME: This is overly conservative.
    628       if (!SI->isUnordered()) {
    629         if (!QueryInst)
    630           return MemDepResult::getClobber(SI);
    631         if (SI->getOrdering() != Monotonic)
    632           return MemDepResult::getClobber(SI);
    633         if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) {
    634           if (!QueryLI->isSimple())
    635             return MemDepResult::getClobber(SI);
    636         } else if (auto *QuerySI = dyn_cast<StoreInst>(QueryInst)) {
    637           if (!QuerySI->isSimple())
    638             return MemDepResult::getClobber(SI);
    639         } else if (QueryInst->mayReadOrWriteMemory()) {
    640           return MemDepResult::getClobber(SI);
    641         }
    642       }
    643 
    644       // FIXME: this is overly conservative.
    645       // While volatile accesses cannot be eliminated, they do not have to
    646       // clobber non-aliasing locations; normal accesses can, for example, be
    647       // reordered with volatile accesses.
    648       if (SI->isVolatile())
    649         return MemDepResult::getClobber(SI);
    650 
    651       // If alias analysis can tell that this store is guaranteed to not modify
    652       // the query pointer, ignore it.  Use getModRefInfo to handle cases where
    653       // the query pointer points to constant memory etc.
    654       if (AA->getModRefInfo(SI, MemLoc) == MRI_NoModRef)
    655         continue;
    656 
    657       // Ok, this store might clobber the query pointer.  Check to see if it is
    658       // a must alias: in this case, we want to return this as a def.
    659       MemoryLocation StoreLoc = MemoryLocation::get(SI);
    660 
    661       // If we found a pointer, check if it could be the same as our pointer.
    662       AliasResult R = AA->alias(StoreLoc, MemLoc);
    663 
    664       if (R == NoAlias)
    665         continue;
    666       if (R == MustAlias)
    667         return MemDepResult::getDef(Inst);
    668       if (isInvariantLoad)
    669        continue;
    670       return MemDepResult::getClobber(Inst);
    671     }
    672 
    673     // If this is an allocation, and if we know that the accessed pointer is to
    674     // the allocation, return Def.  This means that there is no dependence and
    675     // the access can be optimized based on that.  For example, a load could
    676     // turn into undef.
    677     // Note: Only determine this to be a malloc if Inst is the malloc call, not
    678     // a subsequent bitcast of the malloc call result.  There can be stores to
    679     // the malloced memory between the malloc call and its bitcast uses, and we
    680     // need to continue scanning until the malloc call.
    681     if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, TLI)) {
    682       const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, DL);
    683 
    684       if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr))
    685         return MemDepResult::getDef(Inst);
    686       if (isInvariantLoad)
    687         continue;
    688       // Be conservative if the accessed pointer may alias the allocation.
    689       if (AA->alias(Inst, AccessPtr) != NoAlias)
    690         return MemDepResult::getClobber(Inst);
    691       // If the allocation is not aliased and does not read memory (unlike
    692       // strdup, which does), it is safe to ignore.
    693       if (isa<AllocaInst>(Inst) ||
    694           isMallocLikeFn(Inst, TLI) || isCallocLikeFn(Inst, TLI))
    695         continue;
    696     }
    697 
    698     if (isInvariantLoad)
    699        continue;
    700 
    701     // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
    702     ModRefInfo MR = AA->getModRefInfo(Inst, MemLoc);
    703     // If necessary, perform additional analysis.
    704     if (MR == MRI_ModRef)
    705       MR = AA->callCapturesBefore(Inst, MemLoc, DT, &OBB);
    706     switch (MR) {
    707     case MRI_NoModRef:
    708       // If the call has no effect on the queried pointer, just ignore it.
    709       continue;
    710     case MRI_Mod:
    711       return MemDepResult::getClobber(Inst);
    712     case MRI_Ref:
    713       // If the call is known to never store to the pointer, and if this is a
    714       // load query, we can safely ignore it (scan past it).
    715       if (isLoad)
    716         continue;
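              // FALL THROUGH: for a non-load query, a call that may read the pointer
              // is still a potential dependence.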
    717     default:
    718       // Otherwise, there is a potential dependence.  Return a clobber.
    719       return MemDepResult::getClobber(Inst);
    720     }
    721   }
    722 
    723   // No dependence found.  If this is the entry block of the function, it is
    724   // unknown, otherwise it is non-local.
    725   if (BB != &BB->getParent()->getEntryBlock())
    726     return MemDepResult::getNonLocal();
    727   return MemDepResult::getNonFuncLocal();
    728 }
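
        // For example (illustrative, not from the original source), when querying the
        // load below:
        //   store i32 1, i32* %p
        //   %v = load i32, i32* %p
        // the backwards scan reports the store as a Def (must alias), whereas a store
        // through a pointer that only may-alias %p would be reported as a Clobber.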
    729 
    730 /// getDependency - Return the instruction on which a memory operation
    731 /// depends.
    732 MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
    733   Instruction *ScanPos = QueryInst;
    734 
    735   // Check for a cached result
    736   MemDepResult &LocalCache = LocalDeps[QueryInst];
    737 
    738   // If the cached entry is non-dirty, just return it.  Note that this depends
    739   // on MemDepResult's default constructing to 'dirty'.
    740   if (!LocalCache.isDirty())
    741     return LocalCache;
    742 
    743   // Otherwise, if we have a dirty entry, we know we can start the scan at that
    744   // instruction, which may save us some work.
    745   if (Instruction *Inst = LocalCache.getInst()) {
    746     ScanPos = Inst;
    747 
    748     RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
    749   }
    750 
    751   BasicBlock *QueryParent = QueryInst->getParent();
    752 
    753   // Do the scan.
    754   if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
    755     // No dependence found.  If this is the entry block of the function, it is
    756     // unknown, otherwise it is non-local.
    757     if (QueryParent != &QueryParent->getParent()->getEntryBlock())
    758       LocalCache = MemDepResult::getNonLocal();
    759     else
    760       LocalCache = MemDepResult::getNonFuncLocal();
    761   } else {
    762     MemoryLocation MemLoc;
    763     ModRefInfo MR = GetLocation(QueryInst, MemLoc, *TLI);
    764     if (MemLoc.Ptr) {
    765       // If we can do a pointer scan, make it happen.
    766       bool isLoad = !(MR & MRI_Mod);
    767       if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))
    768         isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
    769 
    770       LocalCache = getPointerDependencyFrom(
    771           MemLoc, isLoad, ScanPos->getIterator(), QueryParent, QueryInst);
    772     } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
    773       CallSite QueryCS(QueryInst);
    774       bool isReadOnly = AA->onlyReadsMemory(QueryCS);
    775       LocalCache = getCallSiteDependencyFrom(
    776           QueryCS, isReadOnly, ScanPos->getIterator(), QueryParent);
    777     } else
    778       // Non-memory instruction.
    779       LocalCache = MemDepResult::getUnknown();
    780   }
    781 
    782   // Remember the result!
    783   if (Instruction *I = LocalCache.getInst())
    784     ReverseLocalDeps[I].insert(QueryInst);
    785 
    786   return LocalCache;
    787 }
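
        // A minimal usage sketch (illustrative only; MD is assumed to be a
        // MemoryDependenceAnalysis instance and Load an instruction in the analyzed
        // function):
        //   MemDepResult Dep = MD.getDependency(Load);
        //   if (Dep.isDef())
        //     handleDef(Dep.getInst());   // hypothetical client code
        //   else if (Dep.isNonLocal())
        //     ...                         // fall back to the non-local queries below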
    788 
    789 #ifndef NDEBUG
    790 /// AssertSorted - This method is used when -debug is specified to verify that
    791 /// cache arrays are properly kept sorted.
    792 static void AssertSorted(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
    793                          int Count = -1) {
    794   if (Count == -1) Count = Cache.size();
    795   if (Count == 0) return;
    796 
    797   for (unsigned i = 1; i != unsigned(Count); ++i)
    798     assert(!(Cache[i] < Cache[i-1]) && "Cache isn't sorted!");
    799 }
    800 #endif
    801 
    802 /// getNonLocalCallDependency - Perform a full dependency query for the
    803 /// specified call, returning the set of blocks that the value is
    804 /// potentially live across.  The returned set of results will include a
    805 /// "NonLocal" result for all blocks where the value is live across.
    806 ///
    807 /// This method assumes the instruction returns a "NonLocal" dependency
    808 /// within its own block.
    809 ///
    810 /// This returns a reference to an internal data structure that may be
    811 /// invalidated on the next non-local query or when an instruction is
    812 /// removed.  Clients must copy this data if they want it around longer than
    813 /// that.
    814 const MemoryDependenceAnalysis::NonLocalDepInfo &
    815 MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
    816   assert(getDependency(QueryCS.getInstruction()).isNonLocal() &&
    817  "getNonLocalCallDependency should only be used on calls with non-local deps!");
    818   PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()];
    819   NonLocalDepInfo &Cache = CacheP.first;
    820 
    821   /// DirtyBlocks - This is the set of blocks that need to be recomputed.  In
    822   /// the cached case, this can happen due to instructions being deleted etc. In
    823   /// the uncached case, this starts out as the set of predecessors we care
    824   /// about.
    825   SmallVector<BasicBlock*, 32> DirtyBlocks;
    826 
    827   if (!Cache.empty()) {
    828     // Okay, we have a cache entry.  If we know it is not dirty, just return it
    829     // with no computation.
    830     if (!CacheP.second) {
    831       ++NumCacheNonLocal;
    832       return Cache;
    833     }
    834 
    835     // If we already have a partially computed set of results, scan them to
    836     // determine what is dirty, seeding our initial DirtyBlocks worklist.
    837     for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end();
    838        I != E; ++I)
    839       if (I->getResult().isDirty())
    840         DirtyBlocks.push_back(I->getBB());
    841 
    842     // Sort the cache so that we can do fast binary search lookups below.
    843     std::sort(Cache.begin(), Cache.end());
    844 
    845     ++NumCacheDirtyNonLocal;
    846     //cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
    847     //     << Cache.size() << " cached: " << *QueryInst;
    848   } else {
    849     // Seed DirtyBlocks with each of the preds of QueryInst's block.
    850     BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
    851     for (BasicBlock *Pred : PredCache.get(QueryBB))
    852       DirtyBlocks.push_back(Pred);
    853     ++NumUncacheNonLocal;
    854   }
    855 
    856   // isReadonlyCall - If this is a read-only call, we can be more aggressive.
    857   bool isReadonlyCall = AA->onlyReadsMemory(QueryCS);
    858 
    859   SmallPtrSet<BasicBlock*, 64> Visited;
    860 
    861   unsigned NumSortedEntries = Cache.size();
    862   DEBUG(AssertSorted(Cache));
    863 
    864   // Iterate while we still have blocks to update.
    865   while (!DirtyBlocks.empty()) {
    866     BasicBlock *DirtyBB = DirtyBlocks.back();
    867     DirtyBlocks.pop_back();
    868 
    869     // Already processed this block?
    870     if (!Visited.insert(DirtyBB).second)
    871       continue;
    872 
    873     // Do a binary search to see if we already have an entry for this block in
    874     // the cache set.  If so, find it.
    875     DEBUG(AssertSorted(Cache, NumSortedEntries));
    876     NonLocalDepInfo::iterator Entry =
    877       std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries,
    878                        NonLocalDepEntry(DirtyBB));
    879     if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
    880       --Entry;
    881 
    882     NonLocalDepEntry *ExistingResult = nullptr;
    883     if (Entry != Cache.begin()+NumSortedEntries &&
    884         Entry->getBB() == DirtyBB) {
    885       // If we already have an entry, and if it isn't already dirty, the block
    886       // is done.
    887       if (!Entry->getResult().isDirty())
    888         continue;
    889 
    890       // Otherwise, remember this slot so we can update the value.
    891       ExistingResult = &*Entry;
    892     }
    893 
    894     // If the dirty entry has a pointer, start scanning from it so we don't have
    895     // to rescan the entire block.
    896     BasicBlock::iterator ScanPos = DirtyBB->end();
    897     if (ExistingResult) {
    898       if (Instruction *Inst = ExistingResult->getResult().getInst()) {
    899         ScanPos = Inst->getIterator();
    900         // We're removing QueryInst's use of Inst.
    901         RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
    902                              QueryCS.getInstruction());
    903       }
    904     }
    905 
    906     // Find out if this block has a local dependency for QueryInst.
    907     MemDepResult Dep;
    908 
    909     if (ScanPos != DirtyBB->begin()) {
    910       Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall, ScanPos, DirtyBB);
    911     } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
    912       // No dependence found.  If this is the entry block of the function, it is
    913       // non-func-local, otherwise it is non-local.
    914       Dep = MemDepResult::getNonLocal();
    915     } else {
    916       Dep = MemDepResult::getNonFuncLocal();
    917     }
    918 
    919     // If we had a dirty entry for the block, update it.  Otherwise, just add
    920     // a new entry.
    921     if (ExistingResult)
    922       ExistingResult->setResult(Dep);
    923     else
    924       Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
    925 
    926     // If the block has a dependency (i.e. it isn't completely transparent to
    927     // the value), remember the association!
    928     if (!Dep.isNonLocal()) {
    929       // Keep the ReverseNonLocalDeps map up to date so we can efficiently
    930       // update this when we remove instructions.
    931       if (Instruction *Inst = Dep.getInst())
    932         ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
    933     } else {
    934 
    935       // If the block *is* completely transparent to the load, we need to check
    936       // the predecessors of this block.  Add them to our worklist.
    937       for (BasicBlock *Pred : PredCache.get(DirtyBB))
    938         DirtyBlocks.push_back(Pred);
    939     }
    940   }
    941 
    942   return Cache;
    943 }
    944 
    945 /// getNonLocalPointerDependency - Perform a full dependency query for an
    946 /// access to the specified (non-volatile) memory location, returning the
    947 /// set of instructions that either define or clobber the value.
    948 ///
    949 /// This method assumes the pointer has a "NonLocal" dependency within its
    950 /// own block.
    951 ///
    952 void MemoryDependenceAnalysis::
    953 getNonLocalPointerDependency(Instruction *QueryInst,
    954                              SmallVectorImpl<NonLocalDepResult> &Result) {
    955   const MemoryLocation Loc = MemoryLocation::get(QueryInst);
    956   bool isLoad = isa<LoadInst>(QueryInst);
    957   BasicBlock *FromBB = QueryInst->getParent();
    958   assert(FromBB);
    959 
    960   assert(Loc.Ptr->getType()->isPointerTy() &&
    961          "Can't get pointer deps of a non-pointer!");
    962   Result.clear();
    963 
    964   // This routine does not expect to deal with volatile instructions.
    965   // Doing so would require piping the QueryInst all the way through.
    966   // TODO: volatiles can't be elided, but they can be reordered with other
    967   // non-volatile accesses.
    968 
    969   // We currently give up on any instruction which is ordered, but we do handle
    970   // atomic instructions which are unordered.
    971   // TODO: Handle ordered instructions
    972   auto isOrdered = [](Instruction *Inst) {
    973     if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    974       return !LI->isUnordered();
    975     } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    976       return !SI->isUnordered();
    977     }
    978     return false;
    979   };
    980   if (isVolatile(QueryInst) || isOrdered(QueryInst)) {
    981     Result.push_back(NonLocalDepResult(FromBB,
    982                                        MemDepResult::getUnknown(),
    983                                        const_cast<Value *>(Loc.Ptr)));
    984     return;
    985   }
    986   const DataLayout &DL = FromBB->getModule()->getDataLayout();
    987   PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, AC);
    988 
    989   // This is the set of blocks we've inspected, and the pointer we consider in
    990   // each block.  Because of critical edges, we currently bail out if querying
    991   // a block with multiple different pointers.  This can happen during PHI
    992   // translation.
    993   DenseMap<BasicBlock*, Value*> Visited;
    994   if (!getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
    995                                    Result, Visited, true))
    996     return;
    997   Result.clear();
    998   Result.push_back(NonLocalDepResult(FromBB,
    999                                      MemDepResult::getUnknown(),
   1000                                      const_cast<Value *>(Loc.Ptr)));
   1001 }
   1002 
   1003 /// GetNonLocalInfoForBlock - Compute the memdep value for BB with
   1004 /// Pointer/PointeeSize, either using cached information in Cache or doing a
   1005 /// lookup (which may use dirty cache info if available).  If we do a lookup,
   1006 /// add the result to the cache.
   1007 MemDepResult MemoryDependenceAnalysis::GetNonLocalInfoForBlock(
   1008     Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
   1009     BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
   1010 
   1011   // Do a binary search to see if we already have an entry for this block in
   1012   // the cache set.  If so, find it.
   1013   NonLocalDepInfo::iterator Entry =
   1014     std::upper_bound(Cache->begin(), Cache->begin()+NumSortedEntries,
   1015                      NonLocalDepEntry(BB));
   1016   if (Entry != Cache->begin() && (Entry-1)->getBB() == BB)
   1017     --Entry;
   1018 
   1019   NonLocalDepEntry *ExistingResult = nullptr;
   1020   if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB)
   1021     ExistingResult = &*Entry;
   1022 
   1023   // If we have a cached entry, and it is non-dirty, use it as the value for
   1024   // this dependency.
   1025   if (ExistingResult && !ExistingResult->getResult().isDirty()) {
   1026     ++NumCacheNonLocalPtr;
   1027     return ExistingResult->getResult();
   1028   }
   1029 
   1030   // Otherwise, we have to scan for the value.  If we have a dirty cache
   1031   // entry, start scanning from its position, otherwise we scan from the end
   1032   // of the block.
   1033   BasicBlock::iterator ScanPos = BB->end();
   1034   if (ExistingResult && ExistingResult->getResult().getInst()) {
   1035     assert(ExistingResult->getResult().getInst()->getParent() == BB &&
   1036            "Instruction invalidated?");
   1037     ++NumCacheDirtyNonLocalPtr;
   1038     ScanPos = ExistingResult->getResult().getInst()->getIterator();
   1039 
   1040     // Eliminating the dirty entry from 'Cache', so update the reverse info.
   1041     ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
   1042     RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);
   1043   } else {
   1044     ++NumUncacheNonLocalPtr;
   1045   }
   1046 
   1047   // Scan the block for the dependency.
   1048   MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB,
   1049                                               QueryInst);
   1050 
   1051   // If we had a dirty entry for the block, update it.  Otherwise, just add
   1052   // a new entry.
   1053   if (ExistingResult)
   1054     ExistingResult->setResult(Dep);
   1055   else
   1056     Cache->push_back(NonLocalDepEntry(BB, Dep));
   1057 
   1058   // If the block has a dependency (i.e. it isn't completely transparent to
   1059   // the value), remember the reverse association because we just added it
   1060   // to Cache!
   1061   if (!Dep.isDef() && !Dep.isClobber())
   1062     return Dep;
   1063 
   1064   // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
   1065   // update MemDep when we remove instructions.
   1066   Instruction *Inst = Dep.getInst();
   1067   assert(Inst && "Didn't depend on anything?");
   1068   ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
   1069   ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
   1070   return Dep;
   1071 }
   1072 
   1073 /// SortNonLocalDepInfoCache - Sort the NonLocalDepInfo cache, given a certain
   1074 /// number of elements in the array that are already properly ordered.  This is
   1075 /// optimized for the case when only a few entries are added.
   1076 static void
   1077 SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
   1078                          unsigned NumSortedEntries) {
   1079   switch (Cache.size() - NumSortedEntries) {
   1080   case 0:
   1081     // done, no new entries.
   1082     break;
   1083   case 2: {
   1084     // Two new entries, insert the last one into place.
   1085     NonLocalDepEntry Val = Cache.back();
   1086     Cache.pop_back();
   1087     MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
   1088       std::upper_bound(Cache.begin(), Cache.end()-1, Val);
   1089     Cache.insert(Entry, Val);
   1090     // FALL THROUGH.
   1091   }
   1092   case 1:
   1093     // One new entry; just insert the new value at the appropriate position.
   1094     if (Cache.size() != 1) {
   1095       NonLocalDepEntry Val = Cache.back();
   1096       Cache.pop_back();
   1097       MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
   1098         std::upper_bound(Cache.begin(), Cache.end(), Val);
   1099       Cache.insert(Entry, Val);
   1100     }
   1101     break;
   1102   default:
   1103     // Added many values, do a full scale sort.
   1104     std::sort(Cache.begin(), Cache.end());
   1105     break;
   1106   }
   1107 }
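
        // For example (illustrative, not from the original source): if Cache held
        // sorted entries for blocks {A, C, D} (NumSortedEntries == 3) and one new
        // entry for block B was appended, the `case 1` path above moves B into place
        // with a single upper_bound search instead of re-sorting the whole cache.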
   1108 
   1109 /// getNonLocalPointerDepFromBB - Perform a dependency query based on
   1110 /// pointer/pointeesize starting at the end of StartBB.  Add any clobber/def
   1111 /// results to the results vector and keep track of which blocks are visited in
   1112 /// 'Visited'.
   1113 ///
   1114 /// This has special behavior for the first block queries (when SkipFirstBlock
   1115 /// is true).  In this special case, it ignores the contents of the specified
   1116 /// block and starts returning dependence info for its predecessors.
   1117 ///
   1118 /// This function returns false on success, or true to indicate that it could
   1119 /// not compute dependence information for some reason.  This should be treated
   1120 /// as a clobber dependence on the first instruction in the predecessor block.
   1121 bool MemoryDependenceAnalysis::getNonLocalPointerDepFromBB(
   1122     Instruction *QueryInst, const PHITransAddr &Pointer,
   1123     const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
   1124     SmallVectorImpl<NonLocalDepResult> &Result,
   1125     DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock) {
   1126   // Look up the cached info for Pointer.
   1127   ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
   1128 
   1129   // Set up a temporary NLPI value. If the map doesn't yet have an entry for
   1130   // CacheKey, this value will be inserted as the associated value. Otherwise,
   1131   // it'll be ignored, and we'll have to check to see if the cached size and
   1132   // aa tags are consistent with the current query.
   1133   NonLocalPointerInfo InitialNLPI;
   1134   InitialNLPI.Size = Loc.Size;
   1135   InitialNLPI.AATags = Loc.AATags;
   1136 
   1137   // Get the NLPI for CacheKey, inserting one into the map if it doesn't
   1138   // already have one.
   1139   std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
   1140     NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
   1141   NonLocalPointerInfo *CacheInfo = &Pair.first->second;
   1142 
   1143   // If we already have a cache entry for this CacheKey, we may need to do some
   1144   // work to reconcile the cache entry and the current query.
   1145   if (!Pair.second) {
   1146     if (CacheInfo->Size < Loc.Size) {
   1147       // The query's Size is greater than the cached one. Throw out the
   1148       // cached data and proceed with the query at the greater size.
   1149       CacheInfo->Pair = BBSkipFirstBlockPair();
   1150       CacheInfo->Size = Loc.Size;
   1151       for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
   1152            DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
   1153         if (Instruction *Inst = DI->getResult().getInst())
   1154           RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
   1155       CacheInfo->NonLocalDeps.clear();
   1156     } else if (CacheInfo->Size > Loc.Size) {
   1157       // This query's Size is less than the cached one. Conservatively restart
   1158       // the query using the greater size.
   1159       return getNonLocalPointerDepFromBB(QueryInst, Pointer,
   1160                                          Loc.getWithNewSize(CacheInfo->Size),
   1161                                          isLoad, StartBB, Result, Visited,
   1162                                          SkipFirstBlock);
   1163     }
   1164 
   1165     // If the query's AATags are inconsistent with the cached one,
   1166     // conservatively throw out the cached data and restart the query with
   1167     // no tag if needed.
   1168     if (CacheInfo->AATags != Loc.AATags) {
   1169       if (CacheInfo->AATags) {
   1170         CacheInfo->Pair = BBSkipFirstBlockPair();
   1171         CacheInfo->AATags = AAMDNodes();
   1172         for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
   1173              DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
   1174           if (Instruction *Inst = DI->getResult().getInst())
   1175             RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
   1176         CacheInfo->NonLocalDeps.clear();
   1177       }
   1178       if (Loc.AATags)
   1179         return getNonLocalPointerDepFromBB(QueryInst,
   1180                                            Pointer, Loc.getWithoutAATags(),
   1181                                            isLoad, StartBB, Result, Visited,
   1182                                            SkipFirstBlock);
   1183     }
   1184   }
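           // Illustrative example of the reconciliation above (a sketch with
           // hypothetical widths): if a 4-byte access to an address was cached and
           // the current query is for an 8-byte access, the cached entries are
           // discarded and the query proceeds at 8 bytes; conversely, a 4-byte
           // query against an 8-byte cache entry is conservatively re-run at the
           // cached 8-byte size via the recursive call above.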
   1185 
   1186   NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
   1187 
   1188   // If we have valid cached information for exactly the block we are
   1189   // investigating, just return it with no recomputation.
   1190   if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
    1191     // If we have a fully cached result for this query, we can just return the
    1192     // cached results and populate the visited set.  However, we have to verify
    1193     // that we don't already have conflicting results for these blocks.  Check
    1194     // that any block appearing in both the result set and the visited set was
    1195     // visited for the same pointer query.
   1196     if (!Visited.empty()) {
   1197       for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
   1198            I != E; ++I) {
   1199         DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->getBB());
   1200         if (VI == Visited.end() || VI->second == Pointer.getAddr())
   1201           continue;
   1202 
    1203         // We have a pointer mismatch in a block.  Just return true (treated as
    1204         // a clobber by the caller).  We could also fall back to a non-fully
    1205         // cached query, but there is little point in doing so.
   1206         return true;
   1207       }
   1208     }
   1209 
   1210     Value *Addr = Pointer.getAddr();
   1211     for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
   1212          I != E; ++I) {
   1213       Visited.insert(std::make_pair(I->getBB(), Addr));
   1214       if (I->getResult().isNonLocal()) {
   1215         continue;
   1216       }
   1217 
   1218       if (!DT) {
   1219         Result.push_back(NonLocalDepResult(I->getBB(),
   1220                                            MemDepResult::getUnknown(),
   1221                                            Addr));
   1222       } else if (DT->isReachableFromEntry(I->getBB())) {
   1223         Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), Addr));
   1224       }
   1225     }
   1226     ++NumCacheCompleteNonLocalPtr;
   1227     return false;
   1228   }
   1229 
    1230   // Otherwise, this is either a new block, a block with an invalid cache
    1231   // entry, or a block we're about to invalidate by putting more info into it
    1232   // than its current cache holds.  If the cache is empty, the result of this
    1233   // query will become valid cached info; otherwise it won't.
   1234   if (Cache->empty())
   1235     CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
   1236   else
   1237     CacheInfo->Pair = BBSkipFirstBlockPair();
   1238 
   1239   SmallVector<BasicBlock*, 32> Worklist;
   1240   Worklist.push_back(StartBB);
   1241 
   1242   // PredList used inside loop.
   1243   SmallVector<std::pair<BasicBlock*, PHITransAddr>, 16> PredList;
   1244 
    1245   // Keep track of the entries that we know are sorted.  Previously cached
    1246   // entries will all be sorted.  The entries we add are only sorted on demand
    1247   // (we don't insert every element into its sorted position).  We know that we
    1248   // won't get any reuse from currently inserted values, because we don't
    1249   // revisit blocks after we insert info for them.
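           // (Illustrative: if the cache already holds five sorted entries and this
           //  walk appends three more, NumSortedEntries stays at five until
           //  SortNonLocalDepInfoCache merges the unsorted tail back in.)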
   1250   unsigned NumSortedEntries = Cache->size();
   1251   DEBUG(AssertSorted(*Cache));
   1252 
   1253   while (!Worklist.empty()) {
   1254     BasicBlock *BB = Worklist.pop_back_val();
   1255 
    1256     // If we process a large number of blocks, the walk becomes very expensive
    1257     // and likely isn't worth the effort.
   1258     if (Result.size() > NumResultsLimit) {
   1259       Worklist.clear();
   1260       // Sort it now (if needed) so that recursive invocations of
   1261       // getNonLocalPointerDepFromBB and other routines that could reuse the
   1262       // cache value will only see properly sorted cache arrays.
   1263       if (Cache && NumSortedEntries != Cache->size()) {
   1264         SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
   1265       }
   1266       // Since we bail out, the "Cache" set won't contain all of the
   1267       // results for the query.  This is ok (we can still use it to accelerate
   1268       // specific block queries) but we can't do the fastpath "return all
   1269       // results from the set".  Clear out the indicator for this.
   1270       CacheInfo->Pair = BBSkipFirstBlockPair();
   1271       return true;
   1272     }
   1273 
   1274     // Skip the first block if we have it.
   1275     if (!SkipFirstBlock) {
    1276       // Analyze the dependency of Pointer in BB.  We must already have marked
    1277       // this block as visited (checked by the assert below).
   1278       assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
   1279 
   1280       // Get the dependency info for Pointer in BB.  If we have cached
   1281       // information, we will use it, otherwise we compute it.
   1282       DEBUG(AssertSorted(*Cache, NumSortedEntries));
   1283       MemDepResult Dep = GetNonLocalInfoForBlock(QueryInst,
   1284                                                  Loc, isLoad, BB, Cache,
   1285                                                  NumSortedEntries);
   1286 
   1287       // If we got a Def or Clobber, add this to the list of results.
   1288       if (!Dep.isNonLocal()) {
   1289         if (!DT) {
   1290           Result.push_back(NonLocalDepResult(BB,
   1291                                              MemDepResult::getUnknown(),
   1292                                              Pointer.getAddr()));
   1293           continue;
   1294         } else if (DT->isReachableFromEntry(BB)) {
   1295           Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
   1296           continue;
   1297         }
   1298       }
   1299     }
   1300 
   1301     // If 'Pointer' is an instruction defined in this block, then we need to do
   1302     // phi translation to change it into a value live in the predecessor block.
   1303     // If not, we just add the predecessors to the worklist and scan them with
   1304     // the same Pointer.
   1305     if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
   1306       SkipFirstBlock = false;
   1307       SmallVector<BasicBlock*, 16> NewBlocks;
   1308       for (BasicBlock *Pred : PredCache.get(BB)) {
   1309         // Verify that we haven't looked at this block yet.
   1310         std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
   1311           InsertRes = Visited.insert(std::make_pair(Pred, Pointer.getAddr()));
   1312         if (InsertRes.second) {
    1313           // First time we've looked at this predecessor.
   1314           NewBlocks.push_back(Pred);
   1315           continue;
   1316         }
   1317 
    1318         // If we have seen this block before, but it was with a different
    1319         // pointer, then we have a phi translation failure and we have to treat
    1320         // this as a clobber.
   1321         if (InsertRes.first->second != Pointer.getAddr()) {
   1322           // Make sure to clean up the Visited map before continuing on to
   1323           // PredTranslationFailure.
   1324           for (unsigned i = 0; i < NewBlocks.size(); i++)
   1325             Visited.erase(NewBlocks[i]);
   1326           goto PredTranslationFailure;
   1327         }
   1328       }
   1329       Worklist.append(NewBlocks.begin(), NewBlocks.end());
   1330       continue;
   1331     }
   1332 
    1333     // We do need to do phi translation.  If we know ahead of time that we
    1334     // can't phi translate this value, don't even try.
   1335     if (!Pointer.IsPotentiallyPHITranslatable())
   1336       goto PredTranslationFailure;
   1337 
   1338     // We may have added values to the cache list before this PHI translation.
   1339     // If so, we haven't done anything to ensure that the cache remains sorted.
   1340     // Sort it now (if needed) so that recursive invocations of
   1341     // getNonLocalPointerDepFromBB and other routines that could reuse the cache
   1342     // value will only see properly sorted cache arrays.
   1343     if (Cache && NumSortedEntries != Cache->size()) {
   1344       SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
   1345       NumSortedEntries = Cache->size();
   1346     }
   1347     Cache = nullptr;
   1348 
   1349     PredList.clear();
   1350     for (BasicBlock *Pred : PredCache.get(BB)) {
   1351       PredList.push_back(std::make_pair(Pred, Pointer));
   1352 
    1353       // Get the PHI translated pointer in this predecessor.  This can fail if
    1354       // not translatable, in which case getAddr() returns null.
   1355       PHITransAddr &PredPointer = PredList.back().second;
   1356       PredPointer.PHITranslateValue(BB, Pred, DT, /*MustDominate=*/false);
   1357       Value *PredPtrVal = PredPointer.getAddr();
   1358 
   1359       // Check to see if we have already visited this pred block with another
   1360       // pointer.  If so, we can't do this lookup.  This failure can occur
   1361       // with PHI translation when a critical edge exists and the PHI node in
   1362       // the successor translates to a pointer value different than the
   1363       // pointer the block was first analyzed with.
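               // Illustrative sketch (hypothetical IR): if BB contains
               //   %p = phi i8* [ %a, %Pred ], ...
               // then a query pointer of %p translates to %a in %Pred.  If %Pred is
               // already in Visited with a different pointer (because it was reached
               // earlier along another path), the insertion below fails and we
               // conservatively treat it as a phi translation failure.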
   1364       std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
   1365         InsertRes = Visited.insert(std::make_pair(Pred, PredPtrVal));
   1366 
   1367       if (!InsertRes.second) {
    1368         // This pred was already visited; take it off the list of preds to visit.
   1369         PredList.pop_back();
   1370 
   1371         // If the predecessor was visited with PredPtr, then we already did
   1372         // the analysis and can ignore it.
   1373         if (InsertRes.first->second == PredPtrVal)
   1374           continue;
   1375 
   1376         // Otherwise, the block was previously analyzed with a different
   1377         // pointer.  We can't represent the result of this case, so we just
   1378         // treat this as a phi translation failure.
   1379 
   1380         // Make sure to clean up the Visited map before continuing on to
   1381         // PredTranslationFailure.
   1382         for (unsigned i = 0, n = PredList.size(); i < n; ++i)
   1383           Visited.erase(PredList[i].first);
   1384 
   1385         goto PredTranslationFailure;
   1386       }
   1387     }
   1388 
    1389     // Actually process results here; this needs to be a separate loop to avoid
    1390     // calling getNonLocalPointerDepFromBB for blocks we don't want to return
    1391     // any results for.  (getNonLocalPointerDepFromBB will modify our
    1392     // data structures in ways the code after the PredTranslationFailure label
    1393     // doesn't expect.)
   1394     for (unsigned i = 0, n = PredList.size(); i < n; ++i) {
   1395       BasicBlock *Pred = PredList[i].first;
   1396       PHITransAddr &PredPointer = PredList[i].second;
   1397       Value *PredPtrVal = PredPointer.getAddr();
   1398 
   1399       bool CanTranslate = true;
   1400       // If PHI translation was unable to find an available pointer in this
   1401       // predecessor, then we have to assume that the pointer is clobbered in
   1402       // that predecessor.  We can still do PRE of the load, which would insert
   1403       // a computation of the pointer in this predecessor.
   1404       if (!PredPtrVal)
   1405         CanTranslate = false;
   1406 
   1407       // FIXME: it is entirely possible that PHI translating will end up with
   1408       // the same value.  Consider PHI translating something like:
   1409       // X = phi [x, bb1], [y, bb2].  PHI translating for bb1 doesn't *need*
   1410       // to recurse here, pedantically speaking.
   1411 
   1412       // If getNonLocalPointerDepFromBB fails here, that means the cached
   1413       // result conflicted with the Visited list; we have to conservatively
   1414       // assume it is unknown, but this also does not block PRE of the load.
   1415       if (!CanTranslate ||
   1416           getNonLocalPointerDepFromBB(QueryInst, PredPointer,
   1417                                       Loc.getWithNewPtr(PredPtrVal),
   1418                                       isLoad, Pred,
   1419                                       Result, Visited)) {
   1420         // Add the entry to the Result list.
   1421         NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
   1422         Result.push_back(Entry);
   1423 
    1424         // Since we had a phi translation failure, the cache for CacheKey won't
    1425         // include all of the entries that we need to immediately satisfy future
    1426         // queries.  Mark this in NonLocalPointerDeps by setting the
    1427         // BBSkipFirstBlockPair pointer to null.  This makes later reuse of the
    1428         // cached value do more work, but won't miss the phi translation failure.
   1429         NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
   1430         NLPI.Pair = BBSkipFirstBlockPair();
   1431         continue;
   1432       }
   1433     }
   1434 
    1435   // Refresh the CacheInfo/Cache pointer in case it was invalidated.
   1436     CacheInfo = &NonLocalPointerDeps[CacheKey];
   1437     Cache = &CacheInfo->NonLocalDeps;
   1438     NumSortedEntries = Cache->size();
   1439 
   1440     // Since we did phi translation, the "Cache" set won't contain all of the
   1441     // results for the query.  This is ok (we can still use it to accelerate
   1442     // specific block queries) but we can't do the fastpath "return all
    1443     // results from the set".  Clear out the indicator for this.
   1444     CacheInfo->Pair = BBSkipFirstBlockPair();
   1445     SkipFirstBlock = false;
   1446     continue;
   1447 
   1448   PredTranslationFailure:
    1449     // The following code is "failure"; we can't produce a sane translation
    1450     // for the given block.  It assumes that we haven't modified any of
    1451     // our data structures while processing the current block.
   1452 
   1453     if (!Cache) {
   1454       // Refresh the CacheInfo/Cache pointer if it got invalidated.
   1455       CacheInfo = &NonLocalPointerDeps[CacheKey];
   1456       Cache = &CacheInfo->NonLocalDeps;
   1457       NumSortedEntries = Cache->size();
   1458     }
   1459 
   1460     // Since we failed phi translation, the "Cache" set won't contain all of the
   1461     // results for the query.  This is ok (we can still use it to accelerate
   1462     // specific block queries) but we can't do the fastpath "return all
   1463     // results from the set".  Clear out the indicator for this.
   1464     CacheInfo->Pair = BBSkipFirstBlockPair();
   1465 
   1466     // If *nothing* works, mark the pointer as unknown.
   1467     //
   1468     // If this is the magic first block, return this as a clobber of the whole
   1469     // incoming value.  Since we can't phi translate to one of the predecessors,
   1470     // we have to bail out.
   1471     if (SkipFirstBlock)
   1472       return true;
   1473 
   1474     for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) {
   1475       assert(I != Cache->rend() && "Didn't find current block??");
   1476       if (I->getBB() != BB)
   1477         continue;
   1478 
   1479       assert((I->getResult().isNonLocal() || !DT->isReachableFromEntry(BB)) &&
   1480              "Should only be here with transparent block");
   1481       I->setResult(MemDepResult::getUnknown());
   1482       Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(),
   1483                                          Pointer.getAddr()));
   1484       break;
   1485     }
   1486   }
   1487 
   1488   // Okay, we're done now.  If we added new values to the cache, re-sort it.
   1489   SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
   1490   DEBUG(AssertSorted(*Cache));
   1491   return false;
   1492 }
   1493 
   1494 /// RemoveCachedNonLocalPointerDependencies - If P exists in
   1495 /// CachedNonLocalPointerInfo, remove it.
   1496 void MemoryDependenceAnalysis::
   1497 RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) {
   1498   CachedNonLocalPointerInfo::iterator It =
   1499     NonLocalPointerDeps.find(P);
   1500   if (It == NonLocalPointerDeps.end()) return;
   1501 
   1502   // Remove all of the entries in the BB->val map.  This involves removing
   1503   // instructions from the reverse map.
   1504   NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
   1505 
   1506   for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
   1507     Instruction *Target = PInfo[i].getResult().getInst();
   1508     if (!Target) continue;  // Ignore non-local dep results.
   1509     assert(Target->getParent() == PInfo[i].getBB());
   1510 
    1511     // We are removing this entry from 'Cache', so update the reverse info.
   1512     RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
   1513   }
   1514 
   1515   // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
   1516   NonLocalPointerDeps.erase(It);
   1517 }
   1518 
   1519 
    1520 /// invalidateCachedPointerInfo - This method is used to invalidate cached
    1521 /// information about the specified pointer, because it may be too
    1522 /// conservative in memdep.  This is an optional call that can be used when
    1523 /// the client detects an equivalence between the pointer and some other
    1524 /// value and replaces the other value with Ptr.  This can make Ptr available
    1525 /// in more places than the cached info reflects.
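         /// A minimal usage sketch (illustrative; 'MD' is a hypothetical pointer to
         /// this analysis held by the client):
         ///   Other->replaceAllUsesWith(Ptr);
         ///   MD->invalidateCachedPointerInfo(Ptr);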
   1526 void MemoryDependenceAnalysis::invalidateCachedPointerInfo(Value *Ptr) {
   1527   // If Ptr isn't really a pointer, just ignore it.
   1528   if (!Ptr->getType()->isPointerTy()) return;
   1529   // Flush store info for the pointer.
   1530   RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
   1531   // Flush load info for the pointer.
   1532   RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
   1533 }
   1534 
   1535 /// invalidateCachedPredecessors - Clear the PredIteratorCache info.
   1536 /// This needs to be done when the CFG changes, e.g., due to splitting
   1537 /// critical edges.
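         /// A minimal sketch (illustrative): a transform that calls, for example,
         /// SplitCriticalEdge() or otherwise rewrites CFG edges should call
         /// invalidateCachedPredecessors() before issuing further non-local queries.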
   1538 void MemoryDependenceAnalysis::invalidateCachedPredecessors() {
   1539   PredCache.clear();
   1540 }
   1541 
   1542 /// removeInstruction - Remove an instruction from the dependence analysis,
   1543 /// updating the dependence of instructions that previously depended on it.
   1544 /// This method attempts to keep the cache coherent using the reverse map.
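         /// A minimal usage sketch (illustrative; 'MD' and 'DeadInst' are
         /// hypothetical names):
         ///   MD->removeInstruction(DeadInst);
         ///   DeadInst->eraseFromParent();
         /// in that order, so the caches never hold a dangling instruction pointer.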
   1545 void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
   1546   // Walk through the Non-local dependencies, removing this one as the value
   1547   // for any cached queries.
   1548   NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst);
   1549   if (NLDI != NonLocalDeps.end()) {
   1550     NonLocalDepInfo &BlockMap = NLDI->second.first;
   1551     for (NonLocalDepInfo::iterator DI = BlockMap.begin(), DE = BlockMap.end();
   1552          DI != DE; ++DI)
   1553       if (Instruction *Inst = DI->getResult().getInst())
   1554         RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
   1555     NonLocalDeps.erase(NLDI);
   1556   }
   1557 
   1558   // If we have a cached local dependence query for this instruction, remove it.
   1559   //
   1560   LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
   1561   if (LocalDepEntry != LocalDeps.end()) {
   1562     // Remove us from DepInst's reverse set now that the local dep info is gone.
   1563     if (Instruction *Inst = LocalDepEntry->second.getInst())
   1564       RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
   1565 
   1566     // Remove this local dependency info.
   1567     LocalDeps.erase(LocalDepEntry);
   1568   }
   1569 
   1570   // If we have any cached pointer dependencies on this instruction, remove
   1571   // them.  If the instruction has non-pointer type, then it can't be a pointer
   1572   // base.
   1573 
   1574   // Remove it from both the load info and the store info.  The instruction
   1575   // can't be in either of these maps if it is non-pointer.
   1576   if (RemInst->getType()->isPointerTy()) {
   1577     RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
   1578     RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
   1579   }
   1580 
   1581   // Loop over all of the things that depend on the instruction we're removing.
   1582   //
   1583   SmallVector<std::pair<Instruction*, Instruction*>, 8> ReverseDepsToAdd;
   1584 
   1585   // If we find RemInst as a clobber or Def in any of the maps for other values,
   1586   // we need to replace its entry with a dirty version of the instruction after
   1587   // it.  If RemInst is a terminator, we use a null dirty value.
   1588   //
   1589   // Using a dirty version of the instruction after RemInst saves having to scan
   1590   // the entire block to get to this point.
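           // For example (illustrative): if RemInst is immediately followed by an
           // instruction I2, entries that previously resolved to RemInst become
           // "dirty on I2", so a later recomputation starts its backwards scan at I2
           // rather than at the end of the block.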
   1591   MemDepResult NewDirtyVal;
   1592   if (!RemInst->isTerminator())
   1593     NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());
   1594 
   1595   ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
   1596   if (ReverseDepIt != ReverseLocalDeps.end()) {
   1597     // RemInst can't be the terminator if it has local stuff depending on it.
   1598     assert(!ReverseDepIt->second.empty() && !isa<TerminatorInst>(RemInst) &&
   1599            "Nothing can locally depend on a terminator");
   1600 
   1601     for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
   1602       assert(InstDependingOnRemInst != RemInst &&
   1603              "Already removed our local dep info");
   1604 
   1605       LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
   1606 
    1607       // Make sure to remember that new things depend on the dirty instruction.
   1608       assert(NewDirtyVal.getInst() && "There is no way something else can have "
   1609              "a local dep on this if it is a terminator!");
   1610       ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(),
   1611                                                 InstDependingOnRemInst));
   1612     }
   1613 
   1614     ReverseLocalDeps.erase(ReverseDepIt);
   1615 
   1616     // Add new reverse deps after scanning the set, to avoid invalidating the
   1617     // 'ReverseDeps' reference.
   1618     while (!ReverseDepsToAdd.empty()) {
   1619       ReverseLocalDeps[ReverseDepsToAdd.back().first]
   1620         .insert(ReverseDepsToAdd.back().second);
   1621       ReverseDepsToAdd.pop_back();
   1622     }
   1623   }
   1624 
   1625   ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
   1626   if (ReverseDepIt != ReverseNonLocalDeps.end()) {
   1627     for (Instruction *I : ReverseDepIt->second) {
   1628       assert(I != RemInst && "Already removed NonLocalDep info for RemInst");
   1629 
   1630       PerInstNLInfo &INLD = NonLocalDeps[I];
   1631       // The information is now dirty!
   1632       INLD.second = true;
   1633 
   1634       for (NonLocalDepInfo::iterator DI = INLD.first.begin(),
   1635            DE = INLD.first.end(); DI != DE; ++DI) {
   1636         if (DI->getResult().getInst() != RemInst) continue;
   1637 
   1638         // Convert to a dirty entry for the subsequent instruction.
   1639         DI->setResult(NewDirtyVal);
   1640 
   1641         if (Instruction *NextI = NewDirtyVal.getInst())
   1642           ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
   1643       }
   1644     }
   1645 
   1646     ReverseNonLocalDeps.erase(ReverseDepIt);
   1647 
    1648     // Add new reverse deps after scanning the set, to avoid invalidating it.
   1649     while (!ReverseDepsToAdd.empty()) {
   1650       ReverseNonLocalDeps[ReverseDepsToAdd.back().first]
   1651         .insert(ReverseDepsToAdd.back().second);
   1652       ReverseDepsToAdd.pop_back();
   1653     }
   1654   }
   1655 
   1656   // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
   1657   // value in the NonLocalPointerDeps info.
   1658   ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
   1659     ReverseNonLocalPtrDeps.find(RemInst);
   1660   if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
   1661     SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd;
   1662 
   1663     for (ValueIsLoadPair P : ReversePtrDepIt->second) {
   1664       assert(P.getPointer() != RemInst &&
   1665              "Already removed NonLocalPointerDeps info for RemInst");
   1666 
   1667       NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
   1668 
   1669       // The cache is not valid for any specific block anymore.
   1670       NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
   1671 
   1672       // Update any entries for RemInst to use the instruction after it.
   1673       for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end();
   1674            DI != DE; ++DI) {
   1675         if (DI->getResult().getInst() != RemInst) continue;
   1676 
   1677         // Convert to a dirty entry for the subsequent instruction.
   1678         DI->setResult(NewDirtyVal);
   1679 
   1680         if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
   1681           ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
   1682       }
   1683 
   1684       // Re-sort the NonLocalDepInfo.  Changing the dirty entry to its
   1685       // subsequent value may invalidate the sortedness.
   1686       std::sort(NLPDI.begin(), NLPDI.end());
   1687     }
   1688 
   1689     ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
   1690 
   1691     while (!ReversePtrDepsToAdd.empty()) {
   1692       ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first]
   1693         .insert(ReversePtrDepsToAdd.back().second);
   1694       ReversePtrDepsToAdd.pop_back();
   1695     }
   1696   }
   1697 
   1698 
   1699   assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
   1700   DEBUG(verifyRemoved(RemInst));
   1701 }
    1702 /// verifyRemoved - Verify that the specified instruction does not occur
    1703 /// in our internal data structures.  The checks are assertions, so this is
    1704 /// a no-op in release (NDEBUG) builds.
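         /// For example, removeInstruction() above invokes it via
         /// DEBUG(verifyRemoved(RemInst)) once it has finished updating the maps.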
   1705 void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
   1706 #ifndef NDEBUG
   1707   for (LocalDepMapType::const_iterator I = LocalDeps.begin(),
   1708        E = LocalDeps.end(); I != E; ++I) {
   1709     assert(I->first != D && "Inst occurs in data structures");
   1710     assert(I->second.getInst() != D &&
   1711            "Inst occurs in data structures");
   1712   }
   1713 
   1714   for (CachedNonLocalPointerInfo::const_iterator I =NonLocalPointerDeps.begin(),
   1715        E = NonLocalPointerDeps.end(); I != E; ++I) {
   1716     assert(I->first.getPointer() != D && "Inst occurs in NLPD map key");
   1717     const NonLocalDepInfo &Val = I->second.NonLocalDeps;
   1718     for (NonLocalDepInfo::const_iterator II = Val.begin(), E = Val.end();
   1719          II != E; ++II)
   1720       assert(II->getResult().getInst() != D && "Inst occurs as NLPD value");
   1721   }
   1722 
   1723   for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(),
   1724        E = NonLocalDeps.end(); I != E; ++I) {
   1725     assert(I->first != D && "Inst occurs in data structures");
   1726     const PerInstNLInfo &INLD = I->second;
   1727     for (NonLocalDepInfo::const_iterator II = INLD.first.begin(),
   1728          EE = INLD.first.end(); II  != EE; ++II)
   1729       assert(II->getResult().getInst() != D && "Inst occurs in data structures");
   1730   }
   1731 
   1732   for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(),
   1733        E = ReverseLocalDeps.end(); I != E; ++I) {
   1734     assert(I->first != D && "Inst occurs in data structures");
   1735     for (Instruction *Inst : I->second)
   1736       assert(Inst != D && "Inst occurs in data structures");
   1737   }
   1738 
   1739   for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(),
   1740        E = ReverseNonLocalDeps.end();
   1741        I != E; ++I) {
   1742     assert(I->first != D && "Inst occurs in data structures");
   1743     for (Instruction *Inst : I->second)
   1744       assert(Inst != D && "Inst occurs in data structures");
   1745   }
   1746 
   1747   for (ReverseNonLocalPtrDepTy::const_iterator
   1748        I = ReverseNonLocalPtrDeps.begin(),
   1749        E = ReverseNonLocalPtrDeps.end(); I != E; ++I) {
   1750     assert(I->first != D && "Inst occurs in rev NLPD map");
   1751 
   1752     for (ValueIsLoadPair P : I->second)
   1753       assert(P != ValueIsLoadPair(D, false) &&
   1754              P != ValueIsLoadPair(D, true) &&
   1755              "Inst occurs in ReverseNonLocalPtrDeps map");
   1756   }
   1757 #endif
   1758 }
   1759