//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements an analysis that determines, for a given memory
// operation, what preceding memory operations it depends on.  It builds on
// alias analysis information, and tries to provide a lazy, caching interface to
// a common kind of alias information query.
//
//===----------------------------------------------------------------------===//
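
// Illustrative client usage (a minimal sketch, not part of this file; the
// enclosing pass, the MemoryDependenceResults reference MD, and the
// instruction I are assumed to come from the caller):
//
//   if (auto *Load = dyn_cast<LoadInst>(&I)) {
//     MemDepResult Dep = MD.getDependency(Load);
//     if (Dep.isDef() || Dep.isClobber())
//       processLocalDep(Dep.getInst()); // hypothetical helper
//   }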

#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/OrderedBasicBlock.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PredIteratorCache.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
using namespace llvm;

#define DEBUG_TYPE "memdep"

STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");

STATISTIC(NumCacheNonLocalPtr,
          "Number of fully cached non-local ptr responses");
STATISTIC(NumCacheDirtyNonLocalPtr,
          "Number of cached, but dirty, non-local ptr responses");
STATISTIC(NumUncacheNonLocalPtr, "Number of uncached non-local ptr responses");
STATISTIC(NumCacheCompleteNonLocalPtr,
          "Number of block queries that were completely cached");

// Limit for the number of instructions to scan in a block.
static cl::opt<unsigned> BlockScanLimit(
    "memdep-block-scan-limit", cl::Hidden, cl::init(100),
    cl::desc("The number of instructions to scan in a block in memory "
             "dependency analysis (default = 100)"));

static cl::opt<unsigned>
    BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(1000),
                     cl::desc("The number of blocks to scan during memory "
                              "dependency analysis (default = 1000)"));

// Limit on the number of memdep results to process.
static const unsigned int NumResultsLimit = 100;

/// This is a helper function that removes Val from 'Inst's set in ReverseMap.
///
/// If the set becomes empty, remove Inst's entry.
template <typename KeyTy>
static void
RemoveFromReverseMap(DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>> &ReverseMap,
                     Instruction *Inst, KeyTy Val) {
  typename DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>>::iterator InstIt =
      ReverseMap.find(Inst);
  assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
  bool Found = InstIt->second.erase(Val);
  assert(Found && "Invalid reverse map!");
  (void)Found;
  if (InstIt->second.empty())
    ReverseMap.erase(InstIt);
}

/// If the given instruction references a specific memory location, fill in Loc
/// with the details, otherwise set Loc.Ptr to null.
///
/// Returns a ModRefInfo value describing the general behavior of the
/// instruction.
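///
/// For example (illustrative only): a simple "load i32* %p" yields MRI_Ref
/// with Loc describing %p, a simple store to %p yields MRI_Mod, and a
/// volatile access or an atomic access stronger than monotonic is reported
/// conservatively as MRI_ModRef with Loc.Ptr left null.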
static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
                              const TargetLibraryInfo &TLI) {
  if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    if (LI->isUnordered()) {
      Loc = MemoryLocation::get(LI);
      return MRI_Ref;
    }
    if (LI->getOrdering() == AtomicOrdering::Monotonic) {
      Loc = MemoryLocation::get(LI);
      return MRI_ModRef;
    }
    Loc = MemoryLocation();
    return MRI_ModRef;
  }

  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    if (SI->isUnordered()) {
      Loc = MemoryLocation::get(SI);
      return MRI_Mod;
    }
    if (SI->getOrdering() == AtomicOrdering::Monotonic) {
      Loc = MemoryLocation::get(SI);
      return MRI_ModRef;
    }
    Loc = MemoryLocation();
    return MRI_ModRef;
  }

  if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
    Loc = MemoryLocation::get(V);
    return MRI_ModRef;
  }

  if (const CallInst *CI = isFreeCall(Inst, &TLI)) {
    // calls to free() deallocate the entire structure
    Loc = MemoryLocation(CI->getArgOperand(0));
    return MRI_Mod;
  }

  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    AAMDNodes AAInfo;

    switch (II->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::invariant_start:
      II->getAAMetadata(AAInfo);
      Loc = MemoryLocation(
          II->getArgOperand(1),
          cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(), AAInfo);
      // These intrinsics don't really modify the memory, but returning Mod
      // will allow them to be handled conservatively.
      return MRI_Mod;
    case Intrinsic::invariant_end:
      II->getAAMetadata(AAInfo);
      Loc = MemoryLocation(
          II->getArgOperand(2),
          cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(), AAInfo);
      // These intrinsics don't really modify the memory, but returning Mod
      // will allow them to be handled conservatively.
      return MRI_Mod;
    default:
      break;
    }
  }

  // Otherwise, just do the coarse-grained thing that always works.
  if (Inst->mayWriteToMemory())
    return MRI_ModRef;
  if (Inst->mayReadFromMemory())
    return MRI_Ref;
  return MRI_NoModRef;
}

/// Private helper for finding the local dependencies of a call site.
MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom(
    CallSite CS, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
    BasicBlock *BB) {
  unsigned Limit = BlockScanLimit;

  // Walk backwards through the block, looking for dependencies
  while (ScanIt != BB->begin()) {
    // Limit the amount of scanning we do so we don't end up with quadratic
    // running time on extreme testcases.
    --Limit;
    if (!Limit)
      return MemDepResult::getUnknown();

    Instruction *Inst = &*--ScanIt;

    // If this inst is a memory op, get the pointer it accessed
    MemoryLocation Loc;
    ModRefInfo MR = GetLocation(Inst, Loc, TLI);
    if (Loc.Ptr) {
      // A simple instruction.
      if (AA.getModRefInfo(CS, Loc) != MRI_NoModRef)
        return MemDepResult::getClobber(Inst);
      continue;
    }

    if (auto InstCS = CallSite(Inst)) {
      // Debug intrinsics don't cause dependences.
      if (isa<DbgInfoIntrinsic>(Inst))
        continue;
      // If these two calls do not interfere, look past it.
      switch (AA.getModRefInfo(CS, InstCS)) {
      case MRI_NoModRef:
        // If the two calls are the same, return InstCS as a Def, so that
        // CS can be found redundant and eliminated.
        if (isReadOnlyCall && !(MR & MRI_Mod) &&
            CS.getInstruction()->isIdenticalToWhenDefined(Inst))
          return MemDepResult::getDef(Inst);

        // Otherwise if the two calls don't interact (e.g. InstCS is readnone)
        // keep scanning.
        continue;
      default:
        return MemDepResult::getClobber(Inst);
      }
    }

    // If we could not obtain a pointer for the instruction and the instruction
    // touches memory then assume that this is a dependency.
    if (MR != MRI_NoModRef)
      return MemDepResult::getClobber(Inst);
  }

  // No dependence found.  If this is the entry block of the function, it is
  // unknown, otherwise it is non-local.
  if (BB != &BB->getParent()->getEntryBlock())
    return MemDepResult::getNonLocal();
  return MemDepResult::getNonFuncLocal();
}

/// Return true if LI is a load that would fully overlap MemLoc if done as
/// a wider legal integer load.
///
/// MemLocBase and MemLocOffs are lazily computed here the first time the
/// base/offset of MemLoc is needed.
static bool isLoadLoadClobberIfExtendedToFullWidth(const MemoryLocation &MemLoc,
                                                   const Value *&MemLocBase,
                                                   int64_t &MemLocOffs,
                                                   const LoadInst *LI) {
  const DataLayout &DL = LI->getModule()->getDataLayout();

  // If we haven't already computed the base/offset of MemLoc, do so now.
  if (!MemLocBase)
    MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, DL);

  unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
      MemLocBase, MemLocOffs, MemLoc.Size, LI);
  return Size != 0;
}

unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
    const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize,
    const LoadInst *LI) {
  // We can only extend simple integer loads.
  if (!isa<IntegerType>(LI->getType()) || !LI->isSimple())
    return 0;

  // Load widening is hostile to ThreadSanitizer: it may cause false positives
  // or make the reports more cryptic (access sizes are wrong).
  if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
    return 0;

  const DataLayout &DL = LI->getModule()->getDataLayout();

  // Get the base of this load.
  int64_t LIOffs = 0;
  const Value *LIBase =
      GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, DL);

  // If the two pointers are not based on the same pointer, we can't tell that
  // they are related.
  if (LIBase != MemLocBase)
    return 0;

  // Okay, the two values are based on the same pointer, but returned as
  // no-alias.  This happens when we have things like two byte loads at "P+1"
  // and "P+3".  Check to see if increasing the size of the "LI" load up to its
  // alignment (or the largest native integer type) will allow us to load all
  // the bits required by MemLoc.

  // If MemLoc is before LI, then no widening of LI will help us out.
  if (MemLocOffs < LIOffs)
    return 0;

  // Get the alignment of the load in bytes.  We assume that it is safe to load
  // any legal integer up to this size without a problem.  For example, if we're
  // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
  // widen it up to an i32 load.  If it is known 2-byte aligned, we can widen it
  // to i16.
  unsigned LoadAlign = LI->getAlignment();

  int64_t MemLocEnd = MemLocOffs + MemLocSize;

  // If no amount of rounding up will let MemLoc fit into LI, then bail out.
  if (LIOffs + LoadAlign < MemLocEnd)
    return 0;

  // This is the size of the load to try.  Start with the next larger power of
  // two.
  unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits() / 8U;
  NewLoadByteSize = NextPowerOf2(NewLoadByteSize);

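  // Worked example (illustrative numbers only, assuming i16 and i32 are legal
  // integer types on the target): for an i8 load at offset 0 with 4-byte
  // alignment, NewLoadByteSize grows 2 -> 4; if MemLoc is the single byte at
  // offset 3 (MemLocEnd == 4), the 4-byte load covers it and 4 is returned.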
  while (1) {
    // If this load size is bigger than our known alignment or would not fit
    // into a native integer register, then we fail.
    if (NewLoadByteSize > LoadAlign ||
        !DL.fitsInLegalInteger(NewLoadByteSize * 8))
      return 0;

    if (LIOffs + NewLoadByteSize > MemLocEnd &&
        LI->getParent()->getParent()->hasFnAttribute(
            Attribute::SanitizeAddress))
      // We will be reading past the location accessed by the original program.
      // While this is safe in a regular build, Address Safety analysis tools
      // may start reporting false warnings. So, don't do widening.
      return 0;

    // If a load of this width would include all of MemLoc, then we succeed.
    if (LIOffs + NewLoadByteSize >= MemLocEnd)
      return NewLoadByteSize;

    NewLoadByteSize <<= 1;
  }
}

static bool isVolatile(Instruction *Inst) {
  if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
    return LI->isVolatile();
  else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
    return SI->isVolatile();
  else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(Inst))
    return AI->isVolatile();
  return false;
}

MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
    const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
    BasicBlock *BB, Instruction *QueryInst) {

  if (QueryInst != nullptr) {
    if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
      MemDepResult invariantGroupDependency =
          getInvariantGroupPointerDependency(LI, BB);

      if (invariantGroupDependency.isDef())
        return invariantGroupDependency;
    }
  }
  return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst);
}

MemDepResult
MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
                                                             BasicBlock *BB) {
  Value *LoadOperand = LI->getPointerOperand();
  // It is not safe to walk the use list of a global value, because function
  // passes aren't allowed to look outside their functions.
  if (isa<GlobalValue>(LoadOperand))
    return MemDepResult::getUnknown();

  auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group);
  if (!InvariantGroupMD)
    return MemDepResult::getUnknown();

  MemDepResult Result = MemDepResult::getUnknown();
  llvm::SmallSet<Value *, 14> Seen;
  // Queue to process all pointers that are equivalent to the load operand.
  llvm::SmallVector<Value *, 8> LoadOperandsQueue;
  LoadOperandsQueue.push_back(LoadOperand);
  while (!LoadOperandsQueue.empty()) {
    Value *Ptr = LoadOperandsQueue.pop_back_val();
    if (isa<GlobalValue>(Ptr))
      continue;

    if (auto *BCI = dyn_cast<BitCastInst>(Ptr)) {
      if (Seen.insert(BCI->getOperand(0)).second) {
        LoadOperandsQueue.push_back(BCI->getOperand(0));
      }
    }

    for (Use &Us : Ptr->uses()) {
      auto *U = dyn_cast<Instruction>(Us.getUser());
      if (!U || U == LI || !DT.dominates(U, LI))
        continue;

      if (auto *BCI = dyn_cast<BitCastInst>(U)) {
        if (Seen.insert(BCI).second) {
          LoadOperandsQueue.push_back(BCI);
        }
        continue;
      }
      // If we hit a load/store with the same invariant.group metadata (and the
      // same pointer operand), we can assume that the value pointed to by the
      // pointer operand didn't change.
      if ((isa<LoadInst>(U) || isa<StoreInst>(U)) && U->getParent() == BB &&
          U->getMetadata(LLVMContext::MD_invariant_group) == InvariantGroupMD)
        return MemDepResult::getDef(U);
    }
  }
  return Result;
}

MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
    const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
    BasicBlock *BB, Instruction *QueryInst) {

  const Value *MemLocBase = nullptr;
  int64_t MemLocOffset = 0;
  unsigned Limit = BlockScanLimit;
  bool isInvariantLoad = false;

  // We must be careful with atomic accesses, as they may allow another thread
  // to touch this location, clobbering it. We are conservative: if the
  // QueryInst is not a simple (non-atomic) memory access, we automatically
  // return getClobber.
  // If it is simple, we know based on the results of
  // "Compiler testing via a theory of sound optimisations in the C11/C++11
  // memory model" in PLDI 2013, that a non-atomic location can only be
  // clobbered between a pair of a release and an acquire action, with no
  // access to the location in between.
  // Here is an example giving the general intuition behind this rule.
  // In the following code:
  //   store x 0;
  //   release action; [1]
  //   acquire action; [4]
  //   %val = load x;
  // It is unsafe to replace %val by 0 because another thread may be running:
  //   acquire action; [2]
  //   store x 42;
  //   release action; [3]
  // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
  // being 42. A key property of this program, however, is that if either
  // 1 or 4 were missing, there would be a race between the store of 42 and
  // either the store of 0 or the load (making the whole program racy).
  // The paper mentioned above shows that the same property is respected
  // by every program that can detect any optimization of that kind: either
  // it is racy (undefined) or there is a release followed by an acquire
  // between the pair of accesses under consideration.

  // If the load is invariant, we "know" that it doesn't alias *any* write. We
  // do want to respect mustalias results since defs are useful for value
  // forwarding, but any mayalias write can be assumed to be noalias.
  // Arguably, this logic should be pushed inside AliasAnalysis itself.
  if (isLoad && QueryInst) {
    LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
    if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
      isInvariantLoad = true;
  }

  const DataLayout &DL = BB->getModule()->getDataLayout();

  // Create a numbered basic block to lazily compute and cache instruction
  // positions inside a BB. This is used to provide fast queries for relative
  // position between two instructions in a BB and can be used by
  // AliasAnalysis::callCapturesBefore.
  OrderedBasicBlock OBB(BB);

  // Return "true" if and only if the instruction I is either a non-simple
  // load or a non-simple store.
  auto isNonSimpleLoadOrStore = [](Instruction *I) -> bool {
    if (auto *LI = dyn_cast<LoadInst>(I))
      return !LI->isSimple();
    if (auto *SI = dyn_cast<StoreInst>(I))
      return !SI->isSimple();
    return false;
  };

  // Return "true" if I is not a load and not a store, but it does access
  // memory.
  auto isOtherMemAccess = [](Instruction *I) -> bool {
    return !isa<LoadInst>(I) && !isa<StoreInst>(I) && I->mayReadOrWriteMemory();
  };

  // Walk backwards through the basic block, looking for dependencies.
  while (ScanIt != BB->begin()) {
    Instruction *Inst = &*--ScanIt;

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
      // Debug intrinsics don't (and can't) cause dependencies.
      if (isa<DbgInfoIntrinsic>(II))
        continue;

    // Limit the amount of scanning we do so we don't end up with quadratic
    // running time on extreme testcases.
    --Limit;
    if (!Limit)
      return MemDepResult::getUnknown();

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
      // If we reach a lifetime begin or end marker, then the query ends here
      // because the value is undefined.
      if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
        // FIXME: This only considers queries directly on the invariant-tagged
        // pointer, not on query pointers that are indexed off of them.  It'd
        // be nice to handle that at some point (the right approach is to use
        // GetPointerBaseWithConstantOffset).
        if (AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), MemLoc))
          return MemDepResult::getDef(II);
        continue;
      }
    }

    // Values depend on loads if the pointers are must aliased.  This means
    // that a load depends on another must aliased load from the same value.
    // One exception is atomic loads: a value can depend on an atomic load that
    // it does not alias with when this atomic load indicates that another
    // thread may be accessing the location.
    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {

      // While volatile accesses cannot be eliminated, they do not have to
      // clobber non-aliasing locations; normal accesses, for example, can be
      // safely reordered with volatile accesses.
      if (LI->isVolatile()) {
        if (!QueryInst)
          // Original QueryInst *may* be volatile
          return MemDepResult::getClobber(LI);
        if (isVolatile(QueryInst))
          // Ordering required if QueryInst is itself volatile
          return MemDepResult::getClobber(LI);
        // Otherwise, volatile doesn't imply any special ordering
      }

      // Atomic loads have complications involved.
      // A Monotonic (or higher) load is OK if the query inst is itself not
      // atomic.
      // FIXME: This is overly conservative.
      if (LI->isAtomic() && isStrongerThanUnordered(LI->getOrdering())) {
        if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
            isOtherMemAccess(QueryInst))
          return MemDepResult::getClobber(LI);
        if (LI->getOrdering() != AtomicOrdering::Monotonic)
          return MemDepResult::getClobber(LI);
      }

      MemoryLocation LoadLoc = MemoryLocation::get(LI);

      // If we found a pointer, check if it could be the same as our pointer.
      AliasResult R = AA.alias(LoadLoc, MemLoc);

      if (isLoad) {
        if (R == NoAlias) {
          // If this is an over-aligned integer load (for example,
          // "load i8* %P, align 4") see if it would obviously overlap with the
          // queried location if widened to a larger load (e.g. if the queried
          // location is 1 byte at P+1).  If so, return it as a load/load
          // clobber result, allowing the client to decide to widen the load if
          // it wants to.
          if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
            if (LI->getAlignment() * 8 > ITy->getPrimitiveSizeInBits() &&
                isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
                                                       MemLocOffset, LI))
              return MemDepResult::getClobber(Inst);
          }
          continue;
        }

        // Must aliased loads are defs of each other.
        if (R == MustAlias)
          return MemDepResult::getDef(Inst);

#if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads
      // in terms of clobbering loads, but since it does this by looking
      // at the clobbering load directly, it doesn't know about any
      // phi translation that may have happened along the way.

        // If we have a partial alias, then return this as a clobber for the
        // client to handle.
        if (R == PartialAlias)
          return MemDepResult::getClobber(Inst);
#endif

        // Loads that only may-alias each other don't create a dependence for
        // a load query; keep scanning.
        continue;
      }

      // Stores don't depend on other no-aliased accesses.
      if (R == NoAlias)
        continue;

      // Stores don't alias loads from read-only memory.
      if (AA.pointsToConstantMemory(LoadLoc))
        continue;

      // Stores depend on may/must aliased loads.
      return MemDepResult::getDef(Inst);
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      // Atomic stores have complications involved.
      // A Monotonic store is OK if the query inst is itself not atomic.
      // FIXME: This is overly conservative.
      if (!SI->isUnordered() && SI->isAtomic()) {
        if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
            isOtherMemAccess(QueryInst))
          return MemDepResult::getClobber(SI);
        if (SI->getOrdering() != AtomicOrdering::Monotonic)
          return MemDepResult::getClobber(SI);
      }

      // FIXME: this is overly conservative.
      // While volatile accesses cannot be eliminated, they do not have to
      // clobber non-aliasing locations; normal accesses can, for example, be
      // reordered with volatile accesses.
      if (SI->isVolatile())
        if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
            isOtherMemAccess(QueryInst))
          return MemDepResult::getClobber(SI);

      // If alias analysis can tell that this store is guaranteed to not modify
      // the query pointer, ignore it.  Use getModRefInfo to handle cases where
      // the query pointer points to constant memory etc.
      if (AA.getModRefInfo(SI, MemLoc) == MRI_NoModRef)
        continue;

      // Ok, this store might clobber the query pointer.  Check to see if it is
      // a must alias: in this case, we want to return this as a def.
      MemoryLocation StoreLoc = MemoryLocation::get(SI);

      // If we found a pointer, check if it could be the same as our pointer.
      AliasResult R = AA.alias(StoreLoc, MemLoc);

      if (R == NoAlias)
        continue;
      if (R == MustAlias)
        return MemDepResult::getDef(Inst);
      if (isInvariantLoad)
        continue;
      return MemDepResult::getClobber(Inst);
    }

    // If this is an allocation, and if we know that the accessed pointer is to
    // the allocation, return Def.  This means that there is no dependence and
    // the access can be optimized based on that.  For example, a load could
    // turn into undef.  Note that we can bypass the allocation itself when
    // looking for a clobber in many cases; that's an alias property and is
    // handled by BasicAA.
    if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, &TLI)) {
      const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, DL);
      if (AccessPtr == Inst || AA.isMustAlias(Inst, AccessPtr))
        return MemDepResult::getDef(Inst);
    }

    if (isInvariantLoad)
      continue;

    // A release fence requires that all stores complete before it, but does
    // not prevent the reordering of following loads or stores 'before' the
    // fence.  As a result, we look past it when finding a dependency for
    // loads.  DSE uses this to find preceding stores to delete and thus we
    // can't bypass the fence if the query instruction is a store.
    if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
      if (isLoad && FI->getOrdering() == AtomicOrdering::Release)
        continue;

    // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
    ModRefInfo MR = AA.getModRefInfo(Inst, MemLoc);
    // If necessary, perform additional analysis.
    if (MR == MRI_ModRef)
      MR = AA.callCapturesBefore(Inst, MemLoc, &DT, &OBB);
    switch (MR) {
    case MRI_NoModRef:
      // If the call has no effect on the queried pointer, just ignore it.
      continue;
    case MRI_Mod:
      return MemDepResult::getClobber(Inst);
    case MRI_Ref:
      // If the call is known to never store to the pointer, and if this is a
      // load query, we can safely ignore it (scan past it).
      if (isLoad)
        continue;
    default:
      // Otherwise, there is a potential dependence.  Return a clobber.
      return MemDepResult::getClobber(Inst);
    }
  }

  // No dependence found.  If this is the entry block of the function, it is
  // unknown, otherwise it is non-local.
  if (BB != &BB->getParent()->getEntryBlock())
    return MemDepResult::getNonLocal();
  return MemDepResult::getNonFuncLocal();
}

MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
  Instruction *ScanPos = QueryInst;

  // Check for a cached result
  MemDepResult &LocalCache = LocalDeps[QueryInst];

  // If the cached entry is non-dirty, just return it.  Note that this depends
  // on MemDepResult's default constructing to 'dirty'.
  if (!LocalCache.isDirty())
    return LocalCache;

  // Otherwise, if we have a dirty entry, we know we can start the scan at that
  // instruction, which may save us some work.
  if (Instruction *Inst = LocalCache.getInst()) {
    ScanPos = Inst;

    RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
  }

  BasicBlock *QueryParent = QueryInst->getParent();

  // Do the scan.
  if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
    // No dependence found.  If this is the entry block of the function, it is
    // unknown, otherwise it is non-local.
    if (QueryParent != &QueryParent->getParent()->getEntryBlock())
      LocalCache = MemDepResult::getNonLocal();
    else
      LocalCache = MemDepResult::getNonFuncLocal();
  } else {
    MemoryLocation MemLoc;
    ModRefInfo MR = GetLocation(QueryInst, MemLoc, TLI);
    if (MemLoc.Ptr) {
      // If we can do a pointer scan, make it happen.
      bool isLoad = !(MR & MRI_Mod);
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))
        isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;

      LocalCache = getPointerDependencyFrom(
          MemLoc, isLoad, ScanPos->getIterator(), QueryParent, QueryInst);
    } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
      CallSite QueryCS(QueryInst);
      bool isReadOnly = AA.onlyReadsMemory(QueryCS);
      LocalCache = getCallSiteDependencyFrom(
          QueryCS, isReadOnly, ScanPos->getIterator(), QueryParent);
    } else
      // Non-memory instruction.
      LocalCache = MemDepResult::getUnknown();
  }

  // Remember the result!
  if (Instruction *I = LocalCache.getInst())
    ReverseLocalDeps[I].insert(QueryInst);

  return LocalCache;
}

#ifndef NDEBUG
/// This method is used when -debug is specified to verify that cache arrays
/// are properly kept sorted.
static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache,
                         int Count = -1) {
  if (Count == -1)
    Count = Cache.size();
  assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
         "Cache isn't sorted!");
}
#endif

const MemoryDependenceResults::NonLocalDepInfo &
MemoryDependenceResults::getNonLocalCallDependency(CallSite QueryCS) {
  assert(getDependency(QueryCS.getInstruction()).isNonLocal() &&
         "getNonLocalCallDependency should only be used on calls with "
         "non-local deps!");
  PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()];
  NonLocalDepInfo &Cache = CacheP.first;

  // This is the set of blocks that need to be recomputed.  In the cached case,
  // this can happen due to instructions being deleted etc. In the uncached
  // case, this starts out as the set of predecessors we care about.
  SmallVector<BasicBlock *, 32> DirtyBlocks;

  if (!Cache.empty()) {
    // Okay, we have a cache entry.  If we know it is not dirty, just return it
    // with no computation.
    if (!CacheP.second) {
      ++NumCacheNonLocal;
      return Cache;
    }

    // If we already have a partially computed set of results, scan them to
    // determine what is dirty, seeding our initial DirtyBlocks worklist.
    for (auto &Entry : Cache)
      if (Entry.getResult().isDirty())
        DirtyBlocks.push_back(Entry.getBB());

    // Sort the cache so that we can do fast binary search lookups below.
    std::sort(Cache.begin(), Cache.end());

    ++NumCacheDirtyNonLocal;
    // cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
    //     << Cache.size() << " cached: " << *QueryInst;
  } else {
    // Seed DirtyBlocks with each of the preds of QueryInst's block.
    BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
    for (BasicBlock *Pred : PredCache.get(QueryBB))
      DirtyBlocks.push_back(Pred);
    ++NumUncacheNonLocal;
  }

  // isReadonlyCall - If this is a read-only call, we can be more aggressive.
  bool isReadonlyCall = AA.onlyReadsMemory(QueryCS);

  SmallPtrSet<BasicBlock *, 32> Visited;

  unsigned NumSortedEntries = Cache.size();
  DEBUG(AssertSorted(Cache));

  // Iterate while we still have blocks to update.
  while (!DirtyBlocks.empty()) {
    BasicBlock *DirtyBB = DirtyBlocks.back();
    DirtyBlocks.pop_back();

    // Already processed this block?
    if (!Visited.insert(DirtyBB).second)
      continue;

    // Do a binary search to see if we already have an entry for this block in
    // the cache set.  If so, find it.
    DEBUG(AssertSorted(Cache, NumSortedEntries));
    NonLocalDepInfo::iterator Entry =
        std::upper_bound(Cache.begin(), Cache.begin() + NumSortedEntries,
                         NonLocalDepEntry(DirtyBB));
    if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
      --Entry;

    NonLocalDepEntry *ExistingResult = nullptr;
    if (Entry != Cache.begin() + NumSortedEntries &&
        Entry->getBB() == DirtyBB) {
      // If we already have an entry, and if it isn't already dirty, the block
      // is done.
      if (!Entry->getResult().isDirty())
        continue;

      // Otherwise, remember this slot so we can update the value.
      ExistingResult = &*Entry;
    }

    // If the dirty entry has a pointer, start scanning from it so we don't have
    // to rescan the entire block.
    BasicBlock::iterator ScanPos = DirtyBB->end();
    if (ExistingResult) {
      if (Instruction *Inst = ExistingResult->getResult().getInst()) {
        ScanPos = Inst->getIterator();
        // We're removing QueryInst's use of Inst.
        RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
                             QueryCS.getInstruction());
      }
    }

    // Find out if this block has a local dependency for QueryInst.
    MemDepResult Dep;

    if (ScanPos != DirtyBB->begin()) {
      Dep =
          getCallSiteDependencyFrom(QueryCS, isReadonlyCall, ScanPos, DirtyBB);
    } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
      // No dependence found.  If this is the entry block of the function, it
      // is unknown, otherwise it is non-local.
      Dep = MemDepResult::getNonLocal();
    } else {
      Dep = MemDepResult::getNonFuncLocal();
    }

    // If we had a dirty entry for the block, update it.  Otherwise, just add
    // a new entry.
    if (ExistingResult)
      ExistingResult->setResult(Dep);
    else
      Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));

    // If the block has a dependency (i.e. it isn't completely transparent to
    // the value), remember the association!
    if (!Dep.isNonLocal()) {
      // Keep the ReverseNonLocalDeps map up to date so we can efficiently
      // update this when we remove instructions.
      if (Instruction *Inst = Dep.getInst())
        ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
    } else {

      // If the block *is* completely transparent to the load, we need to check
      // the predecessors of this block.  Add them to our worklist.
      for (BasicBlock *Pred : PredCache.get(DirtyBB))
        DirtyBlocks.push_back(Pred);
    }
  }

  return Cache;
}

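// Illustrative use of the non-local pointer query below (a sketch only; the
// variables MD and Load and the handling code are assumptions, not defined in
// this file):
//
//   SmallVector<NonLocalDepResult, 16> Deps;
//   MD.getNonLocalPointerDependency(Load, Deps);
//   for (const NonLocalDepResult &D : Deps)
//     if (D.getResult().isDef())
//       ; // the value is produced by D.getResult().getInst() in D.getBB()
//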
void MemoryDependenceResults::getNonLocalPointerDependency(
    Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) {
  const MemoryLocation Loc = MemoryLocation::get(QueryInst);
  bool isLoad = isa<LoadInst>(QueryInst);
  BasicBlock *FromBB = QueryInst->getParent();
  assert(FromBB);

  assert(Loc.Ptr->getType()->isPointerTy() &&
         "Can't get pointer deps of a non-pointer!");
  Result.clear();

  // This routine does not expect to deal with volatile instructions.
  // Doing so would require piping the QueryInst all the way through.
  // TODO: volatiles can't be elided, but they can be reordered with other
  // non-volatile accesses.

  // We currently give up on any instruction which is ordered, but we do handle
  // atomic instructions which are unordered.
  // TODO: Handle ordered instructions
  auto isOrdered = [](Instruction *Inst) {
    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
      return !LI->isUnordered();
    } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      return !SI->isUnordered();
    }
    return false;
  };
  if (isVolatile(QueryInst) || isOrdered(QueryInst)) {
    Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
                                       const_cast<Value *>(Loc.Ptr)));
    return;
  }
  const DataLayout &DL = FromBB->getModule()->getDataLayout();
  PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, &AC);

  // This is the set of blocks we've inspected, and the pointer we consider in
  // each block.  Because of critical edges, we currently bail out if querying
  // a block with multiple different pointers.  This can happen during PHI
  // translation.
  DenseMap<BasicBlock *, Value *> Visited;
  if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
                                  Result, Visited, true))
    return;
  Result.clear();
  Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
                                     const_cast<Value *>(Loc.Ptr)));
}

/// Compute the memdep value for BB with Pointer/PointeeSize using either
/// cached information in Cache or by doing a lookup (which may use dirty cache
/// info if available).
///
/// If we do a lookup, add the result to the cache.
MemDepResult MemoryDependenceResults::GetNonLocalInfoForBlock(
    Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
    BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) {

  // Do a binary search to see if we already have an entry for this block in
  // the cache set.  If so, find it.
  NonLocalDepInfo::iterator Entry = std::upper_bound(
      Cache->begin(), Cache->begin() + NumSortedEntries, NonLocalDepEntry(BB));
  if (Entry != Cache->begin() && (Entry - 1)->getBB() == BB)
    --Entry;

  NonLocalDepEntry *ExistingResult = nullptr;
  if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
    ExistingResult = &*Entry;

  // If we have a cached entry, and it is non-dirty, use it as the value for
  // this dependency.
  if (ExistingResult && !ExistingResult->getResult().isDirty()) {
    ++NumCacheNonLocalPtr;
    return ExistingResult->getResult();
  }

  // Otherwise, we have to scan for the value.  If we have a dirty cache
  // entry, start scanning from its position, otherwise we scan from the end
  // of the block.
  BasicBlock::iterator ScanPos = BB->end();
  if (ExistingResult && ExistingResult->getResult().getInst()) {
    assert(ExistingResult->getResult().getInst()->getParent() == BB &&
           "Instruction invalidated?");
    ++NumCacheDirtyNonLocalPtr;
    ScanPos = ExistingResult->getResult().getInst()->getIterator();

    // Eliminating the dirty entry from 'Cache', so update the reverse info.
    ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
    RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);
  } else {
    ++NumUncacheNonLocalPtr;
  }

  // Scan the block for the dependency.
  MemDepResult Dep =
      getPointerDependencyFrom(Loc, isLoad, ScanPos, BB, QueryInst);

  // If we had a dirty entry for the block, update it.  Otherwise, just add
  // a new entry.
  if (ExistingResult)
    ExistingResult->setResult(Dep);
  else
    Cache->push_back(NonLocalDepEntry(BB, Dep));

  // If the block has a dependency (i.e. it isn't completely transparent to
  // the value), remember the reverse association because we just added it
  // to Cache!
  if (!Dep.isDef() && !Dep.isClobber())
    return Dep;

  // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
  // update MemDep when we remove instructions.
  Instruction *Inst = Dep.getInst();
  assert(Inst && "Didn't depend on anything?");
  ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
  ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
  return Dep;
}

/// Sort the NonLocalDepInfo cache, given a certain number of elements in the
/// array that are already properly ordered.
///
/// This is optimized for the case when only a few entries are added.
static void
SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
                         unsigned NumSortedEntries) {
  switch (Cache.size() - NumSortedEntries) {
  case 0:
    // done, no new entries.
    break;
  case 2: {
    // Two new entries, insert the last one into place.
    NonLocalDepEntry Val = Cache.back();
    Cache.pop_back();
    MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
        std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
    Cache.insert(Entry, Val);
    // FALL THROUGH.
  }
  case 1:
    // One new entry, just insert the new value at the appropriate position.
    if (Cache.size() != 1) {
      NonLocalDepEntry Val = Cache.back();
      Cache.pop_back();
      MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
          std::upper_bound(Cache.begin(), Cache.end(), Val);
      Cache.insert(Entry, Val);
    }
    break;
  default:
    // Added many values, do a full scale sort.
    std::sort(Cache.begin(), Cache.end());
    break;
  }
}

/// Perform a dependency query based on pointer/pointeesize starting at the end
/// of StartBB.
///
/// Add any clobber/def results to the results vector and keep track of which
/// blocks are visited in 'Visited'.
///
/// This has special behavior for the first block queries (when SkipFirstBlock
/// is true).  In this special case, it ignores the contents of the specified
/// block and starts returning dependence info for its predecessors.
///
/// This function returns true on success, or false to indicate that it could
/// not compute dependence information for some reason.  This should be treated
/// as a clobber dependence on the first instruction in the predecessor block.
bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
    Instruction *QueryInst, const PHITransAddr &Pointer,
    const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
    SmallVectorImpl<NonLocalDepResult> &Result,
    DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock) {
  // Look up the cached info for Pointer.
  ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);

  // Set up a temporary NLPI value. If the map doesn't yet have an entry for
  // CacheKey, this value will be inserted as the associated value. Otherwise,
  // it'll be ignored, and we'll have to check to see if the cached size and
  // aa tags are consistent with the current query.
  NonLocalPointerInfo InitialNLPI;
  InitialNLPI.Size = Loc.Size;
  InitialNLPI.AATags = Loc.AATags;

  // Get the NLPI for CacheKey, inserting one into the map if it doesn't
  // already have one.
  std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
      NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
  NonLocalPointerInfo *CacheInfo = &Pair.first->second;

  // If we already have a cache entry for this CacheKey, we may need to do some
  // work to reconcile the cache entry and the current query.
  if (!Pair.second) {
    if (CacheInfo->Size < Loc.Size) {
      // The query's Size is greater than the cached one. Throw out the
      // cached data and proceed with the query at the greater size.
      CacheInfo->Pair = BBSkipFirstBlockPair();
      CacheInfo->Size = Loc.Size;
      for (auto &Entry : CacheInfo->NonLocalDeps)
        if (Instruction *Inst = Entry.getResult().getInst())
          RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
      CacheInfo->NonLocalDeps.clear();
    } else if (CacheInfo->Size > Loc.Size) {
      // This query's Size is less than the cached one. Conservatively restart
      // the query using the greater size.
      return getNonLocalPointerDepFromBB(
          QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
          StartBB, Result, Visited, SkipFirstBlock);
    }

    // If the query's AATags are inconsistent with the cached one,
    // conservatively throw out the cached data and restart the query with
    // no tag if needed.
    if (CacheInfo->AATags != Loc.AATags) {
      if (CacheInfo->AATags) {
        CacheInfo->Pair = BBSkipFirstBlockPair();
        CacheInfo->AATags = AAMDNodes();
        for (auto &Entry : CacheInfo->NonLocalDeps)
          if (Instruction *Inst = Entry.getResult().getInst())
            RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
        CacheInfo->NonLocalDeps.clear();
      }
      if (Loc.AATags)
        return getNonLocalPointerDepFromBB(
            QueryInst, Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result,
            Visited, SkipFirstBlock);
    }
  }

  NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;

  // If we have valid cached information for exactly the block we are
  // investigating, just return it with no recomputation.
  if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
    // We have a fully cached result for this query, so we can just return the
    // cached results and populate the visited set.  However, we have to verify
    // that we don't already have conflicting results for these blocks.  Check
    // to ensure that if a block in the results set is in the visited set that
    // it was for the same pointer query.
    if (!Visited.empty()) {
      for (auto &Entry : *Cache) {
        DenseMap<BasicBlock *, Value *>::iterator VI =
            Visited.find(Entry.getBB());
        if (VI == Visited.end() || VI->second == Pointer.getAddr())
          continue;

        // We have a pointer mismatch in a block.  Just return false, saying
        // that something was clobbered in this result.  We could also do a
        // non-fully cached query, but there is little point in doing this.
        return false;
      }
    }

    Value *Addr = Pointer.getAddr();
    for (auto &Entry : *Cache) {
      Visited.insert(std::make_pair(Entry.getBB(), Addr));
      if (Entry.getResult().isNonLocal()) {
        continue;
      }

      if (DT.isReachableFromEntry(Entry.getBB())) {
        Result.push_back(
            NonLocalDepResult(Entry.getBB(), Entry.getResult(), Addr));
      }
    }
    ++NumCacheCompleteNonLocalPtr;
    return true;
  }

  // Otherwise, either this is a new block, a block with an invalid cache
  // pointer or one that we're about to invalidate by putting more info into it
  // than its valid cache info.  If empty, the result will be valid cache info,
  // otherwise it isn't.
  if (Cache->empty())
    CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
  else
    CacheInfo->Pair = BBSkipFirstBlockPair();

  SmallVector<BasicBlock *, 32> Worklist;
  Worklist.push_back(StartBB);

  // PredList used inside loop.
  SmallVector<std::pair<BasicBlock *, PHITransAddr>, 16> PredList;

  // Keep track of the entries that we know are sorted.  Previously cached
  // entries will all be sorted.  The entries we add we only sort on demand (we
  // don't insert every element into its sorted position).  We know that we
  // won't get any reuse from currently inserted values, because we don't
  // revisit blocks after we insert info for them.
  unsigned NumSortedEntries = Cache->size();
  unsigned WorklistEntries = BlockNumberLimit;
  bool GotWorklistLimit = false;
  DEBUG(AssertSorted(*Cache));

  while (!Worklist.empty()) {
    BasicBlock *BB = Worklist.pop_back_val();

    // If we do process a large number of blocks, it becomes very expensive and
    // likely isn't worth worrying about.
    if (Result.size() > NumResultsLimit) {
      Worklist.clear();
      // Sort it now (if needed) so that recursive invocations of
      // getNonLocalPointerDepFromBB and other routines that could reuse the
      // cache value will only see properly sorted cache arrays.
      if (Cache && NumSortedEntries != Cache->size()) {
        SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
      }
      // Since we bail out, the "Cache" set won't contain all of the
      // results for the query.  This is ok (we can still use it to accelerate
      // specific block queries) but we can't do the fastpath "return all
      // results from the set".  Clear out the indicator for this.
      CacheInfo->Pair = BBSkipFirstBlockPair();
      return false;
    }

    // Skip the first block if we have it.
    if (!SkipFirstBlock) {
      // Analyze the dependency of Pointer in BB.  See if we already have
      // been here.
   1196       assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
   1197 
   1198       // Get the dependency info for Pointer in BB.  If we have cached
   1199       // information, we will use it, otherwise we compute it.
   1200       DEBUG(AssertSorted(*Cache, NumSortedEntries));
   1201       MemDepResult Dep = GetNonLocalInfoForBlock(QueryInst, Loc, isLoad, BB,
   1202                                                  Cache, NumSortedEntries);
   1203 
   1204       // If we got a Def or Clobber, add this to the list of results.
   1205       if (!Dep.isNonLocal()) {
   1206         if (DT.isReachableFromEntry(BB)) {
   1207           Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
   1208           continue;
   1209         }
   1210       }
   1211     }
   1212 
   1213     // If 'Pointer' is an instruction defined in this block, then we need to do
   1214     // phi translation to change it into a value live in the predecessor block.
   1215     // If not, we just add the predecessors to the worklist and scan them with
   1216     // the same Pointer.
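             // For example (illustrative): a pointer such as
             // '%p = getelementptr i8, i8* %base, i64 %i' defined in BB is not
             // available in BB's predecessors and must be rewritten in terms of
             // values that are, whereas a pointer defined in a block dominating BB
             // can simply be reused.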
   1217     if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
   1218       SkipFirstBlock = false;
   1219       SmallVector<BasicBlock *, 16> NewBlocks;
   1220       for (BasicBlock *Pred : PredCache.get(BB)) {
   1221         // Verify that we haven't looked at this block yet.
   1222         std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
   1223             Visited.insert(std::make_pair(Pred, Pointer.getAddr()));
   1224         if (InsertRes.second) {
    1225           // First time we've looked at Pred.
   1226           NewBlocks.push_back(Pred);
   1227           continue;
   1228         }
   1229 
   1230         // If we have seen this block before, but it was with a different
   1231         // pointer then we have a phi translation failure and we have to treat
   1232         // this as a clobber.
   1233         if (InsertRes.first->second != Pointer.getAddr()) {
   1234           // Make sure to clean up the Visited map before continuing on to
   1235           // PredTranslationFailure.
   1236           for (unsigned i = 0; i < NewBlocks.size(); i++)
   1237             Visited.erase(NewBlocks[i]);
   1238           goto PredTranslationFailure;
   1239         }
   1240       }
   1241       if (NewBlocks.size() > WorklistEntries) {
   1242         // Make sure to clean up the Visited map before continuing on to
   1243         // PredTranslationFailure.
   1244         for (unsigned i = 0; i < NewBlocks.size(); i++)
   1245           Visited.erase(NewBlocks[i]);
   1246         GotWorklistLimit = true;
   1247         goto PredTranslationFailure;
   1248       }
   1249       WorklistEntries -= NewBlocks.size();
   1250       Worklist.append(NewBlocks.begin(), NewBlocks.end());
   1251       continue;
   1252     }
   1253 
    1254     // We do need to do phi translation.  If we know ahead of time that we
    1255     // can't phi translate this value, don't even try.
   1256     if (!Pointer.IsPotentiallyPHITranslatable())
   1257       goto PredTranslationFailure;
   1258 
   1259     // We may have added values to the cache list before this PHI translation.
   1260     // If so, we haven't done anything to ensure that the cache remains sorted.
   1261     // Sort it now (if needed) so that recursive invocations of
   1262     // getNonLocalPointerDepFromBB and other routines that could reuse the cache
   1263     // value will only see properly sorted cache arrays.
   1264     if (Cache && NumSortedEntries != Cache->size()) {
   1265       SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
   1266       NumSortedEntries = Cache->size();
   1267     }
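             // Null out Cache across the recursion below: the recursive calls may
             // grow NonLocalPointerDeps and invalidate this pointer, so it is
             // re-fetched from the map afterwards (see the refreshes following the
             // PredList loops and in PredTranslationFailure).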
   1268     Cache = nullptr;
   1269 
   1270     PredList.clear();
   1271     for (BasicBlock *Pred : PredCache.get(BB)) {
   1272       PredList.push_back(std::make_pair(Pred, Pointer));
   1273 
   1274       // Get the PHI translated pointer in this predecessor.  This can fail if
    1275       // not translatable, in which case getAddr() returns null.
   1276       PHITransAddr &PredPointer = PredList.back().second;
   1277       PredPointer.PHITranslateValue(BB, Pred, &DT, /*MustDominate=*/false);
   1278       Value *PredPtrVal = PredPointer.getAddr();
   1279 
   1280       // Check to see if we have already visited this pred block with another
   1281       // pointer.  If so, we can't do this lookup.  This failure can occur
   1282       // with PHI translation when a critical edge exists and the PHI node in
   1283       // the successor translates to a pointer value different than the
   1284       // pointer the block was first analyzed with.
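               // For example (illustrative): if Pred also branches to another
               // successor that was scanned first, Visited[Pred] already holds the
               // pointer value used on that path; translating the PHI in BB across
               // the Pred->BB edge can yield a different incoming value, which the
               // mismatch check below treats as a phi translation failure.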
   1285       std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
   1286           Visited.insert(std::make_pair(Pred, PredPtrVal));
   1287 
   1288       if (!InsertRes.second) {
   1289         // We found the pred; take it off the list of preds to visit.
   1290         PredList.pop_back();
   1291 
   1292         // If the predecessor was visited with PredPtr, then we already did
   1293         // the analysis and can ignore it.
   1294         if (InsertRes.first->second == PredPtrVal)
   1295           continue;
   1296 
   1297         // Otherwise, the block was previously analyzed with a different
   1298         // pointer.  We can't represent the result of this case, so we just
   1299         // treat this as a phi translation failure.
   1300 
   1301         // Make sure to clean up the Visited map before continuing on to
   1302         // PredTranslationFailure.
   1303         for (unsigned i = 0, n = PredList.size(); i < n; ++i)
   1304           Visited.erase(PredList[i].first);
   1305 
   1306         goto PredTranslationFailure;
   1307       }
   1308     }
   1309 
    1310     // Actually process results here; this needs to be a separate loop to avoid
   1311     // calling getNonLocalPointerDepFromBB for blocks we don't want to return
   1312     // any results for.  (getNonLocalPointerDepFromBB will modify our
    1313     // data structures in ways the code after the PredTranslationFailure label
   1314     // doesn't expect.)
   1315     for (unsigned i = 0, n = PredList.size(); i < n; ++i) {
   1316       BasicBlock *Pred = PredList[i].first;
   1317       PHITransAddr &PredPointer = PredList[i].second;
   1318       Value *PredPtrVal = PredPointer.getAddr();
   1319 
   1320       bool CanTranslate = true;
   1321       // If PHI translation was unable to find an available pointer in this
   1322       // predecessor, then we have to assume that the pointer is clobbered in
   1323       // that predecessor.  We can still do PRE of the load, which would insert
   1324       // a computation of the pointer in this predecessor.
   1325       if (!PredPtrVal)
   1326         CanTranslate = false;
   1327 
   1328       // FIXME: it is entirely possible that PHI translating will end up with
   1329       // the same value.  Consider PHI translating something like:
   1330       // X = phi [x, bb1], [y, bb2].  PHI translating for bb1 doesn't *need*
   1331       // to recurse here, pedantically speaking.
   1332 
   1333       // If getNonLocalPointerDepFromBB fails here, that means the cached
   1334       // result conflicted with the Visited list; we have to conservatively
   1335       // assume it is unknown, but this also does not block PRE of the load.
   1336       if (!CanTranslate ||
   1337           !getNonLocalPointerDepFromBB(QueryInst, PredPointer,
   1338                                       Loc.getWithNewPtr(PredPtrVal), isLoad,
   1339                                       Pred, Result, Visited)) {
   1340         // Add the entry to the Result list.
   1341         NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
   1342         Result.push_back(Entry);
   1343 
   1344         // Since we had a phi translation failure, the cache for CacheKey won't
   1345         // include all of the entries that we need to immediately satisfy future
   1346         // queries.  Mark this in NonLocalPointerDeps by setting the
    1347         // BBSkipFirstBlockPair pointer to null.  Reusing the cached value then
    1348         // requires more work, but it will not miss the phi translation failure.
   1349         NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
   1350         NLPI.Pair = BBSkipFirstBlockPair();
   1351         continue;
   1352       }
   1353     }
   1354 
   1355     // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
   1356     CacheInfo = &NonLocalPointerDeps[CacheKey];
   1357     Cache = &CacheInfo->NonLocalDeps;
   1358     NumSortedEntries = Cache->size();
   1359 
   1360     // Since we did phi translation, the "Cache" set won't contain all of the
   1361     // results for the query.  This is ok (we can still use it to accelerate
   1362     // specific block queries) but we can't do the fastpath "return all
    1363     // results from the set".  Clear out the indicator for this.
   1364     CacheInfo->Pair = BBSkipFirstBlockPair();
   1365     SkipFirstBlock = false;
   1366     continue;
   1367 
   1368   PredTranslationFailure:
   1369     // The following code is "failure"; we can't produce a sane translation
   1370     // for the given block.  It assumes that we haven't modified any of
    1371     // our data structures while processing the current block.
   1372 
   1373     if (!Cache) {
   1374       // Refresh the CacheInfo/Cache pointer if it got invalidated.
   1375       CacheInfo = &NonLocalPointerDeps[CacheKey];
   1376       Cache = &CacheInfo->NonLocalDeps;
   1377       NumSortedEntries = Cache->size();
   1378     }
   1379 
   1380     // Since we failed phi translation, the "Cache" set won't contain all of the
   1381     // results for the query.  This is ok (we can still use it to accelerate
   1382     // specific block queries) but we can't do the fastpath "return all
   1383     // results from the set".  Clear out the indicator for this.
   1384     CacheInfo->Pair = BBSkipFirstBlockPair();
   1385 
   1386     // If *nothing* works, mark the pointer as unknown.
   1387     //
   1388     // If this is the magic first block, return this as a clobber of the whole
   1389     // incoming value.  Since we can't phi translate to one of the predecessors,
   1390     // we have to bail out.
   1391     if (SkipFirstBlock)
   1392       return false;
   1393 
   1394     bool foundBlock = false;
   1395     for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
   1396       if (I.getBB() != BB)
   1397         continue;
   1398 
   1399       assert((GotWorklistLimit || I.getResult().isNonLocal() ||
   1400               !DT.isReachableFromEntry(BB)) &&
   1401              "Should only be here with transparent block");
   1402       foundBlock = true;
   1403       I.setResult(MemDepResult::getUnknown());
   1404       Result.push_back(
   1405           NonLocalDepResult(I.getBB(), I.getResult(), Pointer.getAddr()));
   1406       break;
   1407     }
   1408     (void)foundBlock; (void)GotWorklistLimit;
   1409     assert((foundBlock || GotWorklistLimit) && "Current block not in cache?");
   1410   }
   1411 
   1412   // Okay, we're done now.  If we added new values to the cache, re-sort it.
   1413   SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
   1414   DEBUG(AssertSorted(*Cache));
   1415   return true;
   1416 }
   1417 
   1418 /// If P exists in CachedNonLocalPointerInfo, remove it.
   1419 void MemoryDependenceResults::RemoveCachedNonLocalPointerDependencies(
   1420     ValueIsLoadPair P) {
   1421   CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P);
   1422   if (It == NonLocalPointerDeps.end())
   1423     return;
   1424 
   1425   // Remove all of the entries in the BB->val map.  This involves removing
   1426   // instructions from the reverse map.
   1427   NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
   1428 
   1429   for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
   1430     Instruction *Target = PInfo[i].getResult().getInst();
   1431     if (!Target)
   1432       continue; // Ignore non-local dep results.
   1433     assert(Target->getParent() == PInfo[i].getBB());
   1434 
    1435     // We're eliminating the dirty entry from 'Cache', so update the reverse info.
   1436     RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
   1437   }
   1438 
   1439   // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
   1440   NonLocalPointerDeps.erase(It);
   1441 }
   1442 
   1443 void MemoryDependenceResults::invalidateCachedPointerInfo(Value *Ptr) {
   1444   // If Ptr isn't really a pointer, just ignore it.
   1445   if (!Ptr->getType()->isPointerTy())
   1446     return;
   1447   // Flush store info for the pointer.
   1448   RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
   1449   // Flush load info for the pointer.
   1450   RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
   1451 }
   1452 
   1453 void MemoryDependenceResults::invalidateCachedPredecessors() {
   1454   PredCache.clear();
   1455 }
   1456 
   1457 void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
   1458   // Walk through the Non-local dependencies, removing this one as the value
   1459   // for any cached queries.
   1460   NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst);
   1461   if (NLDI != NonLocalDeps.end()) {
   1462     NonLocalDepInfo &BlockMap = NLDI->second.first;
   1463     for (auto &Entry : BlockMap)
   1464       if (Instruction *Inst = Entry.getResult().getInst())
   1465         RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
   1466     NonLocalDeps.erase(NLDI);
   1467   }
   1468 
   1469   // If we have a cached local dependence query for this instruction, remove it.
   1470   //
   1471   LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
   1472   if (LocalDepEntry != LocalDeps.end()) {
   1473     // Remove us from DepInst's reverse set now that the local dep info is gone.
   1474     if (Instruction *Inst = LocalDepEntry->second.getInst())
   1475       RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
   1476 
   1477     // Remove this local dependency info.
   1478     LocalDeps.erase(LocalDepEntry);
   1479   }
   1480 
   1481   // If we have any cached pointer dependencies on this instruction, remove
   1482   // them.  If the instruction has non-pointer type, then it can't be a pointer
   1483   // base.
   1484 
   1485   // Remove it from both the load info and the store info.  The instruction
   1486   // can't be in either of these maps if it is non-pointer.
   1487   if (RemInst->getType()->isPointerTy()) {
   1488     RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
   1489     RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
   1490   }
   1491 
   1492   // Loop over all of the things that depend on the instruction we're removing.
   1493   //
   1494   SmallVector<std::pair<Instruction *, Instruction *>, 8> ReverseDepsToAdd;
   1495 
   1496   // If we find RemInst as a clobber or Def in any of the maps for other values,
   1497   // we need to replace its entry with a dirty version of the instruction after
   1498   // it.  If RemInst is a terminator, we use a null dirty value.
   1499   //
   1500   // Using a dirty version of the instruction after RemInst saves having to scan
   1501   // the entire block to get to this point.
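           // For example (illustrative): if some load was cached as depending on
           // RemInst and RemInst is erased, its cache entry becomes a dirty marker
           // pointing at the instruction that followed RemInst, so the next query
           // for that load resumes scanning from there rather than from scratch.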
   1502   MemDepResult NewDirtyVal;
   1503   if (!RemInst->isTerminator())
   1504     NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());
   1505 
   1506   ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
   1507   if (ReverseDepIt != ReverseLocalDeps.end()) {
   1508     // RemInst can't be the terminator if it has local stuff depending on it.
   1509     assert(!ReverseDepIt->second.empty() && !isa<TerminatorInst>(RemInst) &&
   1510            "Nothing can locally depend on a terminator");
   1511 
   1512     for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
   1513       assert(InstDependingOnRemInst != RemInst &&
   1514              "Already removed our local dep info");
   1515 
   1516       LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
   1517 
    1518       // Make sure to remember that new things depend on NewDirtyVal's instruction.
   1519       assert(NewDirtyVal.getInst() &&
   1520              "There is no way something else can have "
   1521              "a local dep on this if it is a terminator!");
   1522       ReverseDepsToAdd.push_back(
   1523           std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst));
   1524     }
   1525 
   1526     ReverseLocalDeps.erase(ReverseDepIt);
   1527 
   1528     // Add new reverse deps after scanning the set, to avoid invalidating the
   1529     // 'ReverseDeps' reference.
   1530     while (!ReverseDepsToAdd.empty()) {
   1531       ReverseLocalDeps[ReverseDepsToAdd.back().first].insert(
   1532           ReverseDepsToAdd.back().second);
   1533       ReverseDepsToAdd.pop_back();
   1534     }
   1535   }
   1536 
   1537   ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
   1538   if (ReverseDepIt != ReverseNonLocalDeps.end()) {
   1539     for (Instruction *I : ReverseDepIt->second) {
   1540       assert(I != RemInst && "Already removed NonLocalDep info for RemInst");
   1541 
   1542       PerInstNLInfo &INLD = NonLocalDeps[I];
   1543       // The information is now dirty!
   1544       INLD.second = true;
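               // Dirty per-instruction info is re-verified lazily the next time I is
               // queried; nothing is recomputed eagerly here.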
   1545 
   1546       for (auto &Entry : INLD.first) {
   1547         if (Entry.getResult().getInst() != RemInst)
   1548           continue;
   1549 
   1550         // Convert to a dirty entry for the subsequent instruction.
   1551         Entry.setResult(NewDirtyVal);
   1552 
   1553         if (Instruction *NextI = NewDirtyVal.getInst())
   1554           ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
   1555       }
   1556     }
   1557 
   1558     ReverseNonLocalDeps.erase(ReverseDepIt);
   1559 
    1560     // Add new reverse deps after scanning the set, to avoid invalidating it.
   1561     while (!ReverseDepsToAdd.empty()) {
   1562       ReverseNonLocalDeps[ReverseDepsToAdd.back().first].insert(
   1563           ReverseDepsToAdd.back().second);
   1564       ReverseDepsToAdd.pop_back();
   1565     }
   1566   }
   1567 
   1568   // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
   1569   // value in the NonLocalPointerDeps info.
   1570   ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
   1571       ReverseNonLocalPtrDeps.find(RemInst);
   1572   if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
   1573     SmallVector<std::pair<Instruction *, ValueIsLoadPair>, 8>
   1574         ReversePtrDepsToAdd;
   1575 
   1576     for (ValueIsLoadPair P : ReversePtrDepIt->second) {
   1577       assert(P.getPointer() != RemInst &&
   1578              "Already removed NonLocalPointerDeps info for RemInst");
   1579 
   1580       NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
   1581 
   1582       // The cache is not valid for any specific block anymore.
   1583       NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
   1584 
   1585       // Update any entries for RemInst to use the instruction after it.
   1586       for (auto &Entry : NLPDI) {
   1587         if (Entry.getResult().getInst() != RemInst)
   1588           continue;
   1589 
   1590         // Convert to a dirty entry for the subsequent instruction.
   1591         Entry.setResult(NewDirtyVal);
   1592 
   1593         if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
   1594           ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
   1595       }
   1596 
   1597       // Re-sort the NonLocalDepInfo.  Changing the dirty entry to its
   1598       // subsequent value may invalidate the sortedness.
   1599       std::sort(NLPDI.begin(), NLPDI.end());
   1600     }
   1601 
   1602     ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
   1603 
   1604     while (!ReversePtrDepsToAdd.empty()) {
   1605       ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first].insert(
   1606           ReversePtrDepsToAdd.back().second);
   1607       ReversePtrDepsToAdd.pop_back();
   1608     }
   1609   }
   1610 
   1611   assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
   1612   DEBUG(verifyRemoved(RemInst));
   1613 }
   1614 
   1615 /// Verify that the specified instruction does not occur in our internal data
   1616 /// structures.
   1617 ///
   1618 /// This function verifies by asserting in debug builds.
   1619 void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
   1620 #ifndef NDEBUG
   1621   for (const auto &DepKV : LocalDeps) {
   1622     assert(DepKV.first != D && "Inst occurs in data structures");
   1623     assert(DepKV.second.getInst() != D && "Inst occurs in data structures");
   1624   }
   1625 
   1626   for (const auto &DepKV : NonLocalPointerDeps) {
   1627     assert(DepKV.first.getPointer() != D && "Inst occurs in NLPD map key");
   1628     for (const auto &Entry : DepKV.second.NonLocalDeps)
   1629       assert(Entry.getResult().getInst() != D && "Inst occurs as NLPD value");
   1630   }
   1631 
   1632   for (const auto &DepKV : NonLocalDeps) {
   1633     assert(DepKV.first != D && "Inst occurs in data structures");
   1634     const PerInstNLInfo &INLD = DepKV.second;
   1635     for (const auto &Entry : INLD.first)
   1636       assert(Entry.getResult().getInst() != D &&
   1637              "Inst occurs in data structures");
   1638   }
   1639 
   1640   for (const auto &DepKV : ReverseLocalDeps) {
   1641     assert(DepKV.first != D && "Inst occurs in data structures");
   1642     for (Instruction *Inst : DepKV.second)
   1643       assert(Inst != D && "Inst occurs in data structures");
   1644   }
   1645 
   1646   for (const auto &DepKV : ReverseNonLocalDeps) {
   1647     assert(DepKV.first != D && "Inst occurs in data structures");
   1648     for (Instruction *Inst : DepKV.second)
   1649       assert(Inst != D && "Inst occurs in data structures");
   1650   }
   1651 
   1652   for (const auto &DepKV : ReverseNonLocalPtrDeps) {
   1653     assert(DepKV.first != D && "Inst occurs in rev NLPD map");
   1654 
   1655     for (ValueIsLoadPair P : DepKV.second)
   1656       assert(P != ValueIsLoadPair(D, false) && P != ValueIsLoadPair(D, true) &&
   1657              "Inst occurs in ReverseNonLocalPtrDeps map");
   1658   }
   1659 #endif
   1660 }
   1661 
   1662 char MemoryDependenceAnalysis::PassID;
   1663 
   1664 MemoryDependenceResults
   1665 MemoryDependenceAnalysis::run(Function &F, AnalysisManager<Function> &AM) {
   1666   auto &AA = AM.getResult<AAManager>(F);
   1667   auto &AC = AM.getResult<AssumptionAnalysis>(F);
   1668   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
   1669   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
   1670   return MemoryDependenceResults(AA, AC, TLI, DT);
   1671 }
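         // A client pass under the new pass manager would typically obtain the
         // results with something like (illustrative):
         //   auto &MD = AM.getResult<MemoryDependenceAnalysis>(F);
         //   MemDepResult Dep = MD.getDependency(&SomeMemoryInst);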
   1672 
   1673 char MemoryDependenceWrapperPass::ID = 0;
   1674 
   1675 INITIALIZE_PASS_BEGIN(MemoryDependenceWrapperPass, "memdep",
   1676                       "Memory Dependence Analysis", false, true)
   1677 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
   1678 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
   1679 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
   1680 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
   1681 INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep",
   1682                     "Memory Dependence Analysis", false, true)
   1683 
   1684 MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) {
   1685   initializeMemoryDependenceWrapperPassPass(*PassRegistry::getPassRegistry());
   1686 }
   1687 MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() {}
   1688 
   1689 void MemoryDependenceWrapperPass::releaseMemory() {
   1690   MemDep.reset();
   1691 }
   1692 
   1693 void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
   1694   AU.setPreservesAll();
   1695   AU.addRequired<AssumptionCacheTracker>();
   1696   AU.addRequired<DominatorTreeWrapperPass>();
   1697   AU.addRequiredTransitive<AAResultsWrapperPass>();
   1698   AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
   1699 }
   1700 
   1701 bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
   1702   auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
   1703   auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
   1704   auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
   1705   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   1706   MemDep.emplace(AA, AC, TLI, DT);
   1707   return false;
   1708 }
   1709