      1 //===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements an analysis that determines, for a given memory
     11 // operation, what preceding memory operations it depends on.  It builds on
     12 // alias analysis information, and tries to provide a lazy, caching interface to
     13 // a common kind of alias information query.
     14 //
     15 //===----------------------------------------------------------------------===//
     16 
     17 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
     18 #include "llvm/ADT/DenseMap.h"
     19 #include "llvm/ADT/STLExtras.h"
     20 #include "llvm/ADT/SmallPtrSet.h"
     21 #include "llvm/ADT/SmallVector.h"
     22 #include "llvm/ADT/Statistic.h"
     23 #include "llvm/Analysis/AliasAnalysis.h"
     24 #include "llvm/Analysis/AssumptionCache.h"
     25 #include "llvm/Analysis/MemoryBuiltins.h"
     26 #include "llvm/Analysis/MemoryLocation.h"
     27 #include "llvm/Analysis/OrderedBasicBlock.h"
     28 #include "llvm/Analysis/PHITransAddr.h"
     29 #include "llvm/Analysis/PhiValues.h"
     30 #include "llvm/Analysis/TargetLibraryInfo.h"
     31 #include "llvm/Analysis/ValueTracking.h"
     32 #include "llvm/IR/Attributes.h"
     33 #include "llvm/IR/BasicBlock.h"
     34 #include "llvm/IR/CallSite.h"
     35 #include "llvm/IR/Constants.h"
     36 #include "llvm/IR/DataLayout.h"
     37 #include "llvm/IR/DerivedTypes.h"
     38 #include "llvm/IR/Dominators.h"
     39 #include "llvm/IR/Function.h"
     40 #include "llvm/IR/InstrTypes.h"
     41 #include "llvm/IR/Instruction.h"
     42 #include "llvm/IR/Instructions.h"
     43 #include "llvm/IR/IntrinsicInst.h"
     44 #include "llvm/IR/LLVMContext.h"
     45 #include "llvm/IR/Metadata.h"
     46 #include "llvm/IR/Module.h"
     47 #include "llvm/IR/PredIteratorCache.h"
     48 #include "llvm/IR/Type.h"
     49 #include "llvm/IR/Use.h"
     50 #include "llvm/IR/User.h"
     51 #include "llvm/IR/Value.h"
     52 #include "llvm/Pass.h"
     53 #include "llvm/Support/AtomicOrdering.h"
     54 #include "llvm/Support/Casting.h"
     55 #include "llvm/Support/CommandLine.h"
     56 #include "llvm/Support/Compiler.h"
     57 #include "llvm/Support/Debug.h"
     58 #include "llvm/Support/MathExtras.h"
     59 #include <algorithm>
     60 #include <cassert>
     61 #include <cstdint>
     62 #include <iterator>
     63 #include <utility>
     64 
     65 using namespace llvm;
     66 
     67 #define DEBUG_TYPE "memdep"
     68 
     69 STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
     70 STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
     71 STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
     72 
     73 STATISTIC(NumCacheNonLocalPtr,
     74           "Number of fully cached non-local ptr responses");
     75 STATISTIC(NumCacheDirtyNonLocalPtr,
     76           "Number of cached, but dirty, non-local ptr responses");
     77 STATISTIC(NumUncacheNonLocalPtr, "Number of uncached non-local ptr responses");
     78 STATISTIC(NumCacheCompleteNonLocalPtr,
     79           "Number of block queries that were completely cached");
     80 
     81 // Limit for the number of instructions to scan in a block.
     82 
     83 static cl::opt<unsigned> BlockScanLimit(
     84     "memdep-block-scan-limit", cl::Hidden, cl::init(100),
     85     cl::desc("The number of instructions to scan in a block in memory "
     86              "dependency analysis (default = 100)"));
     87 
     88 static cl::opt<unsigned>
     89     BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(1000),
     90                      cl::desc("The number of blocks to scan during memory "
     91                               "dependency analysis (default = 1000)"));
     92 
     93 // Limit on the number of memdep results to process.
     94 static const unsigned int NumResultsLimit = 100;
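
// These limits are ordinary cl::opt flags, so they can be tuned from the
// command line of tools such as opt when investigating compile time, e.g.
// (illustrative invocation):
//   opt -memdep-block-scan-limit=200 -memdep-block-number-limit=500 ...
// NumResultsLimit above, by contrast, is a fixed compile-time constant.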
     95 
     96 /// This is a helper function that removes Val from 'Inst's set in ReverseMap.
     97 ///
     98 /// If the set becomes empty, remove Inst's entry.
     99 template <typename KeyTy>
    100 static void
    101 RemoveFromReverseMap(DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>> &ReverseMap,
    102                      Instruction *Inst, KeyTy Val) {
    103   typename DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>>::iterator InstIt =
    104       ReverseMap.find(Inst);
    105   assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
    106   bool Found = InstIt->second.erase(Val);
    107   assert(Found && "Invalid reverse map!");
    108   (void)Found;
    109   if (InstIt->second.empty())
    110     ReverseMap.erase(InstIt);
    111 }
    112 
    113 /// If the given instruction references a specific memory location, fill in Loc
    114 /// with the details, otherwise set Loc.Ptr to null.
    115 ///
    116 /// Returns a ModRefInfo value describing the general behavior of the
    117 /// instruction.
    118 static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
    119                               const TargetLibraryInfo &TLI) {
    120   if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    121     if (LI->isUnordered()) {
    122       Loc = MemoryLocation::get(LI);
    123       return ModRefInfo::Ref;
    124     }
    125     if (LI->getOrdering() == AtomicOrdering::Monotonic) {
    126       Loc = MemoryLocation::get(LI);
    127       return ModRefInfo::ModRef;
    128     }
    129     Loc = MemoryLocation();
    130     return ModRefInfo::ModRef;
    131   }
    132 
    133   if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    134     if (SI->isUnordered()) {
    135       Loc = MemoryLocation::get(SI);
    136       return ModRefInfo::Mod;
    137     }
    138     if (SI->getOrdering() == AtomicOrdering::Monotonic) {
    139       Loc = MemoryLocation::get(SI);
    140       return ModRefInfo::ModRef;
    141     }
    142     Loc = MemoryLocation();
    143     return ModRefInfo::ModRef;
    144   }
    145 
    146   if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
    147     Loc = MemoryLocation::get(V);
    148     return ModRefInfo::ModRef;
    149   }
    150 
    151   if (const CallInst *CI = isFreeCall(Inst, &TLI)) {
    152     // calls to free() deallocate the entire structure
    153     Loc = MemoryLocation(CI->getArgOperand(0));
    154     return ModRefInfo::Mod;
    155   }
    156 
    157   if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    158     switch (II->getIntrinsicID()) {
    159     case Intrinsic::lifetime_start:
    160     case Intrinsic::lifetime_end:
    161     case Intrinsic::invariant_start:
    162       Loc = MemoryLocation::getForArgument(II, 1, TLI);
    163       // These intrinsics don't really modify the memory, but returning Mod
    164       // will allow them to be handled conservatively.
    165       return ModRefInfo::Mod;
    166     case Intrinsic::invariant_end:
    167       Loc = MemoryLocation::getForArgument(II, 2, TLI);
    168       // These intrinsics don't really modify the memory, but returning Mod
    169       // will allow them to be handled conservatively.
    170       return ModRefInfo::Mod;
    171     default:
    172       break;
    173     }
    174   }
    175 
    176   // Otherwise, just do the coarse-grained thing that always works.
    177   if (Inst->mayWriteToMemory())
    178     return ModRefInfo::ModRef;
    179   if (Inst->mayReadFromMemory())
    180     return ModRefInfo::Ref;
    181   return ModRefInfo::NoModRef;
    182 }
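
// Illustrative example (added comment; the IR below is hypothetical): for a
// simple load such as
//   %v = load i32, i32* %p
// GetLocation fills Loc with the location accessed through %p and returns
// ModRefInfo::Ref, while a monotonic atomic load of the same pointer still
// yields the location but returns ModRefInfo::ModRef so that callers treat it
// conservatively.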
    183 
    184 /// Private helper for finding the local dependencies of a call site.
    185 MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom(
    186     CallSite CS, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
    187     BasicBlock *BB) {
    188   unsigned Limit = BlockScanLimit;
    189 
    190   // Walk backwards through the block, looking for dependencies.
    191   while (ScanIt != BB->begin()) {
    192     Instruction *Inst = &*--ScanIt;
    193     // Debug intrinsics don't cause dependences and should not affect Limit
    194     if (isa<DbgInfoIntrinsic>(Inst))
    195       continue;
    196 
    197     // Limit the amount of scanning we do so we don't end up with quadratic
    198     // running time on extreme testcases.
    199     --Limit;
    200     if (!Limit)
    201       return MemDepResult::getUnknown();
    202 
    203     // If this inst is a memory op, get the pointer it accessed
    204     MemoryLocation Loc;
    205     ModRefInfo MR = GetLocation(Inst, Loc, TLI);
    206     if (Loc.Ptr) {
    207       // A simple instruction.
    208       if (isModOrRefSet(AA.getModRefInfo(CS, Loc)))
    209         return MemDepResult::getClobber(Inst);
    210       continue;
    211     }
    212 
    213     if (auto InstCS = CallSite(Inst)) {
    214       // If these two calls do not interfere, look past it.
    215       if (isNoModRef(AA.getModRefInfo(CS, InstCS))) {
    216         // If the two calls are the same, return InstCS as a Def, so that
    217         // CS can be found redundant and eliminated.
    218         if (isReadOnlyCall && !isModSet(MR) &&
    219             CS.getInstruction()->isIdenticalToWhenDefined(Inst))
    220           return MemDepResult::getDef(Inst);
    221 
    222         // Otherwise if the two calls don't interact (e.g. InstCS is readnone)
    223         // keep scanning.
    224         continue;
    225       } else
    226         return MemDepResult::getClobber(Inst);
    227     }
    228 
    229     // If we could not obtain a pointer for the instruction and the instruction
    230     // touches memory then assume that this is a dependency.
    231     if (isModOrRefSet(MR))
    232       return MemDepResult::getClobber(Inst);
    233   }
    234 
    235   // No dependence found.  If this is the entry block of the function, it is
    236   // unknown, otherwise it is non-local.
    237   if (BB != &BB->getParent()->getEntryBlock())
    238     return MemDepResult::getNonLocal();
    239   return MemDepResult::getNonFuncLocal();
    240 }
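
// Illustrative example (added comment; hypothetical IR): when scanning for the
// dependencies of a read-only call such as
//   %a = call i64 @strlen(i8* %s)
// an identical earlier call to @strlen(%s) with no interfering writes in
// between is returned as a Def, which lets clients such as GVN treat the
// second call as redundant.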
    241 
    242 unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
    243     const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize,
    244     const LoadInst *LI) {
    245   // We can only extend simple integer loads.
    246   if (!isa<IntegerType>(LI->getType()) || !LI->isSimple())
    247     return 0;
    248 
    249   // Load widening is hostile to ThreadSanitizer: it may cause false positives
    250   // or make the reports more cryptic (access sizes are wrong).
    251   if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
    252     return 0;
    253 
    254   const DataLayout &DL = LI->getModule()->getDataLayout();
    255 
    256   // Get the base of this load.
    257   int64_t LIOffs = 0;
    258   const Value *LIBase =
    259       GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, DL);
    260 
    261   // If the two pointers are not based on the same pointer, we can't tell that
    262   // they are related.
    263   if (LIBase != MemLocBase)
    264     return 0;
    265 
    266   // Okay, the two values are based on the same pointer, but returned as
    267   // no-alias.  This happens when we have things like two byte loads at "P+1"
    268   // and "P+3".  Check to see if increasing the size of the "LI" load up to its
    269   // alignment (or the largest native integer type) will allow us to load all
    270   // the bits required by MemLoc.
    271 
    272   // If MemLoc is before LI, then no widening of LI will help us out.
    273   if (MemLocOffs < LIOffs)
    274     return 0;
    275 
    276   // Get the alignment of the load in bytes.  We assume that it is safe to load
    277   // any legal integer up to this size without a problem.  For example, if we're
    278   // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
    279   // widen it up to an i32 load.  If it is known 2-byte aligned, we can widen it
    280   // to i16.
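  //
  // Illustrative worked example (added comment, hypothetical values): with
  // LIOffs == 0, an i8 load that is 4-byte aligned, and MemLoc covering offset
  // 3 with size 1 (so MemLocEnd == 4), the loop below tries a 2-byte and then
  // a 4-byte load; the 4-byte load fits in a legal integer and covers
  // MemLocEnd, so 4 is returned.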
    281   unsigned LoadAlign = LI->getAlignment();
    282 
    283   int64_t MemLocEnd = MemLocOffs + MemLocSize;
    284 
    285   // If no amount of rounding up will let MemLoc fit into LI, then bail out.
    286   if (LIOffs + LoadAlign < MemLocEnd)
    287     return 0;
    288 
    289   // This is the size of the load to try.  Start with the next larger power of
    290   // two.
    291   unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits() / 8U;
    292   NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
    293 
    294   while (true) {
    295     // If this load size is bigger than our known alignment or would not fit
    296     // into a native integer register, then we fail.
    297     if (NewLoadByteSize > LoadAlign ||
    298         !DL.fitsInLegalInteger(NewLoadByteSize * 8))
    299       return 0;
    300 
    301     if (LIOffs + NewLoadByteSize > MemLocEnd &&
    302         (LI->getParent()->getParent()->hasFnAttribute(
    303              Attribute::SanitizeAddress) ||
    304          LI->getParent()->getParent()->hasFnAttribute(
    305              Attribute::SanitizeHWAddress)))
    306       // We will be reading past the location accessed by the original program.
    307       // While this is safe in a regular build, Address Safety analysis tools
    308       // may start reporting false warnings. So, don't do widening.
    309       return 0;
    310 
    311     // If a load of this width would include all of MemLoc, then we succeed.
    312     if (LIOffs + NewLoadByteSize >= MemLocEnd)
    313       return NewLoadByteSize;
    314 
    315     NewLoadByteSize <<= 1;
    316   }
    317 }
    318 
    319 static bool isVolatile(Instruction *Inst) {
    320   if (auto *LI = dyn_cast<LoadInst>(Inst))
    321     return LI->isVolatile();
    322   if (auto *SI = dyn_cast<StoreInst>(Inst))
    323     return SI->isVolatile();
    324   if (auto *AI = dyn_cast<AtomicCmpXchgInst>(Inst))
    325     return AI->isVolatile();
    326   return false;
    327 }
    328 
    329 MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
    330     const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
    331     BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
    332   MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
    333   if (QueryInst != nullptr) {
    334     if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
    335       InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);
    336 
    337       if (InvariantGroupDependency.isDef())
    338         return InvariantGroupDependency;
    339     }
    340   }
    341   MemDepResult SimpleDep = getSimplePointerDependencyFrom(
    342       MemLoc, isLoad, ScanIt, BB, QueryInst, Limit);
    343   if (SimpleDep.isDef())
    344     return SimpleDep;
    345   // A non-local invariant.group dependency indicates there is a non-local
    346   // Def (it only returns NonLocal if it finds a non-local Def), which is
    347   // better than a local clobber and everything else.
    348   if (InvariantGroupDependency.isNonLocal())
    349     return InvariantGroupDependency;
    350 
    351   assert(InvariantGroupDependency.isUnknown() &&
    352          "InvariantGroupDependency should be only unknown at this point");
    353   return SimpleDep;
    354 }
    355 
    356 MemDepResult
    357 MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
    358                                                             BasicBlock *BB) {
    359 
    360   if (!LI->getMetadata(LLVMContext::MD_invariant_group))
    361     return MemDepResult::getUnknown();
    362 
    363   // Take the pointer operand after stripping all casts and zero GEPs. This
    364   // way we only need to search the cast graph downward.
    365   Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();
    366 
    367   // It is not safe to walk the use list of a global value, because function
    368   // passes aren't allowed to look outside their functions.
    369   // FIXME: this could be fixed by filtering instructions from outside
    370   // of the current function.
    371   if (isa<GlobalValue>(LoadOperand))
    372     return MemDepResult::getUnknown();
    373 
    374   // Queue to process all pointers that are equivalent to load operand.
    375   SmallVector<const Value *, 8> LoadOperandsQueue;
    376   LoadOperandsQueue.push_back(LoadOperand);
    377 
    378   Instruction *ClosestDependency = nullptr;
    379   // The order of instructions in the use list is unpredictable. To always
    380   // get the same result, we look for the closest one in terms of dominance.
    381   auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) {
    382     assert(Other && "Must call it with a non-null instruction");
    383     if (Best == nullptr || DT.dominates(Best, Other))
    384       return Other;
    385     return Best;
    386   };
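
  // Illustrative example (added comment; hypothetical IR): given
  //   store i32 42, i32* %ptr, !invariant.group !0
  //   %q = bitcast i32* %ptr to i32*
  //   %v = load i32, i32* %q, !invariant.group !0
  // the BFS below starts from the stripped pointer operand %ptr, walks through
  // the bitcast, and picks the store as the closest dominating invariant.group
  // access, so the load can be answered with a Def on that store.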
    387 
    388   // FIXME: This loop is O(N^2) because dominates can be O(n) and in worst case
    389   // we will see all the instructions. This should be fixed in MSSA.
    390   while (!LoadOperandsQueue.empty()) {
    391     const Value *Ptr = LoadOperandsQueue.pop_back_val();
    392     assert(Ptr && !isa<GlobalValue>(Ptr) &&
    393            "Null or GlobalValue should not be inserted");
    394 
    395     for (const Use &Us : Ptr->uses()) {
    396       auto *U = dyn_cast<Instruction>(Us.getUser());
    397       if (!U || U == LI || !DT.dominates(U, LI))
    398         continue;
    399 
    400       // A bitcast or a gep with all-zero indices uses Ptr. Add it to the
    401       // queue so we can check its users too, e.g. U = bitcast Ptr.
    402       if (isa<BitCastInst>(U)) {
    403         LoadOperandsQueue.push_back(U);
    404         continue;
    405       }
    406       // Gep with zeros is equivalent to bitcast.
    407       // FIXME: we are not sure whether some bitcasts should be canonicalized
    408       // to gep 0 or gep 0 to bitcast because of SROA, so both forms exist.
    409       // Once typeless pointers are ready, both cases will go away (and this
    410       // BFS won't be needed either).
    411       if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
    412         if (GEP->hasAllZeroIndices()) {
    413           LoadOperandsQueue.push_back(U);
    414           continue;
    415         }
    416 
    417       // If we hit a load/store with the same invariant.group metadata (and
    418       // the same pointer operand), we can assume that the value pointed to
    419       // by the pointer operand didn't change.
    420       if ((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
    421           U->getMetadata(LLVMContext::MD_invariant_group) != nullptr)
    422         ClosestDependency = GetClosestDependency(ClosestDependency, U);
    423     }
    424   }
    425 
    426   if (!ClosestDependency)
    427     return MemDepResult::getUnknown();
    428   if (ClosestDependency->getParent() == BB)
    429     return MemDepResult::getDef(ClosestDependency);
    430   // Def(U) can't be returned here because it is non-local. If no local
    431   // dependency is found, return NonLocal, counting on the caller to
    432   // invoke getNonLocalPointerDependency, which will return the cached
    433   // result.
    434   NonLocalDefsCache.try_emplace(
    435       LI, NonLocalDepResult(ClosestDependency->getParent(),
    436                             MemDepResult::getDef(ClosestDependency), nullptr));
    437   ReverseNonLocalDefsCache[ClosestDependency].insert(LI);
    438   return MemDepResult::getNonLocal();
    439 }
    440 
    441 MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
    442     const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
    443     BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
    444   bool isInvariantLoad = false;
    445 
    446   if (!Limit) {
    447     unsigned DefaultLimit = BlockScanLimit;
    448     return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst,
    449                                           &DefaultLimit);
    450   }
    451 
    452   // We must be careful with atomic accesses, as they may allow another thread
    453   //   to touch this location, clobbering it. We are conservative: if the
    454   //   QueryInst is not a simple (non-atomic) memory access, we automatically
    455   //   return getClobber.
    456   // If it is simple, we know based on the results of
    457   // "Compiler testing via a theory of sound optimisations in the C11/C++11
    458   //   memory model" in PLDI 2013, that a non-atomic location can only be
    459   //   clobbered between a pair of a release and an acquire action, with no
    460   //   access to the location in between.
    461   // Here is an example to give the general intuition behind this rule.
    462   // In the following code:
    463   //   store x 0;
    464   //   release action; [1]
    465   //   acquire action; [4]
    466   //   %val = load x;
    467   // It is unsafe to replace %val by 0 because another thread may be running:
    468   //   acquire action; [2]
    469   //   store x 42;
    470   //   release action; [3]
    471   // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
    472   // being 42. A key property of this program, however, is that if either
    473   // 1 or 4 were missing, there would be a race between the store of 42 and
    474   // either the store of 0 or the load (making the whole program racy).
    475   // The paper mentioned above shows that the same property is respected
    476   // by every program that can detect any optimization of that kind: either
    477   // it is racy (undefined) or there is a release followed by an acquire
    478   // between the pair of accesses under consideration.
    479 
    480   // If the load is invariant, we "know" that it doesn't alias *any* write. We
    481   // do want to respect mustalias results since defs are useful for value
    482   // forwarding, but any mayalias write can be assumed to be noalias.
    483   // Arguably, this logic should be pushed inside AliasAnalysis itself.
    484   if (isLoad && QueryInst) {
    485     LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
    486     if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
    487       isInvariantLoad = true;
    488   }
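
  // Illustrative example (added comment; hypothetical IR): a load carrying
  // !invariant.load metadata, e.g.
  //   %v = load i32, i32* %p, !invariant.load !0
  // is known not to be clobbered by any may-alias write, so the scan below
  // only needs to honor must-alias stores, which remain useful as Defs for
  // value forwarding.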
    489 
    490   const DataLayout &DL = BB->getModule()->getDataLayout();
    491 
    492   // Create a numbered basic block to lazily compute and cache instruction
    493   // positions inside a BB. This is used to provide fast queries for relative
    494   // position between two instructions in a BB and can be used by
    495   // AliasAnalysis::callCapturesBefore.
    496   OrderedBasicBlock OBB(BB);
    497 
    498   // Return "true" if and only if the instruction I is either a non-simple
    499   // load or a non-simple store.
    500   auto isNonSimpleLoadOrStore = [](Instruction *I) -> bool {
    501     if (auto *LI = dyn_cast<LoadInst>(I))
    502       return !LI->isSimple();
    503     if (auto *SI = dyn_cast<StoreInst>(I))
    504       return !SI->isSimple();
    505     return false;
    506   };
    507 
    508   // Return "true" if I is not a load and not a store, but it does access
    509   // memory.
    510   auto isOtherMemAccess = [](Instruction *I) -> bool {
    511     return !isa<LoadInst>(I) && !isa<StoreInst>(I) && I->mayReadOrWriteMemory();
    512   };
    513 
    514   // Walk backwards through the basic block, looking for dependencies.
    515   while (ScanIt != BB->begin()) {
    516     Instruction *Inst = &*--ScanIt;
    517 
    518     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
    519       // Debug intrinsics don't (and can't) cause dependencies.
    520       if (isa<DbgInfoIntrinsic>(II))
    521         continue;
    522 
    523     // Limit the amount of scanning we do so we don't end up with quadratic
    524     // running time on extreme testcases.
    525     --*Limit;
    526     if (!*Limit)
    527       return MemDepResult::getUnknown();
    528 
    529     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    530       // If we reach a lifetime begin or end marker, then the query ends here
    531       // because the value is undefined.
    532       if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
    533         // FIXME: This only considers queries directly on the invariant-tagged
    534         // pointer, not on query pointers that are indexed off of them.  It'd
    535         // be nice to handle that at some point (the right approach is to use
    536         // GetPointerBaseWithConstantOffset).
    537         if (AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), MemLoc))
    538           return MemDepResult::getDef(II);
    539         continue;
    540       }
    541     }
    542 
    543     // Values depend on loads if the pointers are must aliased.  This means
    544     // that a load depends on another must aliased load from the same value.
    545     // One exception is atomic loads: a value can depend on an atomic load that
    546     // it does not alias with when this atomic load indicates that another
    547     // thread may be accessing the location.
    548     if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    549       // While volatile accesses cannot be eliminated, they do not have to
    550       // clobber non-aliasing locations; normal accesses, for example, can
    551       // be safely reordered with volatile accesses.
    552       if (LI->isVolatile()) {
    553         if (!QueryInst)
    554           // Original QueryInst *may* be volatile
    555           return MemDepResult::getClobber(LI);
    556         if (isVolatile(QueryInst))
    557           // Ordering required if QueryInst is itself volatile
    558           return MemDepResult::getClobber(LI);
    559         // Otherwise, volatile doesn't imply any special ordering
    560       }
    561 
    562       // Atomic loads have complications involved.
    563       // A Monotonic (or higher) load is OK if the query inst is itself not
    564       // atomic.
    565       // FIXME: This is overly conservative.
    566       if (LI->isAtomic() && isStrongerThanUnordered(LI->getOrdering())) {
    567         if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
    568             isOtherMemAccess(QueryInst))
    569           return MemDepResult::getClobber(LI);
    570         if (LI->getOrdering() != AtomicOrdering::Monotonic)
    571           return MemDepResult::getClobber(LI);
    572       }
    573 
    574       MemoryLocation LoadLoc = MemoryLocation::get(LI);
    575 
    576       // If we found a pointer, check if it could be the same as our pointer.
    577       AliasResult R = AA.alias(LoadLoc, MemLoc);
    578 
    579       if (isLoad) {
    580         if (R == NoAlias)
    581           continue;
    582 
    583         // Must aliased loads are defs of each other.
    584         if (R == MustAlias)
    585           return MemDepResult::getDef(Inst);
    586 
    587 #if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads
    588       // in terms of clobbering loads, but since it does this by looking
    589       // at the clobbering load directly, it doesn't know about any
    590       // phi translation that may have happened along the way.
    591 
    592         // If we have a partial alias, then return this as a clobber for the
    593         // client to handle.
    594         if (R == PartialAlias)
    595           return MemDepResult::getClobber(Inst);
    596 #endif
    597 
    598         // Other may-alias loads don't clobber each other, so there is no
    599         // dependence here; keep scanning.
    600         continue;
    601       }
    602 
    603       // Stores don't depend on other no-aliased accesses.
    604       if (R == NoAlias)
    605         continue;
    606 
    607       // Stores don't alias loads from read-only memory.
    608       if (AA.pointsToConstantMemory(LoadLoc))
    609         continue;
    610 
    611       // Stores depend on may/must aliased loads.
    612       return MemDepResult::getDef(Inst);
    613     }
    614 
    615     if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    616       // Atomic stores have complications involved.
    617       // A Monotonic store is OK if the query inst is itself not atomic.
    618       // FIXME: This is overly conservative.
    619       if (!SI->isUnordered() && SI->isAtomic()) {
    620         if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
    621             isOtherMemAccess(QueryInst))
    622           return MemDepResult::getClobber(SI);
    623         if (SI->getOrdering() != AtomicOrdering::Monotonic)
    624           return MemDepResult::getClobber(SI);
    625       }
    626 
    627       // FIXME: this is overly conservative.
    628       // While volatile accesses cannot be eliminated, they do not have to
    629       // clobber non-aliasing locations; normal accesses, for example, can
    630       // be reordered with volatile accesses.
    631       if (SI->isVolatile())
    632         if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
    633             isOtherMemAccess(QueryInst))
    634           return MemDepResult::getClobber(SI);
    635 
    636       // If alias analysis can tell that this store is guaranteed to not modify
    637       // the query pointer, ignore it.  Use getModRefInfo to handle cases where
    638       // the query pointer points to constant memory etc.
    639       if (!isModOrRefSet(AA.getModRefInfo(SI, MemLoc)))
    640         continue;
    641 
    642       // Ok, this store might clobber the query pointer.  Check to see if it is
    643       // a must alias: in this case, we want to return this as a def.
    644       // FIXME: Use ModRefInfo::Must bit from getModRefInfo call above.
    645       MemoryLocation StoreLoc = MemoryLocation::get(SI);
    646 
    647       // If we found a pointer, check if it could be the same as our pointer.
    648       AliasResult R = AA.alias(StoreLoc, MemLoc);
    649 
    650       if (R == NoAlias)
    651         continue;
    652       if (R == MustAlias)
    653         return MemDepResult::getDef(Inst);
    654       if (isInvariantLoad)
    655         continue;
    656       return MemDepResult::getClobber(Inst);
    657     }
    658 
    659     // If this is an allocation, and if we know that the accessed pointer is to
    660     // the allocation, return Def.  This means that there is no dependence and
    661     // the access can be optimized based on that.  For example, a load could
    662     // turn into undef.  Note that we can bypass the allocation itself when
    663     // looking for a clobber in many cases; that's an alias property and is
    664     // handled by BasicAA.
    665     if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, &TLI)) {
    666       const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, DL);
    667       if (AccessPtr == Inst || AA.isMustAlias(Inst, AccessPtr))
    668         return MemDepResult::getDef(Inst);
    669     }
    670 
    671     if (isInvariantLoad)
    672       continue;
    673 
    674     // A release fence requires that all stores complete before it, but does
    675     // not prevent the reordering of following loads or stores 'before' the
    676     // fence.  As a result, we look past it when finding a dependency for
    677     // loads.  DSE uses this to find preceding stores to delete and thus we
    678     // can't bypass the fence if the query instruction is a store.
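    // For example (illustrative, added): if the query is a load of %p and the
    // scan reaches a 'fence release', we keep looking above the fence for an
    // earlier store to %p; if the query were a store, the fence would not be
    // skipped and would be handled conservatively below.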
    679     if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
    680       if (isLoad && FI->getOrdering() == AtomicOrdering::Release)
    681         continue;
    682 
    683     // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
    684     ModRefInfo MR = AA.getModRefInfo(Inst, MemLoc);
    685     // If necessary, perform additional analysis.
    686     if (isModAndRefSet(MR))
    687       MR = AA.callCapturesBefore(Inst, MemLoc, &DT, &OBB);
    688     switch (clearMust(MR)) {
    689     case ModRefInfo::NoModRef:
    690       // If the call has no effect on the queried pointer, just ignore it.
    691       continue;
    692     case ModRefInfo::Mod:
    693       return MemDepResult::getClobber(Inst);
    694     case ModRefInfo::Ref:
    695       // If the call is known to never store to the pointer, and if this is a
    696       // load query, we can safely ignore it (scan past it).
    697       if (isLoad)
    698         continue;
    699       LLVM_FALLTHROUGH;
    700     default:
    701       // Otherwise, there is a potential dependence.  Return a clobber.
    702       return MemDepResult::getClobber(Inst);
    703     }
    704   }
    705 
    706   // No dependence found.  If this is the entry block of the function, it is
    707   // unknown, otherwise it is non-local.
    708   if (BB != &BB->getParent()->getEntryBlock())
    709     return MemDepResult::getNonLocal();
    710   return MemDepResult::getNonFuncLocal();
    711 }
    712 
    713 MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
    714   Instruction *ScanPos = QueryInst;
    715 
    716   // Check for a cached result
    717   MemDepResult &LocalCache = LocalDeps[QueryInst];
    718 
    719   // If the cached entry is non-dirty, just return it.  Note that this depends
    720   // on MemDepResult's default constructing to 'dirty'.
    721   if (!LocalCache.isDirty())
    722     return LocalCache;
    723 
    724   // Otherwise, if we have a dirty entry, we know we can start the scan at that
    725   // instruction, which may save us some work.
    726   if (Instruction *Inst = LocalCache.getInst()) {
    727     ScanPos = Inst;
    728 
    729     RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
    730   }
    731 
    732   BasicBlock *QueryParent = QueryInst->getParent();
    733 
    734   // Do the scan.
    735   if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
    736     // No dependence found. If this is the entry block of the function, it is
    737     // unknown, otherwise it is non-local.
    738     if (QueryParent != &QueryParent->getParent()->getEntryBlock())
    739       LocalCache = MemDepResult::getNonLocal();
    740     else
    741       LocalCache = MemDepResult::getNonFuncLocal();
    742   } else {
    743     MemoryLocation MemLoc;
    744     ModRefInfo MR = GetLocation(QueryInst, MemLoc, TLI);
    745     if (MemLoc.Ptr) {
    746       // If we can do a pointer scan, make it happen.
    747       bool isLoad = !isModSet(MR);
    748       if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
    749         isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
    750 
    751       LocalCache = getPointerDependencyFrom(
    752           MemLoc, isLoad, ScanPos->getIterator(), QueryParent, QueryInst);
    753     } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
    754       CallSite QueryCS(QueryInst);
    755       bool isReadOnly = AA.onlyReadsMemory(QueryCS);
    756       LocalCache = getCallSiteDependencyFrom(
    757           QueryCS, isReadOnly, ScanPos->getIterator(), QueryParent);
    758     } else
    759       // Non-memory instruction.
    760       LocalCache = MemDepResult::getUnknown();
    761   }
    762 
    763   // Remember the result!
    764   if (Instruction *I = LocalCache.getInst())
    765     ReverseLocalDeps[I].insert(QueryInst);
    766 
    767   return LocalCache;
    768 }
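
// Usage sketch (added comment; hypothetical client code, with 'MD' being a
// MemoryDependenceResults reference held by a pass):
//   MemDepResult Res = MD.getDependency(SomeMemInst);
//   if (Instruction *DepInst = Res.getInst()) {
//     // Local Def or Clobber: DepInst is the instruction within the same
//     // block that SomeMemInst depends on.
//   } else if (Res.isNonLocal()) {
//     // The dependency lives in a predecessor block; use
//     // getNonLocalCallDependency or getNonLocalPointerDependency to
//     // enumerate the per-block results.
//   }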
    769 
    770 #ifndef NDEBUG
    771 /// This method is used when -debug is specified to verify that cache arrays
    772 /// are properly kept sorted.
    773 static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache,
    774                          int Count = -1) {
    775   if (Count == -1)
    776     Count = Cache.size();
    777   assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
    778          "Cache isn't sorted!");
    779 }
    780 #endif
    781 
    782 const MemoryDependenceResults::NonLocalDepInfo &
    783 MemoryDependenceResults::getNonLocalCallDependency(CallSite QueryCS) {
    784   assert(getDependency(QueryCS.getInstruction()).isNonLocal() &&
    785          "getNonLocalCallDependency should only be used on calls with "
    786          "non-local deps!");
    787   PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()];
    788   NonLocalDepInfo &Cache = CacheP.first;
    789 
    790   // This is the set of blocks that need to be recomputed.  In the cached case,
    791   // this can happen due to instructions being deleted etc. In the uncached
    792   // case, this starts out as the set of predecessors we care about.
    793   SmallVector<BasicBlock *, 32> DirtyBlocks;
    794 
    795   if (!Cache.empty()) {
    796     // Okay, we have a cache entry.  If we know it is not dirty, just return it
    797     // with no computation.
    798     if (!CacheP.second) {
    799       ++NumCacheNonLocal;
    800       return Cache;
    801     }
    802 
    803     // If we already have a partially computed set of results, scan them to
    804     // determine what is dirty, seeding our initial DirtyBlocks worklist.
    805     for (auto &Entry : Cache)
    806       if (Entry.getResult().isDirty())
    807         DirtyBlocks.push_back(Entry.getBB());
    808 
    809     // Sort the cache so that we can do fast binary search lookups below.
    810     llvm::sort(Cache.begin(), Cache.end());
    811 
    812     ++NumCacheDirtyNonLocal;
    813     // cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
    814     //     << Cache.size() << " cached: " << *QueryInst;
    815   } else {
    816     // Seed DirtyBlocks with each of the preds of QueryInst's block.
    817     BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
    818     for (BasicBlock *Pred : PredCache.get(QueryBB))
    819       DirtyBlocks.push_back(Pred);
    820     ++NumUncacheNonLocal;
    821   }
    822 
    823   // isReadonlyCall - If this is a read-only call, we can be more aggressive.
    824   bool isReadonlyCall = AA.onlyReadsMemory(QueryCS);
    825 
    826   SmallPtrSet<BasicBlock *, 32> Visited;
    827 
    828   unsigned NumSortedEntries = Cache.size();
    829   LLVM_DEBUG(AssertSorted(Cache));
    830 
    831   // Iterate while we still have blocks to update.
    832   while (!DirtyBlocks.empty()) {
    833     BasicBlock *DirtyBB = DirtyBlocks.back();
    834     DirtyBlocks.pop_back();
    835 
    836     // Already processed this block?
    837     if (!Visited.insert(DirtyBB).second)
    838       continue;
    839 
    840     // Do a binary search to see if we already have an entry for this block in
    841     // the cache set.  If so, find it.
    842     LLVM_DEBUG(AssertSorted(Cache, NumSortedEntries));
    843     NonLocalDepInfo::iterator Entry =
    844         std::upper_bound(Cache.begin(), Cache.begin() + NumSortedEntries,
    845                          NonLocalDepEntry(DirtyBB));
    846     if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
    847       --Entry;
    848 
    849     NonLocalDepEntry *ExistingResult = nullptr;
    850     if (Entry != Cache.begin() + NumSortedEntries &&
    851         Entry->getBB() == DirtyBB) {
    852       // If we already have an entry, and if it isn't already dirty, the block
    853       // is done.
    854       if (!Entry->getResult().isDirty())
    855         continue;
    856 
    857       // Otherwise, remember this slot so we can update the value.
    858       ExistingResult = &*Entry;
    859     }
    860 
    861     // If the dirty entry has a pointer, start scanning from it so we don't have
    862     // to rescan the entire block.
    863     BasicBlock::iterator ScanPos = DirtyBB->end();
    864     if (ExistingResult) {
    865       if (Instruction *Inst = ExistingResult->getResult().getInst()) {
    866         ScanPos = Inst->getIterator();
    867         // We're removing QueryInst's use of Inst.
    868         RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
    869                              QueryCS.getInstruction());
    870       }
    871     }
    872 
    873     // Find out if this block has a local dependency for QueryInst.
    874     MemDepResult Dep;
    875 
    876     if (ScanPos != DirtyBB->begin()) {
    877       Dep =
    878           getCallSiteDependencyFrom(QueryCS, isReadonlyCall, ScanPos, DirtyBB);
    879     } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
    880       // No dependence found.  If this is the entry block of the function,
    881       // it is unknown, otherwise it is non-local.
    882       Dep = MemDepResult::getNonLocal();
    883     } else {
    884       Dep = MemDepResult::getNonFuncLocal();
    885     }
    886 
    887     // If we had a dirty entry for the block, update it.  Otherwise, just add
    888     // a new entry.
    889     if (ExistingResult)
    890       ExistingResult->setResult(Dep);
    891     else
    892       Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
    893 
    894     // If the block has a dependency (i.e. it isn't completely transparent to
    895     // the value), remember the association!
    896     if (!Dep.isNonLocal()) {
    897       // Keep the ReverseNonLocalDeps map up to date so we can efficiently
    898       // update this when we remove instructions.
    899       if (Instruction *Inst = Dep.getInst())
    900         ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
    901     } else {
    902 
    903       // If the block *is* completely transparent to the load, we need to check
    904       // the predecessors of this block.  Add them to our worklist.
    905       for (BasicBlock *Pred : PredCache.get(DirtyBB))
    906         DirtyBlocks.push_back(Pred);
    907     }
    908   }
    909 
    910   return Cache;
    911 }
    912 
    913 void MemoryDependenceResults::getNonLocalPointerDependency(
    914     Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) {
    915   const MemoryLocation Loc = MemoryLocation::get(QueryInst);
    916   bool isLoad = isa<LoadInst>(QueryInst);
    917   BasicBlock *FromBB = QueryInst->getParent();
    918   assert(FromBB);
    919 
    920   assert(Loc.Ptr->getType()->isPointerTy() &&
    921          "Can't get pointer deps of a non-pointer!");
    922   Result.clear();
    923   {
    924     // Check if there is cached Def with invariant.group.
    925     auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
    926     if (NonLocalDefIt != NonLocalDefsCache.end()) {
    927       Result.push_back(NonLocalDefIt->second);
    928       ReverseNonLocalDefsCache[NonLocalDefIt->second.getResult().getInst()]
    929           .erase(QueryInst);
    930       NonLocalDefsCache.erase(NonLocalDefIt);
    931       return;
    932     }
    933   }
    934   // This routine does not expect to deal with volatile instructions.
    935   // Doing so would require piping the QueryInst all the way through.
    936   // TODO: volatiles can't be elided, but they can be reordered with other
    937   // non-volatile accesses.
    938 
    939   // We currently give up on any instruction which is ordered, but we do handle
    940   // atomic instructions which are unordered.
    941   // TODO: Handle ordered instructions
    942   auto isOrdered = [](Instruction *Inst) {
    943     if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    944       return !LI->isUnordered();
    945     } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    946       return !SI->isUnordered();
    947     }
    948     return false;
    949   };
    950   if (isVolatile(QueryInst) || isOrdered(QueryInst)) {
    951     Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
    952                                        const_cast<Value *>(Loc.Ptr)));
    953     return;
    954   }
    955   const DataLayout &DL = FromBB->getModule()->getDataLayout();
    956   PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, &AC);
    957 
    958   // This is the set of blocks we've inspected, and the pointer we consider in
    959   // each block.  Because of critical edges, we currently bail out if querying
    960   // a block with multiple different pointers.  This can happen during PHI
    961   // translation.
    962   DenseMap<BasicBlock *, Value *> Visited;
    963   if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
    964                                    Result, Visited, true))
    965     return;
    966   Result.clear();
    967   Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
    968                                      const_cast<Value *>(Loc.Ptr)));
    969 }
    970 
    971 /// Compute the memdep value for BB with Pointer/PointeeSize using either
    972 /// cached information in Cache or by doing a lookup (which may use dirty cache
    973 /// info if available).
    974 ///
    975 /// If we do a lookup, add the result to the cache.
    976 MemDepResult MemoryDependenceResults::GetNonLocalInfoForBlock(
    977     Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
    978     BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
    979 
    980   // Do a binary search to see if we already have an entry for this block in
    981   // the cache set.  If so, find it.
    982   NonLocalDepInfo::iterator Entry = std::upper_bound(
    983       Cache->begin(), Cache->begin() + NumSortedEntries, NonLocalDepEntry(BB));
    984   if (Entry != Cache->begin() && (Entry - 1)->getBB() == BB)
    985     --Entry;
    986 
    987   NonLocalDepEntry *ExistingResult = nullptr;
    988   if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
    989     ExistingResult = &*Entry;
    990 
    991   // If we have a cached entry, and it is non-dirty, use it as the value for
    992   // this dependency.
    993   if (ExistingResult && !ExistingResult->getResult().isDirty()) {
    994     ++NumCacheNonLocalPtr;
    995     return ExistingResult->getResult();
    996   }
    997 
    998   // Otherwise, we have to scan for the value.  If we have a dirty cache
    999   // entry, start scanning from its position, otherwise we scan from the end
   1000   // of the block.
   1001   BasicBlock::iterator ScanPos = BB->end();
   1002   if (ExistingResult && ExistingResult->getResult().getInst()) {
   1003     assert(ExistingResult->getResult().getInst()->getParent() == BB &&
   1004            "Instruction invalidated?");
   1005     ++NumCacheDirtyNonLocalPtr;
   1006     ScanPos = ExistingResult->getResult().getInst()->getIterator();
   1007 
   1008     // Eliminating the dirty entry from 'Cache', so update the reverse info.
   1009     ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
   1010     RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);
   1011   } else {
   1012     ++NumUncacheNonLocalPtr;
   1013   }
   1014 
   1015   // Scan the block for the dependency.
   1016   MemDepResult Dep =
   1017       getPointerDependencyFrom(Loc, isLoad, ScanPos, BB, QueryInst);
   1018 
   1019   // If we had a dirty entry for the block, update it.  Otherwise, just add
   1020   // a new entry.
   1021   if (ExistingResult)
   1022     ExistingResult->setResult(Dep);
   1023   else
   1024     Cache->push_back(NonLocalDepEntry(BB, Dep));
   1025 
   1026   // If the block has a dependency (i.e. it isn't completely transparent to
   1027   // the value), remember the reverse association because we just added it
   1028   // to Cache!
   1029   if (!Dep.isDef() && !Dep.isClobber())
   1030     return Dep;
   1031 
   1032   // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
   1033   // update MemDep when we remove instructions.
   1034   Instruction *Inst = Dep.getInst();
   1035   assert(Inst && "Didn't depend on anything?");
   1036   ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
   1037   ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
   1038   return Dep;
   1039 }
   1040 
   1041 /// Sort the NonLocalDepInfo cache, given a certain number of elements in the
   1042 /// array that are already properly ordered.
   1043 ///
   1044 /// This is optimized for the case when only a few entries are added.
   1045 static void
   1046 SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
   1047                          unsigned NumSortedEntries) {
   1048   switch (Cache.size() - NumSortedEntries) {
   1049   case 0:
   1050     // done, no new entries.
   1051     break;
   1052   case 2: {
   1053     // Two new entries, insert the last one into place.
   1054     NonLocalDepEntry Val = Cache.back();
   1055     Cache.pop_back();
   1056     MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
   1057         std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
   1058     Cache.insert(Entry, Val);
   1059     LLVM_FALLTHROUGH;
   1060   }
   1061   case 1:
   1062     // One new entry; just insert the new value at the appropriate position.
   1063     if (Cache.size() != 1) {
   1064       NonLocalDepEntry Val = Cache.back();
   1065       Cache.pop_back();
   1066       MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
   1067           std::upper_bound(Cache.begin(), Cache.end(), Val);
   1068       Cache.insert(Entry, Val);
   1069     }
   1070     break;
   1071   default:
   1072     // Added many values, do a full scale sort.
   1073     llvm::sort(Cache.begin(), Cache.end());
   1074     break;
   1075   }
   1076 }
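
// Illustrative example (added comment): if Cache already held four sorted
// entries and two new ones were appended, the 'case 2' path above re-inserts
// the last entry into its sorted position and then falls through to 'case 1'
// to place the remaining one, which is cheaper than re-sorting the whole
// array.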
   1077 
   1078 /// Perform a dependency query based on pointer/pointeesize starting at the end
   1079 /// of StartBB.
   1080 ///
   1081 /// Add any clobber/def results to the results vector and keep track of which
   1082 /// blocks are visited in 'Visited'.
   1083 ///
   1084 /// This has special behavior for the first block queries (when SkipFirstBlock
   1085 /// is true).  In this special case, it ignores the contents of the specified
   1086 /// block and starts returning dependence info for its predecessors.
   1087 ///
   1088 /// This function returns true on success, or false to indicate that it could
   1089 /// not compute dependence information for some reason.  This should be treated
   1090 /// as a clobber dependence on the first instruction in the predecessor block.
   1091 bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
   1092     Instruction *QueryInst, const PHITransAddr &Pointer,
   1093     const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
   1094     SmallVectorImpl<NonLocalDepResult> &Result,
   1095     DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock) {
   1096   // Look up the cached info for Pointer.
   1097   ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
   1098 
   1099   // Set up a temporary NLPI value. If the map doesn't yet have an entry for
   1100   // CacheKey, this value will be inserted as the associated value. Otherwise,
   1101   // it'll be ignored, and we'll have to check to see if the cached size and
   1102   // aa tags are consistent with the current query.
   1103   NonLocalPointerInfo InitialNLPI;
   1104   InitialNLPI.Size = Loc.Size;
   1105   InitialNLPI.AATags = Loc.AATags;
   1106 
   1107   // Get the NLPI for CacheKey, inserting one into the map if it doesn't
   1108   // already have one.
   1109   std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
   1110       NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
   1111   NonLocalPointerInfo *CacheInfo = &Pair.first->second;
   1112 
   1113   // If we already have a cache entry for this CacheKey, we may need to do some
   1114   // work to reconcile the cache entry and the current query.
   1115   if (!Pair.second) {
   1116     if (CacheInfo->Size < Loc.Size) {
   1117       // The query's Size is greater than the cached one. Throw out the
   1118       // cached data and proceed with the query at the greater size.
   1119       CacheInfo->Pair = BBSkipFirstBlockPair();
   1120       CacheInfo->Size = Loc.Size;
   1121       for (auto &Entry : CacheInfo->NonLocalDeps)
   1122         if (Instruction *Inst = Entry.getResult().getInst())
   1123           RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
   1124       CacheInfo->NonLocalDeps.clear();
   1125     } else if (CacheInfo->Size > Loc.Size) {
   1126       // This query's Size is less than the cached one. Conservatively restart
   1127       // the query using the greater size.
   1128       return getNonLocalPointerDepFromBB(
   1129           QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
   1130           StartBB, Result, Visited, SkipFirstBlock);
   1131     }
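
    // Illustrative example (added comment, hypothetical sizes): if a previous
    // query cached results for a 4-byte access of this pointer and the current
    // query asks about 8 bytes, the cached entries are discarded and the query
    // proceeds at 8 bytes; if the current query asks about 2 bytes instead, it
    // is conservatively re-run at the cached 4-byte size.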
   1132 
   1133     // If the query's AATags are inconsistent with the cached one,
   1134     // conservatively throw out the cached data and restart the query with
   1135     // no tag if needed.
   1136     if (CacheInfo->AATags != Loc.AATags) {
   1137       if (CacheInfo->AATags) {
   1138         CacheInfo->Pair = BBSkipFirstBlockPair();
   1139         CacheInfo->AATags = AAMDNodes();
   1140         for (auto &Entry : CacheInfo->NonLocalDeps)
   1141           if (Instruction *Inst = Entry.getResult().getInst())
   1142             RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
   1143         CacheInfo->NonLocalDeps.clear();
   1144       }
   1145       if (Loc.AATags)
   1146         return getNonLocalPointerDepFromBB(
   1147             QueryInst, Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result,
   1148             Visited, SkipFirstBlock);
   1149     }
   1150   }
   1151 
   1152   NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
   1153 
   1154   // If we have valid cached information for exactly the block we are
   1155   // investigating, just return it with no recomputation.
   1156   if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
   1157     // We have a fully cached result for this query, so we can just return the
   1158     // cached results and populate the visited set.  However, we have to verify
   1159     // that we don't already have conflicting results for these blocks.  Check
   1160     // to ensure that if a block in the results set is in the visited set that
   1161     // it was for the same pointer query.
   1162     if (!Visited.empty()) {
   1163       for (auto &Entry : *Cache) {
   1164         DenseMap<BasicBlock *, Value *>::iterator VI =
   1165             Visited.find(Entry.getBB());
   1166         if (VI == Visited.end() || VI->second == Pointer.getAddr())
   1167           continue;
   1168 
   1169         // We have a pointer mismatch in a block.  Just return false, saying
   1170         // that something was clobbered in this result.  We could also do a
   1171         // non-fully cached query, but there is little point in doing this.
   1172         return false;
   1173       }
   1174     }
   1175 
   1176     Value *Addr = Pointer.getAddr();
   1177     for (auto &Entry : *Cache) {
   1178       Visited.insert(std::make_pair(Entry.getBB(), Addr));
   1179       if (Entry.getResult().isNonLocal()) {
   1180         continue;
   1181       }
   1182 
   1183       if (DT.isReachableFromEntry(Entry.getBB())) {
   1184         Result.push_back(
   1185             NonLocalDepResult(Entry.getBB(), Entry.getResult(), Addr));
   1186       }
   1187     }
   1188     ++NumCacheCompleteNonLocalPtr;
   1189     return true;
   1190   }
   1191 
   1192   // Otherwise, this is either a new block, a block with an invalid cache
   1193   // pair, or one that we're about to invalidate by putting more info into
   1194   // it than its valid cache info holds.  If the cache is empty, the result
   1195   // will be valid cache info, otherwise it won't be.
   1196   if (Cache->empty())
   1197     CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
   1198   else
   1199     CacheInfo->Pair = BBSkipFirstBlockPair();
   1200 
   1201   SmallVector<BasicBlock *, 32> Worklist;
   1202   Worklist.push_back(StartBB);
   1203 
    1204   // PredList is declared outside the loop below so its storage is reused.
   1205   SmallVector<std::pair<BasicBlock *, PHITransAddr>, 16> PredList;
   1206 
   1207   // Keep track of the entries that we know are sorted.  Previously cached
   1208   // entries will all be sorted.  The entries we add we only sort on demand (we
   1209   // don't insert every element into its sorted position).  We know that we
   1210   // won't get any reuse from currently inserted values, because we don't
   1211   // revisit blocks after we insert info for them.
   1212   unsigned NumSortedEntries = Cache->size();
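           // WorklistEntries counts down from BlockNumberLimit; once adding a block's
           // predecessors would exceed the remaining budget, we stop expanding the
           // search and treat it as a phi translation failure (see GotWorklistLimit).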
   1213   unsigned WorklistEntries = BlockNumberLimit;
   1214   bool GotWorklistLimit = false;
   1215   LLVM_DEBUG(AssertSorted(*Cache));
   1216 
   1217   while (!Worklist.empty()) {
   1218     BasicBlock *BB = Worklist.pop_back_val();
   1219 
    1220     // If we end up processing a large number of blocks, the query becomes very
    1221     // expensive and the extra results are likely not worth the cost, so bail out.
   1222     if (Result.size() > NumResultsLimit) {
   1223       Worklist.clear();
   1224       // Sort it now (if needed) so that recursive invocations of
   1225       // getNonLocalPointerDepFromBB and other routines that could reuse the
   1226       // cache value will only see properly sorted cache arrays.
   1227       if (Cache && NumSortedEntries != Cache->size()) {
   1228         SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
   1229       }
   1230       // Since we bail out, the "Cache" set won't contain all of the
   1231       // results for the query.  This is ok (we can still use it to accelerate
   1232       // specific block queries) but we can't do the fastpath "return all
   1233       // results from the set".  Clear out the indicator for this.
   1234       CacheInfo->Pair = BBSkipFirstBlockPair();
   1235       return false;
   1236     }
   1237 
   1238     // Skip the first block if we have it.
   1239     if (!SkipFirstBlock) {
    1240       // Analyze the dependency of *Pointer in BB.  We should already have
    1241       // recorded this block in the Visited set.
   1242       assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
   1243 
   1244       // Get the dependency info for Pointer in BB.  If we have cached
   1245       // information, we will use it, otherwise we compute it.
   1246       LLVM_DEBUG(AssertSorted(*Cache, NumSortedEntries));
   1247       MemDepResult Dep = GetNonLocalInfoForBlock(QueryInst, Loc, isLoad, BB,
   1248                                                  Cache, NumSortedEntries);
   1249 
   1250       // If we got a Def or Clobber, add this to the list of results.
   1251       if (!Dep.isNonLocal()) {
   1252         if (DT.isReachableFromEntry(BB)) {
   1253           Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
   1254           continue;
   1255         }
   1256       }
   1257     }
   1258 
   1259     // If 'Pointer' is an instruction defined in this block, then we need to do
   1260     // phi translation to change it into a value live in the predecessor block.
   1261     // If not, we just add the predecessors to the worklist and scan them with
   1262     // the same Pointer.
   1263     if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
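               // No phi translation is needed, so the same pointer value is usable in
               // every predecessor.  Only the query's very first block may be skipped,
               // so clear the flag before queueing the predecessors.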
   1264       SkipFirstBlock = false;
   1265       SmallVector<BasicBlock *, 16> NewBlocks;
   1266       for (BasicBlock *Pred : PredCache.get(BB)) {
   1267         // Verify that we haven't looked at this block yet.
   1268         std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
   1269             Visited.insert(std::make_pair(Pred, Pointer.getAddr()));
   1270         if (InsertRes.second) {
    1271           // This is the first time we've looked at this predecessor.
   1272           NewBlocks.push_back(Pred);
   1273           continue;
   1274         }
   1275 
    1276         // If we have seen this block before, but with a different pointer, then
    1277         // we have a phi translation failure and we have to treat this as a
    1278         // clobber.
   1279         if (InsertRes.first->second != Pointer.getAddr()) {
   1280           // Make sure to clean up the Visited map before continuing on to
   1281           // PredTranslationFailure.
   1282           for (unsigned i = 0; i < NewBlocks.size(); i++)
   1283             Visited.erase(NewBlocks[i]);
   1284           goto PredTranslationFailure;
   1285         }
   1286       }
   1287       if (NewBlocks.size() > WorklistEntries) {
   1288         // Make sure to clean up the Visited map before continuing on to
   1289         // PredTranslationFailure.
   1290         for (unsigned i = 0; i < NewBlocks.size(); i++)
   1291           Visited.erase(NewBlocks[i]);
   1292         GotWorklistLimit = true;
   1293         goto PredTranslationFailure;
   1294       }
   1295       WorklistEntries -= NewBlocks.size();
   1296       Worklist.append(NewBlocks.begin(), NewBlocks.end());
   1297       continue;
   1298     }
   1299 
    1300     // We do need to do phi translation.  If we know ahead of time that we can't
    1301     // phi translate this value, don't even try.
   1302     if (!Pointer.IsPotentiallyPHITranslatable())
   1303       goto PredTranslationFailure;
   1304 
   1305     // We may have added values to the cache list before this PHI translation.
   1306     // If so, we haven't done anything to ensure that the cache remains sorted.
   1307     // Sort it now (if needed) so that recursive invocations of
   1308     // getNonLocalPointerDepFromBB and other routines that could reuse the cache
   1309     // value will only see properly sorted cache arrays.
   1310     if (Cache && NumSortedEntries != Cache->size()) {
   1311       SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
   1312       NumSortedEntries = Cache->size();
   1313     }
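             // Null out Cache: the recursive getNonLocalPointerDepFromBB calls below
             // may grow NonLocalPointerDeps and invalidate this pointer.  It is
             // re-acquired after the recursion (or at PredTranslationFailure).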
   1314     Cache = nullptr;
   1315 
   1316     PredList.clear();
   1317     for (BasicBlock *Pred : PredCache.get(BB)) {
   1318       PredList.push_back(std::make_pair(Pred, Pointer));
   1319 
    1320       // Get the PHI translated pointer in this predecessor.  This can fail if
    1321       // the value is not translatable, in which case getAddr() returns null.
   1322       PHITransAddr &PredPointer = PredList.back().second;
   1323       PredPointer.PHITranslateValue(BB, Pred, &DT, /*MustDominate=*/false);
   1324       Value *PredPtrVal = PredPointer.getAddr();
   1325 
   1326       // Check to see if we have already visited this pred block with another
   1327       // pointer.  If so, we can't do this lookup.  This failure can occur
   1328       // with PHI translation when a critical edge exists and the PHI node in
   1329       // the successor translates to a pointer value different than the
   1330       // pointer the block was first analyzed with.
   1331       std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
   1332           Visited.insert(std::make_pair(Pred, PredPtrVal));
   1333 
   1334       if (!InsertRes.second) {
    1335         // We have already visited this pred; take it off the list of preds to visit.
   1336         PredList.pop_back();
   1337 
   1338         // If the predecessor was visited with PredPtr, then we already did
   1339         // the analysis and can ignore it.
   1340         if (InsertRes.first->second == PredPtrVal)
   1341           continue;
   1342 
   1343         // Otherwise, the block was previously analyzed with a different
   1344         // pointer.  We can't represent the result of this case, so we just
   1345         // treat this as a phi translation failure.
   1346 
   1347         // Make sure to clean up the Visited map before continuing on to
   1348         // PredTranslationFailure.
   1349         for (unsigned i = 0, n = PredList.size(); i < n; ++i)
   1350           Visited.erase(PredList[i].first);
   1351 
   1352         goto PredTranslationFailure;
   1353       }
   1354     }
   1355 
    1356     // Actually process results here; this needs to be a separate loop to avoid
    1357     // calling getNonLocalPointerDepFromBB for blocks we don't want to return
    1358     // any results for.  (getNonLocalPointerDepFromBB will modify our
    1359     // data structures in ways the code after the PredTranslationFailure label
    1360     // doesn't expect.)
   1361     for (unsigned i = 0, n = PredList.size(); i < n; ++i) {
   1362       BasicBlock *Pred = PredList[i].first;
   1363       PHITransAddr &PredPointer = PredList[i].second;
   1364       Value *PredPtrVal = PredPointer.getAddr();
   1365 
   1366       bool CanTranslate = true;
   1367       // If PHI translation was unable to find an available pointer in this
   1368       // predecessor, then we have to assume that the pointer is clobbered in
   1369       // that predecessor.  We can still do PRE of the load, which would insert
   1370       // a computation of the pointer in this predecessor.
   1371       if (!PredPtrVal)
   1372         CanTranslate = false;
   1373 
   1374       // FIXME: it is entirely possible that PHI translating will end up with
   1375       // the same value.  Consider PHI translating something like:
   1376       // X = phi [x, bb1], [y, bb2].  PHI translating for bb1 doesn't *need*
   1377       // to recurse here, pedantically speaking.
   1378 
   1379       // If getNonLocalPointerDepFromBB fails here, that means the cached
   1380       // result conflicted with the Visited list; we have to conservatively
   1381       // assume it is unknown, but this also does not block PRE of the load.
   1382       if (!CanTranslate ||
   1383           !getNonLocalPointerDepFromBB(QueryInst, PredPointer,
   1384                                       Loc.getWithNewPtr(PredPtrVal), isLoad,
   1385                                       Pred, Result, Visited)) {
   1386         // Add the entry to the Result list.
   1387         NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
   1388         Result.push_back(Entry);
   1389 
   1390         // Since we had a phi translation failure, the cache for CacheKey won't
   1391         // include all of the entries that we need to immediately satisfy future
   1392         // queries.  Mark this in NonLocalPointerDeps by setting the
    1393         // BBSkipFirstBlockPair pointer to null.  This makes later reuses of the
    1394         // cached value do more work, but ensures they do not miss the failure.
   1395         NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
   1396         NLPI.Pair = BBSkipFirstBlockPair();
   1397         continue;
   1398       }
   1399     }
   1400 
    1401     // Refresh the CacheInfo/Cache pointer, which the recursion above may have invalidated.
   1402     CacheInfo = &NonLocalPointerDeps[CacheKey];
   1403     Cache = &CacheInfo->NonLocalDeps;
   1404     NumSortedEntries = Cache->size();
   1405 
   1406     // Since we did phi translation, the "Cache" set won't contain all of the
   1407     // results for the query.  This is ok (we can still use it to accelerate
   1408     // specific block queries) but we can't do the fastpath "return all
    1409     // results from the set".  Clear out the indicator for this.
   1410     CacheInfo->Pair = BBSkipFirstBlockPair();
   1411     SkipFirstBlock = false;
   1412     continue;
   1413 
   1414   PredTranslationFailure:
    1415     // The following code is the failure path: we can't produce a sane
    1416     // translation for the given block.  It assumes that we haven't modified any
    1417     // of our data structures while processing the current block.
   1418 
   1419     if (!Cache) {
   1420       // Refresh the CacheInfo/Cache pointer if it got invalidated.
   1421       CacheInfo = &NonLocalPointerDeps[CacheKey];
   1422       Cache = &CacheInfo->NonLocalDeps;
   1423       NumSortedEntries = Cache->size();
   1424     }
   1425 
   1426     // Since we failed phi translation, the "Cache" set won't contain all of the
   1427     // results for the query.  This is ok (we can still use it to accelerate
   1428     // specific block queries) but we can't do the fastpath "return all
   1429     // results from the set".  Clear out the indicator for this.
   1430     CacheInfo->Pair = BBSkipFirstBlockPair();
   1431 
   1432     // If *nothing* works, mark the pointer as unknown.
   1433     //
   1434     // If this is the magic first block, return this as a clobber of the whole
   1435     // incoming value.  Since we can't phi translate to one of the predecessors,
   1436     // we have to bail out.
   1437     if (SkipFirstBlock)
   1438       return false;
   1439 
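             // Otherwise, find this block's cache entry, downgrade it to Unknown, and
             // report it in Result; it may be absent if we hit the worklist limit.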
   1440     bool foundBlock = false;
   1441     for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
   1442       if (I.getBB() != BB)
   1443         continue;
   1444 
   1445       assert((GotWorklistLimit || I.getResult().isNonLocal() ||
   1446               !DT.isReachableFromEntry(BB)) &&
   1447              "Should only be here with transparent block");
   1448       foundBlock = true;
   1449       I.setResult(MemDepResult::getUnknown());
   1450       Result.push_back(
   1451           NonLocalDepResult(I.getBB(), I.getResult(), Pointer.getAddr()));
   1452       break;
   1453     }
   1454     (void)foundBlock; (void)GotWorklistLimit;
   1455     assert((foundBlock || GotWorklistLimit) && "Current block not in cache?");
   1456   }
   1457 
   1458   // Okay, we're done now.  If we added new values to the cache, re-sort it.
   1459   SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
   1460   LLVM_DEBUG(AssertSorted(*Cache));
   1461   return true;
   1462 }
   1463 
   1464 /// If P exists in CachedNonLocalPointerInfo or NonLocalDefsCache, remove it.
   1465 void MemoryDependenceResults::RemoveCachedNonLocalPointerDependencies(
   1466     ValueIsLoadPair P) {
   1467 
   1468   // Most of the time this cache is empty.
   1469   if (!NonLocalDefsCache.empty()) {
   1470     auto it = NonLocalDefsCache.find(P.getPointer());
   1471     if (it != NonLocalDefsCache.end()) {
   1472       RemoveFromReverseMap(ReverseNonLocalDefsCache,
   1473                            it->second.getResult().getInst(), P.getPointer());
   1474       NonLocalDefsCache.erase(it);
   1475     }
   1476 
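             // The pointer may itself be the cached def instruction for other pointers;
             // if so, use the reverse map to drop those forward entries as well.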
   1477     if (auto *I = dyn_cast<Instruction>(P.getPointer())) {
   1478       auto toRemoveIt = ReverseNonLocalDefsCache.find(I);
   1479       if (toRemoveIt != ReverseNonLocalDefsCache.end()) {
   1480         for (const auto &entry : toRemoveIt->second)
   1481           NonLocalDefsCache.erase(entry);
   1482         ReverseNonLocalDefsCache.erase(toRemoveIt);
   1483       }
   1484     }
   1485   }
   1486 
   1487   CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P);
   1488   if (It == NonLocalPointerDeps.end())
   1489     return;
   1490 
   1491   // Remove all of the entries in the BB->val map.  This involves removing
   1492   // instructions from the reverse map.
   1493   NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
   1494 
   1495   for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
   1496     Instruction *Target = PInfo[i].getResult().getInst();
   1497     if (!Target)
   1498       continue; // Ignore non-local dep results.
   1499     assert(Target->getParent() == PInfo[i].getBB());
   1500 
    1501     // This entry is being removed from the cache, so update the reverse map.
   1502     RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
   1503   }
   1504 
   1505   // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
   1506   NonLocalPointerDeps.erase(It);
   1507 }
   1508 
   1509 void MemoryDependenceResults::invalidateCachedPointerInfo(Value *Ptr) {
   1510   // If Ptr isn't really a pointer, just ignore it.
   1511   if (!Ptr->getType()->isPointerTy())
   1512     return;
   1513   // Flush store info for the pointer.
   1514   RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
   1515   // Flush load info for the pointer.
   1516   RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
   1517   // Invalidate phis that use the pointer.
   1518   PV.invalidateValue(Ptr);
   1519 }
   1520 
   1521 void MemoryDependenceResults::invalidateCachedPredecessors() {
   1522   PredCache.clear();
   1523 }
   1524 
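         /// Note: clients that keep this analysis alive across transformations (such
         /// as GVN) are expected to call removeInstruction before erasing an
         /// instruction, along the lines of this illustrative (not verbatim) snippet:
         ///   MD.removeInstruction(I);
         ///   I->eraseFromParent();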
   1525 void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
    1526   // Walk through the non-local dependencies, removing this instruction as the
    1527   // value of any cached queries.
   1528   NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst);
   1529   if (NLDI != NonLocalDeps.end()) {
   1530     NonLocalDepInfo &BlockMap = NLDI->second.first;
   1531     for (auto &Entry : BlockMap)
   1532       if (Instruction *Inst = Entry.getResult().getInst())
   1533         RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
   1534     NonLocalDeps.erase(NLDI);
   1535   }
   1536 
   1537   // If we have a cached local dependence query for this instruction, remove it.
   1538   LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
   1539   if (LocalDepEntry != LocalDeps.end()) {
   1540     // Remove us from DepInst's reverse set now that the local dep info is gone.
   1541     if (Instruction *Inst = LocalDepEntry->second.getInst())
   1542       RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
   1543 
   1544     // Remove this local dependency info.
   1545     LocalDeps.erase(LocalDepEntry);
   1546   }
   1547 
   1548   // If we have any cached pointer dependencies on this instruction, remove
   1549   // them.  If the instruction has non-pointer type, then it can't be a pointer
   1550   // base.
   1551 
   1552   // Remove it from both the load info and the store info.  The instruction
   1553   // can't be in either of these maps if it is non-pointer.
   1554   if (RemInst->getType()->isPointerTy()) {
   1555     RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
   1556     RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
   1557   }
   1558 
   1559   // Loop over all of the things that depend on the instruction we're removing.
   1560   SmallVector<std::pair<Instruction *, Instruction *>, 8> ReverseDepsToAdd;
   1561 
   1562   // If we find RemInst as a clobber or Def in any of the maps for other values,
   1563   // we need to replace its entry with a dirty version of the instruction after
   1564   // it.  If RemInst is a terminator, we use a null dirty value.
   1565   //
   1566   // Using a dirty version of the instruction after RemInst saves having to scan
   1567   // the entire block to get to this point.
   1568   MemDepResult NewDirtyVal;
   1569   if (!RemInst->isTerminator())
   1570     NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());
   1571 
   1572   ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
   1573   if (ReverseDepIt != ReverseLocalDeps.end()) {
   1574     // RemInst can't be the terminator if it has local stuff depending on it.
   1575     assert(!ReverseDepIt->second.empty() && !isa<TerminatorInst>(RemInst) &&
   1576            "Nothing can locally depend on a terminator");
   1577 
   1578     for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
   1579       assert(InstDependingOnRemInst != RemInst &&
   1580              "Already removed our local dep info");
   1581 
   1582       LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
   1583 
    1584       // Make sure to remember that new things depend on NewDirtyVal's instruction.
   1585       assert(NewDirtyVal.getInst() &&
   1586              "There is no way something else can have "
   1587              "a local dep on this if it is a terminator!");
   1588       ReverseDepsToAdd.push_back(
   1589           std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst));
   1590     }
   1591 
   1592     ReverseLocalDeps.erase(ReverseDepIt);
   1593 
   1594     // Add new reverse deps after scanning the set, to avoid invalidating the
   1595     // 'ReverseDeps' reference.
   1596     while (!ReverseDepsToAdd.empty()) {
   1597       ReverseLocalDeps[ReverseDepsToAdd.back().first].insert(
   1598           ReverseDepsToAdd.back().second);
   1599       ReverseDepsToAdd.pop_back();
   1600     }
   1601   }
   1602 
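           // Now do the same for cached *non-local* results that name RemInst: mark
           // the owning queries dirty, retarget the entries at the instruction after
           // RemInst, and record the reverse-map updates to apply below.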
   1603   ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
   1604   if (ReverseDepIt != ReverseNonLocalDeps.end()) {
   1605     for (Instruction *I : ReverseDepIt->second) {
   1606       assert(I != RemInst && "Already removed NonLocalDep info for RemInst");
   1607 
   1608       PerInstNLInfo &INLD = NonLocalDeps[I];
   1609       // The information is now dirty!
   1610       INLD.second = true;
   1611 
   1612       for (auto &Entry : INLD.first) {
   1613         if (Entry.getResult().getInst() != RemInst)
   1614           continue;
   1615 
   1616         // Convert to a dirty entry for the subsequent instruction.
   1617         Entry.setResult(NewDirtyVal);
   1618 
   1619         if (Instruction *NextI = NewDirtyVal.getInst())
   1620           ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
   1621       }
   1622     }
   1623 
   1624     ReverseNonLocalDeps.erase(ReverseDepIt);
   1625 
    1626     // Add new reverse deps after scanning the set, to avoid invalidating it.
   1627     while (!ReverseDepsToAdd.empty()) {
   1628       ReverseNonLocalDeps[ReverseDepsToAdd.back().first].insert(
   1629           ReverseDepsToAdd.back().second);
   1630       ReverseDepsToAdd.pop_back();
   1631     }
   1632   }
   1633 
   1634   // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
   1635   // value in the NonLocalPointerDeps info.
   1636   ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
   1637       ReverseNonLocalPtrDeps.find(RemInst);
   1638   if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
   1639     SmallVector<std::pair<Instruction *, ValueIsLoadPair>, 8>
   1640         ReversePtrDepsToAdd;
   1641 
   1642     for (ValueIsLoadPair P : ReversePtrDepIt->second) {
   1643       assert(P.getPointer() != RemInst &&
   1644              "Already removed NonLocalPointerDeps info for RemInst");
   1645 
   1646       NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
   1647 
   1648       // The cache is not valid for any specific block anymore.
   1649       NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
   1650 
   1651       // Update any entries for RemInst to use the instruction after it.
   1652       for (auto &Entry : NLPDI) {
   1653         if (Entry.getResult().getInst() != RemInst)
   1654           continue;
   1655 
   1656         // Convert to a dirty entry for the subsequent instruction.
   1657         Entry.setResult(NewDirtyVal);
   1658 
   1659         if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
   1660           ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
   1661       }
   1662 
   1663       // Re-sort the NonLocalDepInfo.  Changing the dirty entry to its
   1664       // subsequent value may invalidate the sortedness.
   1665       llvm::sort(NLPDI.begin(), NLPDI.end());
   1666     }
   1667 
   1668     ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
   1669 
   1670     while (!ReversePtrDepsToAdd.empty()) {
   1671       ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first].insert(
   1672           ReversePtrDepsToAdd.back().second);
   1673       ReversePtrDepsToAdd.pop_back();
   1674     }
   1675   }
   1676 
   1677   // Invalidate phis that use the removed instruction.
   1678   PV.invalidateValue(RemInst);
   1679 
   1680   assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
   1681   LLVM_DEBUG(verifyRemoved(RemInst));
   1682 }
   1683 
   1684 /// Verify that the specified instruction does not occur in our internal data
   1685 /// structures.
   1686 ///
   1687 /// This function verifies by asserting in debug builds.
   1688 void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
   1689 #ifndef NDEBUG
   1690   for (const auto &DepKV : LocalDeps) {
   1691     assert(DepKV.first != D && "Inst occurs in data structures");
   1692     assert(DepKV.second.getInst() != D && "Inst occurs in data structures");
   1693   }
   1694 
   1695   for (const auto &DepKV : NonLocalPointerDeps) {
   1696     assert(DepKV.first.getPointer() != D && "Inst occurs in NLPD map key");
   1697     for (const auto &Entry : DepKV.second.NonLocalDeps)
   1698       assert(Entry.getResult().getInst() != D && "Inst occurs as NLPD value");
   1699   }
   1700 
   1701   for (const auto &DepKV : NonLocalDeps) {
   1702     assert(DepKV.first != D && "Inst occurs in data structures");
   1703     const PerInstNLInfo &INLD = DepKV.second;
   1704     for (const auto &Entry : INLD.first)
   1705       assert(Entry.getResult().getInst() != D &&
   1706              "Inst occurs in data structures");
   1707   }
   1708 
   1709   for (const auto &DepKV : ReverseLocalDeps) {
   1710     assert(DepKV.first != D && "Inst occurs in data structures");
   1711     for (Instruction *Inst : DepKV.second)
   1712       assert(Inst != D && "Inst occurs in data structures");
   1713   }
   1714 
   1715   for (const auto &DepKV : ReverseNonLocalDeps) {
   1716     assert(DepKV.first != D && "Inst occurs in data structures");
   1717     for (Instruction *Inst : DepKV.second)
   1718       assert(Inst != D && "Inst occurs in data structures");
   1719   }
   1720 
   1721   for (const auto &DepKV : ReverseNonLocalPtrDeps) {
   1722     assert(DepKV.first != D && "Inst occurs in rev NLPD map");
   1723 
   1724     for (ValueIsLoadPair P : DepKV.second)
   1725       assert(P != ValueIsLoadPair(D, false) && P != ValueIsLoadPair(D, true) &&
   1726              "Inst occurs in ReverseNonLocalPtrDeps map");
   1727   }
   1728 #endif
   1729 }
   1730 
   1731 AnalysisKey MemoryDependenceAnalysis::Key;
   1732 
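         // New-pass-manager entry point.  A client pass would typically obtain the
         // result with something like the following sketch (SomeMemoryInst is a
         // placeholder, not part of this file):
         //   auto &MD = AM.getResult<MemoryDependenceAnalysis>(F);
         //   MemDepResult Dep = MD.getDependency(SomeMemoryInst);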
   1733 MemoryDependenceResults
   1734 MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
   1735   auto &AA = AM.getResult<AAManager>(F);
   1736   auto &AC = AM.getResult<AssumptionAnalysis>(F);
   1737   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
   1738   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
   1739   auto &PV = AM.getResult<PhiValuesAnalysis>(F);
   1740   return MemoryDependenceResults(AA, AC, TLI, DT, PV);
   1741 }
   1742 
   1743 char MemoryDependenceWrapperPass::ID = 0;
   1744 
   1745 INITIALIZE_PASS_BEGIN(MemoryDependenceWrapperPass, "memdep",
   1746                       "Memory Dependence Analysis", false, true)
   1747 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
   1748 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
   1749 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
   1750 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
   1751 INITIALIZE_PASS_DEPENDENCY(PhiValuesWrapperPass)
   1752 INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep",
   1753                     "Memory Dependence Analysis", false, true)
   1754 
   1755 MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) {
   1756   initializeMemoryDependenceWrapperPassPass(*PassRegistry::getPassRegistry());
   1757 }
   1758 
   1759 MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() = default;
   1760 
   1761 void MemoryDependenceWrapperPass::releaseMemory() {
   1762   MemDep.reset();
   1763 }
   1764 
   1765 void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
   1766   AU.setPreservesAll();
   1767   AU.addRequired<AssumptionCacheTracker>();
   1768   AU.addRequired<DominatorTreeWrapperPass>();
   1769   AU.addRequired<PhiValuesWrapperPass>();
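           // AA and TLI are queried lazily while answering memdep queries after this
           // pass has run, so they must remain alive as long as our results are used;
           // hence addRequiredTransitive rather than addRequired.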
   1770   AU.addRequiredTransitive<AAResultsWrapperPass>();
   1771   AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
   1772 }
   1773 
   1774 bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &PA,
   1775                                FunctionAnalysisManager::Invalidator &Inv) {
   1776   // Check whether our analysis is preserved.
   1777   auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
   1778   if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
   1779     // If not, give up now.
   1780     return true;
   1781 
   1782   // Check whether the analyses we depend on became invalid for any reason.
   1783   if (Inv.invalidate<AAManager>(F, PA) ||
   1784       Inv.invalidate<AssumptionAnalysis>(F, PA) ||
   1785       Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
   1786       Inv.invalidate<PhiValuesAnalysis>(F, PA))
   1787     return true;
   1788 
   1789   // Otherwise this analysis result remains valid.
   1790   return false;
   1791 }
   1792 
   1793 unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const {
   1794   return BlockScanLimit;
   1795 }
   1796 
   1797 bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
   1798   auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
   1799   auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
   1800   auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
   1801   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   1802   auto &PV = getAnalysis<PhiValuesWrapperPass>().getResult();
   1803   MemDep.emplace(AA, AC, TLI, DT, PV);
   1804   return false;
   1805 }
   1806