Home | History | Annotate | Download | only in Analysis
      1 //- CFLSteensAliasAnalysis.cpp - Unification-based Alias Analysis ---*- C++-*-//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements a CFL-base, summary-based alias analysis algorithm. It
     11 // does not depend on types. The algorithm is a mixture of the one described in
     12 // "Demand-driven alias analysis for C" by Xin Zheng and Radu Rugina, and "Fast
     13 // algorithms for Dyck-CFL-reachability with applications to Alias Analysis" by
     14 // Zhang Q, Lyu M R, Yuan H, and Su Z. -- to summarize the papers, we build a
     15 // graph of the uses of a variable, where each node is a memory location, and
     16 // each edge is an action that happened on that memory location.  The "actions"
     17 // can be one of Dereference, Reference, or Assign. The precision of this
     18 // analysis is roughly the same as that of an one level context-sensitive
     19 // Steensgaard's algorithm.
     20 //
     21 // Two variables are considered as aliasing iff you can reach one value's node
     22 // from the other value's node and the language formed by concatenating all of
     23 // the edge labels (actions) conforms to a context-free grammar.
     24 //
     25 // Because this algorithm requires a graph search on each query, we execute the
     26 // algorithm outlined in "Fast algorithms..." (mentioned above)
     27 // in order to transform the graph into sets of variables that may alias in
     28 // ~nlogn time (n = number of variables), which makes queries take constant
     29 // time.
     30 //===----------------------------------------------------------------------===//
     31 
     32 // N.B. AliasAnalysis as a whole is phrased as a FunctionPass at the moment, and
     33 // CFLSteensAA is interprocedural. This is *technically* A Bad Thing, because
     34 // FunctionPasses are only allowed to inspect the Function that they're being
     35 // run on. Realistically, this likely isn't a problem until we allow
     36 // FunctionPasses to run concurrently.
     37 
     38 #include "llvm/Analysis/CFLSteensAliasAnalysis.h"
     39 #include "CFLGraph.h"
     40 #include "StratifiedSets.h"
     41 #include "llvm/ADT/DenseMap.h"
     42 #include "llvm/ADT/None.h"
     43 #include "llvm/ADT/Optional.h"
     44 #include "llvm/Analysis/TargetLibraryInfo.h"
     45 #include "llvm/IR/Constants.h"
     46 #include "llvm/IR/Function.h"
     47 #include "llvm/Pass.h"
     48 #include "llvm/Support/Compiler.h"
     49 #include "llvm/Support/Debug.h"
     50 #include "llvm/Support/ErrorHandling.h"
     51 #include "llvm/Support/raw_ostream.h"
     52 #include <algorithm>
     53 #include <cassert>
     54 #include <memory>
     55 #include <tuple>
     56 
     57 using namespace llvm;
     58 using namespace llvm::cflaa;
     59 
     60 #define DEBUG_TYPE "cfl-steens-aa"
     61 
     62 CFLSteensAAResult::CFLSteensAAResult(const TargetLibraryInfo &TLI)
     63     : AAResultBase(), TLI(TLI) {}
     64 CFLSteensAAResult::CFLSteensAAResult(CFLSteensAAResult &&Arg)
     65     : AAResultBase(std::move(Arg)), TLI(Arg.TLI) {}
     66 CFLSteensAAResult::~CFLSteensAAResult() {}
     67 
     68 /// Information we have about a function and would like to keep around.
     69 class CFLSteensAAResult::FunctionInfo {
     70   StratifiedSets<InstantiatedValue> Sets;
     71   AliasSummary Summary;
     72 
     73 public:
     74   FunctionInfo(Function &Fn, const SmallVectorImpl<Value *> &RetVals,
     75                StratifiedSets<InstantiatedValue> S);
     76 
     77   const StratifiedSets<InstantiatedValue> &getStratifiedSets() const {
     78     return Sets;
     79   }
     80   const AliasSummary &getAliasSummary() const { return Summary; }
     81 };
     82 
     83 /// Try to go from a Value* to a Function*. Never returns nullptr.
     84 static Optional<Function *> parentFunctionOfValue(Value *);
     85 
     86 const StratifiedIndex StratifiedLink::SetSentinel =
     87     std::numeric_limits<StratifiedIndex>::max();
     88 
     89 //===----------------------------------------------------------------------===//
     90 // Function declarations that require types defined in the namespace above
     91 //===----------------------------------------------------------------------===//
     92 
     93 /// Determines whether it would be pointless to add the given Value to our sets.
     94 static bool canSkipAddingToSets(Value *Val);
     95 
     96 static Optional<Function *> parentFunctionOfValue(Value *Val) {
     97   if (auto *Inst = dyn_cast<Instruction>(Val)) {
     98     auto *Bb = Inst->getParent();
     99     return Bb->getParent();
    100   }
    101 
    102   if (auto *Arg = dyn_cast<Argument>(Val))
    103     return Arg->getParent();
    104   return None;
    105 }
    106 
    107 static bool canSkipAddingToSets(Value *Val) {
    108   // Constants can share instances, which may falsely unify multiple
    109   // sets, e.g. in
    110   // store i32* null, i32** %ptr1
    111   // store i32* null, i32** %ptr2
    112   // clearly ptr1 and ptr2 should not be unified into the same set, so
    113   // we should filter out the (potentially shared) instance to
    114   // i32* null.
    115   if (isa<Constant>(Val)) {
    116     // TODO: Because all of these things are constant, we can determine whether
    117     // the data is *actually* mutable at graph building time. This will probably
    118     // come for free/cheap with offset awareness.
    119     bool CanStoreMutableData = isa<GlobalValue>(Val) ||
    120                                isa<ConstantExpr>(Val) ||
    121                                isa<ConstantAggregate>(Val);
    122     return !CanStoreMutableData;
    123   }
    124 
    125   return false;
    126 }
    127 
    128 CFLSteensAAResult::FunctionInfo::FunctionInfo(
    129     Function &Fn, const SmallVectorImpl<Value *> &RetVals,
    130     StratifiedSets<InstantiatedValue> S)
    131     : Sets(std::move(S)) {
    132   // Historically, an arbitrary upper-bound of 50 args was selected. We may want
    133   // to remove this if it doesn't really matter in practice.
    134   if (Fn.arg_size() > MaxSupportedArgsInSummary)
    135     return;
    136 
    137   DenseMap<StratifiedIndex, InterfaceValue> InterfaceMap;
    138 
    139   // Our intention here is to record all InterfaceValues that share the same
    140   // StratifiedIndex in RetParamRelations. For each valid InterfaceValue, we
    141   // have its StratifiedIndex scanned here and check if the index is presented
    142   // in InterfaceMap: if it is not, we add the correspondence to the map;
    143   // otherwise, an aliasing relation is found and we add it to
    144   // RetParamRelations.
    145 
    146   auto AddToRetParamRelations = [&](unsigned InterfaceIndex,
    147                                     StratifiedIndex SetIndex) {
    148     unsigned Level = 0;
    149     while (true) {
    150       InterfaceValue CurrValue{InterfaceIndex, Level};
    151 
    152       auto Itr = InterfaceMap.find(SetIndex);
    153       if (Itr != InterfaceMap.end()) {
    154         if (CurrValue != Itr->second)
    155           Summary.RetParamRelations.push_back(
    156               ExternalRelation{CurrValue, Itr->second});
    157         break;
    158       }
    159 
    160       auto &Link = Sets.getLink(SetIndex);
    161       InterfaceMap.insert(std::make_pair(SetIndex, CurrValue));
    162       auto ExternalAttrs = getExternallyVisibleAttrs(Link.Attrs);
    163       if (ExternalAttrs.any())
    164         Summary.RetParamAttributes.push_back(
    165             ExternalAttribute{CurrValue, ExternalAttrs});
    166 
    167       if (!Link.hasBelow())
    168         break;
    169 
    170       ++Level;
    171       SetIndex = Link.Below;
    172     }
    173   };
    174 
    175   // Populate RetParamRelations for return values
    176   for (auto *RetVal : RetVals) {
    177     assert(RetVal != nullptr);
    178     assert(RetVal->getType()->isPointerTy());
    179     auto RetInfo = Sets.find(InstantiatedValue{RetVal, 0});
    180     if (RetInfo.hasValue())
    181       AddToRetParamRelations(0, RetInfo->Index);
    182   }
    183 
    184   // Populate RetParamRelations for parameters
    185   unsigned I = 0;
    186   for (auto &Param : Fn.args()) {
    187     if (Param.getType()->isPointerTy()) {
    188       auto ParamInfo = Sets.find(InstantiatedValue{&Param, 0});
    189       if (ParamInfo.hasValue())
    190         AddToRetParamRelations(I + 1, ParamInfo->Index);
    191     }
    192     ++I;
    193   }
    194 }
    195 
    196 // Builds the graph + StratifiedSets for a function.
    197 CFLSteensAAResult::FunctionInfo CFLSteensAAResult::buildSetsFrom(Function *Fn) {
    198   CFLGraphBuilder<CFLSteensAAResult> GraphBuilder(*this, TLI, *Fn);
    199   StratifiedSetsBuilder<InstantiatedValue> SetBuilder;
    200 
    201   // Add all CFLGraph nodes and all Dereference edges to StratifiedSets
    202   auto &Graph = GraphBuilder.getCFLGraph();
    203   for (const auto &Mapping : Graph.value_mappings()) {
    204     auto Val = Mapping.first;
    205     if (canSkipAddingToSets(Val))
    206       continue;
    207     auto &ValueInfo = Mapping.second;
    208 
    209     assert(ValueInfo.getNumLevels() > 0);
    210     SetBuilder.add(InstantiatedValue{Val, 0});
    211     SetBuilder.noteAttributes(InstantiatedValue{Val, 0},
    212                               ValueInfo.getNodeInfoAtLevel(0).Attr);
    213     for (unsigned I = 0, E = ValueInfo.getNumLevels() - 1; I < E; ++I) {
    214       SetBuilder.add(InstantiatedValue{Val, I + 1});
    215       SetBuilder.noteAttributes(InstantiatedValue{Val, I + 1},
    216                                 ValueInfo.getNodeInfoAtLevel(I + 1).Attr);
    217       SetBuilder.addBelow(InstantiatedValue{Val, I},
    218                           InstantiatedValue{Val, I + 1});
    219     }
    220   }
    221 
    222   // Add all assign edges to StratifiedSets
    223   for (const auto &Mapping : Graph.value_mappings()) {
    224     auto Val = Mapping.first;
    225     if (canSkipAddingToSets(Val))
    226       continue;
    227     auto &ValueInfo = Mapping.second;
    228 
    229     for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) {
    230       auto Src = InstantiatedValue{Val, I};
    231       for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges)
    232         SetBuilder.addWith(Src, Edge.Other);
    233     }
    234   }
    235 
    236   return FunctionInfo(*Fn, GraphBuilder.getReturnValues(), SetBuilder.build());
    237 }
    238 
    239 void CFLSteensAAResult::scan(Function *Fn) {
    240   auto InsertPair = Cache.insert(std::make_pair(Fn, Optional<FunctionInfo>()));
    241   (void)InsertPair;
    242   assert(InsertPair.second &&
    243          "Trying to scan a function that has already been cached");
    244 
    245   // Note that we can't do Cache[Fn] = buildSetsFrom(Fn) here: the function call
    246   // may get evaluated after operator[], potentially triggering a DenseMap
    247   // resize and invalidating the reference returned by operator[]
    248   auto FunInfo = buildSetsFrom(Fn);
    249   Cache[Fn] = std::move(FunInfo);
    250 
    251   Handles.push_front(FunctionHandle(Fn, this));
    252 }
    253 
    254 void CFLSteensAAResult::evict(Function *Fn) { Cache.erase(Fn); }
    255 
    256 /// Ensures that the given function is available in the cache, and returns the
    257 /// entry.
    258 const Optional<CFLSteensAAResult::FunctionInfo> &
    259 CFLSteensAAResult::ensureCached(Function *Fn) {
    260   auto Iter = Cache.find(Fn);
    261   if (Iter == Cache.end()) {
    262     scan(Fn);
    263     Iter = Cache.find(Fn);
    264     assert(Iter != Cache.end());
    265     assert(Iter->second.hasValue());
    266   }
    267   return Iter->second;
    268 }
    269 
    270 const AliasSummary *CFLSteensAAResult::getAliasSummary(Function &Fn) {
    271   auto &FunInfo = ensureCached(&Fn);
    272   if (FunInfo.hasValue())
    273     return &FunInfo->getAliasSummary();
    274   else
    275     return nullptr;
    276 }
    277 
    278 AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA,
    279                                      const MemoryLocation &LocB) {
    280   auto *ValA = const_cast<Value *>(LocA.Ptr);
    281   auto *ValB = const_cast<Value *>(LocB.Ptr);
    282 
    283   if (!ValA->getType()->isPointerTy() || !ValB->getType()->isPointerTy())
    284     return NoAlias;
    285 
    286   Function *Fn = nullptr;
    287   auto MaybeFnA = parentFunctionOfValue(ValA);
    288   auto MaybeFnB = parentFunctionOfValue(ValB);
    289   if (!MaybeFnA.hasValue() && !MaybeFnB.hasValue()) {
    290     // The only times this is known to happen are when globals + InlineAsm are
    291     // involved
    292     DEBUG(dbgs()
    293           << "CFLSteensAA: could not extract parent function information.\n");
    294     return MayAlias;
    295   }
    296 
    297   if (MaybeFnA.hasValue()) {
    298     Fn = *MaybeFnA;
    299     assert((!MaybeFnB.hasValue() || *MaybeFnB == *MaybeFnA) &&
    300            "Interprocedural queries not supported");
    301   } else {
    302     Fn = *MaybeFnB;
    303   }
    304 
    305   assert(Fn != nullptr);
    306   auto &MaybeInfo = ensureCached(Fn);
    307   assert(MaybeInfo.hasValue());
    308 
    309   auto &Sets = MaybeInfo->getStratifiedSets();
    310   auto MaybeA = Sets.find(InstantiatedValue{ValA, 0});
    311   if (!MaybeA.hasValue())
    312     return MayAlias;
    313 
    314   auto MaybeB = Sets.find(InstantiatedValue{ValB, 0});
    315   if (!MaybeB.hasValue())
    316     return MayAlias;
    317 
    318   auto SetA = *MaybeA;
    319   auto SetB = *MaybeB;
    320   auto AttrsA = Sets.getLink(SetA.Index).Attrs;
    321   auto AttrsB = Sets.getLink(SetB.Index).Attrs;
    322 
    323   // If both values are local (meaning the corresponding set has attribute
    324   // AttrNone or AttrEscaped), then we know that CFLSteensAA fully models them:
    325   // they may-alias each other if and only if they are in the same set.
    326   // If at least one value is non-local (meaning it either is global/argument or
    327   // it comes from unknown sources like integer cast), the situation becomes a
    328   // bit more interesting. We follow three general rules described below:
    329   // - Non-local values may alias each other
    330   // - AttrNone values do not alias any non-local values
    331   // - AttrEscaped do not alias globals/arguments, but they may alias
    332   // AttrUnknown values
    333   if (SetA.Index == SetB.Index)
    334     return MayAlias;
    335   if (AttrsA.none() || AttrsB.none())
    336     return NoAlias;
    337   if (hasUnknownOrCallerAttr(AttrsA) || hasUnknownOrCallerAttr(AttrsB))
    338     return MayAlias;
    339   if (isGlobalOrArgAttr(AttrsA) && isGlobalOrArgAttr(AttrsB))
    340     return MayAlias;
    341   return NoAlias;
    342 }
    343 
    344 ModRefInfo CFLSteensAAResult::getArgModRefInfo(ImmutableCallSite CS,
    345                                                unsigned ArgIdx) {
    346   if (auto CalledFunc = CS.getCalledFunction()) {
    347     auto &MaybeInfo = ensureCached(const_cast<Function *>(CalledFunc));
    348     if (!MaybeInfo.hasValue())
    349       return MRI_ModRef;
    350     auto &RetParamAttributes = MaybeInfo->getAliasSummary().RetParamAttributes;
    351     auto &RetParamRelations = MaybeInfo->getAliasSummary().RetParamRelations;
    352 
    353     bool ArgAttributeIsWritten =
    354         std::any_of(RetParamAttributes.begin(), RetParamAttributes.end(),
    355                     [ArgIdx](const ExternalAttribute &ExtAttr) {
    356                       return ExtAttr.IValue.Index == ArgIdx + 1;
    357                     });
    358     bool ArgIsAccessed =
    359         std::any_of(RetParamRelations.begin(), RetParamRelations.end(),
    360                     [ArgIdx](const ExternalRelation &ExtRelation) {
    361                       return ExtRelation.To.Index == ArgIdx + 1 ||
    362                              ExtRelation.From.Index == ArgIdx + 1;
    363                     });
    364 
    365     return (!ArgIsAccessed && !ArgAttributeIsWritten) ? MRI_NoModRef
    366                                                       : MRI_ModRef;
    367   }
    368 
    369   return MRI_ModRef;
    370 }
    371 
    372 FunctionModRefBehavior
    373 CFLSteensAAResult::getModRefBehavior(ImmutableCallSite CS) {
    374   // If we know the callee, try analyzing it
    375   if (auto CalledFunc = CS.getCalledFunction())
    376     return getModRefBehavior(CalledFunc);
    377 
    378   // Otherwise, be conservative
    379   return FMRB_UnknownModRefBehavior;
    380 }
    381 
    382 FunctionModRefBehavior CFLSteensAAResult::getModRefBehavior(const Function *F) {
    383   assert(F != nullptr);
    384 
    385   // TODO: Remove the const_cast
    386   auto &MaybeInfo = ensureCached(const_cast<Function *>(F));
    387   if (!MaybeInfo.hasValue())
    388     return FMRB_UnknownModRefBehavior;
    389   auto &RetParamAttributes = MaybeInfo->getAliasSummary().RetParamAttributes;
    390   auto &RetParamRelations = MaybeInfo->getAliasSummary().RetParamRelations;
    391 
    392   // First, if any argument is marked Escpaed, Unknown or Global, anything may
    393   // happen to them and thus we can't draw any conclusion.
    394   if (!RetParamAttributes.empty())
    395     return FMRB_UnknownModRefBehavior;
    396 
    397   // Currently we don't (and can't) distinguish reads from writes in
    398   // RetParamRelations. All we can say is whether there may be memory access or
    399   // not.
    400   if (RetParamRelations.empty())
    401     return FMRB_DoesNotAccessMemory;
    402 
    403   // Check if something beyond argmem gets touched.
    404   bool AccessArgMemoryOnly =
    405       std::all_of(RetParamRelations.begin(), RetParamRelations.end(),
    406                   [](const ExternalRelation &ExtRelation) {
    407                     // Both DerefLevels has to be 0, since we don't know which
    408                     // one is a read and which is a write.
    409                     return ExtRelation.From.DerefLevel == 0 &&
    410                            ExtRelation.To.DerefLevel == 0;
    411                   });
    412   return AccessArgMemoryOnly ? FMRB_OnlyAccessesArgumentPointees
    413                              : FMRB_UnknownModRefBehavior;
    414 }
    415 
    416 char CFLSteensAA::PassID;
    417 
    418 CFLSteensAAResult CFLSteensAA::run(Function &F, AnalysisManager<Function> &AM) {
    419   return CFLSteensAAResult(AM.getResult<TargetLibraryAnalysis>(F));
    420 }
    421 
    422 char CFLSteensAAWrapperPass::ID = 0;
    423 INITIALIZE_PASS(CFLSteensAAWrapperPass, "cfl-steens-aa",
    424                 "Unification-Based CFL Alias Analysis", false, true)
    425 
    426 ImmutablePass *llvm::createCFLSteensAAWrapperPass() {
    427   return new CFLSteensAAWrapperPass();
    428 }
    429 
    430 CFLSteensAAWrapperPass::CFLSteensAAWrapperPass() : ImmutablePass(ID) {
    431   initializeCFLSteensAAWrapperPassPass(*PassRegistry::getPassRegistry());
    432 }
    433 
    434 void CFLSteensAAWrapperPass::initializePass() {
    435   auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>();
    436   Result.reset(new CFLSteensAAResult(TLIWP.getTLI()));
    437 }
    438 
    439 void CFLSteensAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
    440   AU.setPreservesAll();
    441   AU.addRequired<TargetLibraryInfoWrapperPass>();
    442 }
    443