1 //- CFLSteensAliasAnalysis.cpp - Unification-based Alias Analysis ---*- C++-*-// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a CFL-base, summary-based alias analysis algorithm. It 11 // does not depend on types. The algorithm is a mixture of the one described in 12 // "Demand-driven alias analysis for C" by Xin Zheng and Radu Rugina, and "Fast 13 // algorithms for Dyck-CFL-reachability with applications to Alias Analysis" by 14 // Zhang Q, Lyu M R, Yuan H, and Su Z. -- to summarize the papers, we build a 15 // graph of the uses of a variable, where each node is a memory location, and 16 // each edge is an action that happened on that memory location. The "actions" 17 // can be one of Dereference, Reference, or Assign. The precision of this 18 // analysis is roughly the same as that of an one level context-sensitive 19 // Steensgaard's algorithm. 20 // 21 // Two variables are considered as aliasing iff you can reach one value's node 22 // from the other value's node and the language formed by concatenating all of 23 // the edge labels (actions) conforms to a context-free grammar. 24 // 25 // Because this algorithm requires a graph search on each query, we execute the 26 // algorithm outlined in "Fast algorithms..." (mentioned above) 27 // in order to transform the graph into sets of variables that may alias in 28 // ~nlogn time (n = number of variables), which makes queries take constant 29 // time. 30 //===----------------------------------------------------------------------===// 31 32 // N.B. AliasAnalysis as a whole is phrased as a FunctionPass at the moment, and 33 // CFLSteensAA is interprocedural. This is *technically* A Bad Thing, because 34 // FunctionPasses are only allowed to inspect the Function that they're being 35 // run on. Realistically, this likely isn't a problem until we allow 36 // FunctionPasses to run concurrently. 37 38 #include "llvm/Analysis/CFLSteensAliasAnalysis.h" 39 #include "CFLGraph.h" 40 #include "StratifiedSets.h" 41 #include "llvm/ADT/DenseMap.h" 42 #include "llvm/ADT/None.h" 43 #include "llvm/ADT/Optional.h" 44 #include "llvm/Analysis/TargetLibraryInfo.h" 45 #include "llvm/IR/Constants.h" 46 #include "llvm/IR/Function.h" 47 #include "llvm/Pass.h" 48 #include "llvm/Support/Compiler.h" 49 #include "llvm/Support/Debug.h" 50 #include "llvm/Support/ErrorHandling.h" 51 #include "llvm/Support/raw_ostream.h" 52 #include <algorithm> 53 #include <cassert> 54 #include <memory> 55 #include <tuple> 56 57 using namespace llvm; 58 using namespace llvm::cflaa; 59 60 #define DEBUG_TYPE "cfl-steens-aa" 61 62 CFLSteensAAResult::CFLSteensAAResult(const TargetLibraryInfo &TLI) 63 : AAResultBase(), TLI(TLI) {} 64 CFLSteensAAResult::CFLSteensAAResult(CFLSteensAAResult &&Arg) 65 : AAResultBase(std::move(Arg)), TLI(Arg.TLI) {} 66 CFLSteensAAResult::~CFLSteensAAResult() {} 67 68 /// Information we have about a function and would like to keep around. 69 class CFLSteensAAResult::FunctionInfo { 70 StratifiedSets<InstantiatedValue> Sets; 71 AliasSummary Summary; 72 73 public: 74 FunctionInfo(Function &Fn, const SmallVectorImpl<Value *> &RetVals, 75 StratifiedSets<InstantiatedValue> S); 76 77 const StratifiedSets<InstantiatedValue> &getStratifiedSets() const { 78 return Sets; 79 } 80 const AliasSummary &getAliasSummary() const { return Summary; } 81 }; 82 83 /// Try to go from a Value* to a Function*. Never returns nullptr. 84 static Optional<Function *> parentFunctionOfValue(Value *); 85 86 const StratifiedIndex StratifiedLink::SetSentinel = 87 std::numeric_limits<StratifiedIndex>::max(); 88 89 //===----------------------------------------------------------------------===// 90 // Function declarations that require types defined in the namespace above 91 //===----------------------------------------------------------------------===// 92 93 /// Determines whether it would be pointless to add the given Value to our sets. 94 static bool canSkipAddingToSets(Value *Val); 95 96 static Optional<Function *> parentFunctionOfValue(Value *Val) { 97 if (auto *Inst = dyn_cast<Instruction>(Val)) { 98 auto *Bb = Inst->getParent(); 99 return Bb->getParent(); 100 } 101 102 if (auto *Arg = dyn_cast<Argument>(Val)) 103 return Arg->getParent(); 104 return None; 105 } 106 107 static bool canSkipAddingToSets(Value *Val) { 108 // Constants can share instances, which may falsely unify multiple 109 // sets, e.g. in 110 // store i32* null, i32** %ptr1 111 // store i32* null, i32** %ptr2 112 // clearly ptr1 and ptr2 should not be unified into the same set, so 113 // we should filter out the (potentially shared) instance to 114 // i32* null. 115 if (isa<Constant>(Val)) { 116 // TODO: Because all of these things are constant, we can determine whether 117 // the data is *actually* mutable at graph building time. This will probably 118 // come for free/cheap with offset awareness. 119 bool CanStoreMutableData = isa<GlobalValue>(Val) || 120 isa<ConstantExpr>(Val) || 121 isa<ConstantAggregate>(Val); 122 return !CanStoreMutableData; 123 } 124 125 return false; 126 } 127 128 CFLSteensAAResult::FunctionInfo::FunctionInfo( 129 Function &Fn, const SmallVectorImpl<Value *> &RetVals, 130 StratifiedSets<InstantiatedValue> S) 131 : Sets(std::move(S)) { 132 // Historically, an arbitrary upper-bound of 50 args was selected. We may want 133 // to remove this if it doesn't really matter in practice. 134 if (Fn.arg_size() > MaxSupportedArgsInSummary) 135 return; 136 137 DenseMap<StratifiedIndex, InterfaceValue> InterfaceMap; 138 139 // Our intention here is to record all InterfaceValues that share the same 140 // StratifiedIndex in RetParamRelations. For each valid InterfaceValue, we 141 // have its StratifiedIndex scanned here and check if the index is presented 142 // in InterfaceMap: if it is not, we add the correspondence to the map; 143 // otherwise, an aliasing relation is found and we add it to 144 // RetParamRelations. 145 146 auto AddToRetParamRelations = [&](unsigned InterfaceIndex, 147 StratifiedIndex SetIndex) { 148 unsigned Level = 0; 149 while (true) { 150 InterfaceValue CurrValue{InterfaceIndex, Level}; 151 152 auto Itr = InterfaceMap.find(SetIndex); 153 if (Itr != InterfaceMap.end()) { 154 if (CurrValue != Itr->second) 155 Summary.RetParamRelations.push_back( 156 ExternalRelation{CurrValue, Itr->second}); 157 break; 158 } 159 160 auto &Link = Sets.getLink(SetIndex); 161 InterfaceMap.insert(std::make_pair(SetIndex, CurrValue)); 162 auto ExternalAttrs = getExternallyVisibleAttrs(Link.Attrs); 163 if (ExternalAttrs.any()) 164 Summary.RetParamAttributes.push_back( 165 ExternalAttribute{CurrValue, ExternalAttrs}); 166 167 if (!Link.hasBelow()) 168 break; 169 170 ++Level; 171 SetIndex = Link.Below; 172 } 173 }; 174 175 // Populate RetParamRelations for return values 176 for (auto *RetVal : RetVals) { 177 assert(RetVal != nullptr); 178 assert(RetVal->getType()->isPointerTy()); 179 auto RetInfo = Sets.find(InstantiatedValue{RetVal, 0}); 180 if (RetInfo.hasValue()) 181 AddToRetParamRelations(0, RetInfo->Index); 182 } 183 184 // Populate RetParamRelations for parameters 185 unsigned I = 0; 186 for (auto &Param : Fn.args()) { 187 if (Param.getType()->isPointerTy()) { 188 auto ParamInfo = Sets.find(InstantiatedValue{&Param, 0}); 189 if (ParamInfo.hasValue()) 190 AddToRetParamRelations(I + 1, ParamInfo->Index); 191 } 192 ++I; 193 } 194 } 195 196 // Builds the graph + StratifiedSets for a function. 197 CFLSteensAAResult::FunctionInfo CFLSteensAAResult::buildSetsFrom(Function *Fn) { 198 CFLGraphBuilder<CFLSteensAAResult> GraphBuilder(*this, TLI, *Fn); 199 StratifiedSetsBuilder<InstantiatedValue> SetBuilder; 200 201 // Add all CFLGraph nodes and all Dereference edges to StratifiedSets 202 auto &Graph = GraphBuilder.getCFLGraph(); 203 for (const auto &Mapping : Graph.value_mappings()) { 204 auto Val = Mapping.first; 205 if (canSkipAddingToSets(Val)) 206 continue; 207 auto &ValueInfo = Mapping.second; 208 209 assert(ValueInfo.getNumLevels() > 0); 210 SetBuilder.add(InstantiatedValue{Val, 0}); 211 SetBuilder.noteAttributes(InstantiatedValue{Val, 0}, 212 ValueInfo.getNodeInfoAtLevel(0).Attr); 213 for (unsigned I = 0, E = ValueInfo.getNumLevels() - 1; I < E; ++I) { 214 SetBuilder.add(InstantiatedValue{Val, I + 1}); 215 SetBuilder.noteAttributes(InstantiatedValue{Val, I + 1}, 216 ValueInfo.getNodeInfoAtLevel(I + 1).Attr); 217 SetBuilder.addBelow(InstantiatedValue{Val, I}, 218 InstantiatedValue{Val, I + 1}); 219 } 220 } 221 222 // Add all assign edges to StratifiedSets 223 for (const auto &Mapping : Graph.value_mappings()) { 224 auto Val = Mapping.first; 225 if (canSkipAddingToSets(Val)) 226 continue; 227 auto &ValueInfo = Mapping.second; 228 229 for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) { 230 auto Src = InstantiatedValue{Val, I}; 231 for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) 232 SetBuilder.addWith(Src, Edge.Other); 233 } 234 } 235 236 return FunctionInfo(*Fn, GraphBuilder.getReturnValues(), SetBuilder.build()); 237 } 238 239 void CFLSteensAAResult::scan(Function *Fn) { 240 auto InsertPair = Cache.insert(std::make_pair(Fn, Optional<FunctionInfo>())); 241 (void)InsertPair; 242 assert(InsertPair.second && 243 "Trying to scan a function that has already been cached"); 244 245 // Note that we can't do Cache[Fn] = buildSetsFrom(Fn) here: the function call 246 // may get evaluated after operator[], potentially triggering a DenseMap 247 // resize and invalidating the reference returned by operator[] 248 auto FunInfo = buildSetsFrom(Fn); 249 Cache[Fn] = std::move(FunInfo); 250 251 Handles.push_front(FunctionHandle(Fn, this)); 252 } 253 254 void CFLSteensAAResult::evict(Function *Fn) { Cache.erase(Fn); } 255 256 /// Ensures that the given function is available in the cache, and returns the 257 /// entry. 258 const Optional<CFLSteensAAResult::FunctionInfo> & 259 CFLSteensAAResult::ensureCached(Function *Fn) { 260 auto Iter = Cache.find(Fn); 261 if (Iter == Cache.end()) { 262 scan(Fn); 263 Iter = Cache.find(Fn); 264 assert(Iter != Cache.end()); 265 assert(Iter->second.hasValue()); 266 } 267 return Iter->second; 268 } 269 270 const AliasSummary *CFLSteensAAResult::getAliasSummary(Function &Fn) { 271 auto &FunInfo = ensureCached(&Fn); 272 if (FunInfo.hasValue()) 273 return &FunInfo->getAliasSummary(); 274 else 275 return nullptr; 276 } 277 278 AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA, 279 const MemoryLocation &LocB) { 280 auto *ValA = const_cast<Value *>(LocA.Ptr); 281 auto *ValB = const_cast<Value *>(LocB.Ptr); 282 283 if (!ValA->getType()->isPointerTy() || !ValB->getType()->isPointerTy()) 284 return NoAlias; 285 286 Function *Fn = nullptr; 287 auto MaybeFnA = parentFunctionOfValue(ValA); 288 auto MaybeFnB = parentFunctionOfValue(ValB); 289 if (!MaybeFnA.hasValue() && !MaybeFnB.hasValue()) { 290 // The only times this is known to happen are when globals + InlineAsm are 291 // involved 292 DEBUG(dbgs() 293 << "CFLSteensAA: could not extract parent function information.\n"); 294 return MayAlias; 295 } 296 297 if (MaybeFnA.hasValue()) { 298 Fn = *MaybeFnA; 299 assert((!MaybeFnB.hasValue() || *MaybeFnB == *MaybeFnA) && 300 "Interprocedural queries not supported"); 301 } else { 302 Fn = *MaybeFnB; 303 } 304 305 assert(Fn != nullptr); 306 auto &MaybeInfo = ensureCached(Fn); 307 assert(MaybeInfo.hasValue()); 308 309 auto &Sets = MaybeInfo->getStratifiedSets(); 310 auto MaybeA = Sets.find(InstantiatedValue{ValA, 0}); 311 if (!MaybeA.hasValue()) 312 return MayAlias; 313 314 auto MaybeB = Sets.find(InstantiatedValue{ValB, 0}); 315 if (!MaybeB.hasValue()) 316 return MayAlias; 317 318 auto SetA = *MaybeA; 319 auto SetB = *MaybeB; 320 auto AttrsA = Sets.getLink(SetA.Index).Attrs; 321 auto AttrsB = Sets.getLink(SetB.Index).Attrs; 322 323 // If both values are local (meaning the corresponding set has attribute 324 // AttrNone or AttrEscaped), then we know that CFLSteensAA fully models them: 325 // they may-alias each other if and only if they are in the same set. 326 // If at least one value is non-local (meaning it either is global/argument or 327 // it comes from unknown sources like integer cast), the situation becomes a 328 // bit more interesting. We follow three general rules described below: 329 // - Non-local values may alias each other 330 // - AttrNone values do not alias any non-local values 331 // - AttrEscaped do not alias globals/arguments, but they may alias 332 // AttrUnknown values 333 if (SetA.Index == SetB.Index) 334 return MayAlias; 335 if (AttrsA.none() || AttrsB.none()) 336 return NoAlias; 337 if (hasUnknownOrCallerAttr(AttrsA) || hasUnknownOrCallerAttr(AttrsB)) 338 return MayAlias; 339 if (isGlobalOrArgAttr(AttrsA) && isGlobalOrArgAttr(AttrsB)) 340 return MayAlias; 341 return NoAlias; 342 } 343 344 ModRefInfo CFLSteensAAResult::getArgModRefInfo(ImmutableCallSite CS, 345 unsigned ArgIdx) { 346 if (auto CalledFunc = CS.getCalledFunction()) { 347 auto &MaybeInfo = ensureCached(const_cast<Function *>(CalledFunc)); 348 if (!MaybeInfo.hasValue()) 349 return MRI_ModRef; 350 auto &RetParamAttributes = MaybeInfo->getAliasSummary().RetParamAttributes; 351 auto &RetParamRelations = MaybeInfo->getAliasSummary().RetParamRelations; 352 353 bool ArgAttributeIsWritten = 354 std::any_of(RetParamAttributes.begin(), RetParamAttributes.end(), 355 [ArgIdx](const ExternalAttribute &ExtAttr) { 356 return ExtAttr.IValue.Index == ArgIdx + 1; 357 }); 358 bool ArgIsAccessed = 359 std::any_of(RetParamRelations.begin(), RetParamRelations.end(), 360 [ArgIdx](const ExternalRelation &ExtRelation) { 361 return ExtRelation.To.Index == ArgIdx + 1 || 362 ExtRelation.From.Index == ArgIdx + 1; 363 }); 364 365 return (!ArgIsAccessed && !ArgAttributeIsWritten) ? MRI_NoModRef 366 : MRI_ModRef; 367 } 368 369 return MRI_ModRef; 370 } 371 372 FunctionModRefBehavior 373 CFLSteensAAResult::getModRefBehavior(ImmutableCallSite CS) { 374 // If we know the callee, try analyzing it 375 if (auto CalledFunc = CS.getCalledFunction()) 376 return getModRefBehavior(CalledFunc); 377 378 // Otherwise, be conservative 379 return FMRB_UnknownModRefBehavior; 380 } 381 382 FunctionModRefBehavior CFLSteensAAResult::getModRefBehavior(const Function *F) { 383 assert(F != nullptr); 384 385 // TODO: Remove the const_cast 386 auto &MaybeInfo = ensureCached(const_cast<Function *>(F)); 387 if (!MaybeInfo.hasValue()) 388 return FMRB_UnknownModRefBehavior; 389 auto &RetParamAttributes = MaybeInfo->getAliasSummary().RetParamAttributes; 390 auto &RetParamRelations = MaybeInfo->getAliasSummary().RetParamRelations; 391 392 // First, if any argument is marked Escpaed, Unknown or Global, anything may 393 // happen to them and thus we can't draw any conclusion. 394 if (!RetParamAttributes.empty()) 395 return FMRB_UnknownModRefBehavior; 396 397 // Currently we don't (and can't) distinguish reads from writes in 398 // RetParamRelations. All we can say is whether there may be memory access or 399 // not. 400 if (RetParamRelations.empty()) 401 return FMRB_DoesNotAccessMemory; 402 403 // Check if something beyond argmem gets touched. 404 bool AccessArgMemoryOnly = 405 std::all_of(RetParamRelations.begin(), RetParamRelations.end(), 406 [](const ExternalRelation &ExtRelation) { 407 // Both DerefLevels has to be 0, since we don't know which 408 // one is a read and which is a write. 409 return ExtRelation.From.DerefLevel == 0 && 410 ExtRelation.To.DerefLevel == 0; 411 }); 412 return AccessArgMemoryOnly ? FMRB_OnlyAccessesArgumentPointees 413 : FMRB_UnknownModRefBehavior; 414 } 415 416 char CFLSteensAA::PassID; 417 418 CFLSteensAAResult CFLSteensAA::run(Function &F, AnalysisManager<Function> &AM) { 419 return CFLSteensAAResult(AM.getResult<TargetLibraryAnalysis>(F)); 420 } 421 422 char CFLSteensAAWrapperPass::ID = 0; 423 INITIALIZE_PASS(CFLSteensAAWrapperPass, "cfl-steens-aa", 424 "Unification-Based CFL Alias Analysis", false, true) 425 426 ImmutablePass *llvm::createCFLSteensAAWrapperPass() { 427 return new CFLSteensAAWrapperPass(); 428 } 429 430 CFLSteensAAWrapperPass::CFLSteensAAWrapperPass() : ImmutablePass(ID) { 431 initializeCFLSteensAAWrapperPassPass(*PassRegistry::getPassRegistry()); 432 } 433 434 void CFLSteensAAWrapperPass::initializePass() { 435 auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>(); 436 Result.reset(new CFLSteensAAResult(TLIWP.getTLI())); 437 } 438 439 void CFLSteensAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 440 AU.setPreservesAll(); 441 AU.addRequired<TargetLibraryInfoWrapperPass>(); 442 } 443