1 //===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the TypeBasedAliasAnalysis pass, which implements 11 // metadata-based TBAA. 12 // 13 // In LLVM IR, memory does not have types, so LLVM's own type system is not 14 // suitable for doing TBAA. Instead, metadata is added to the IR to describe 15 // a type system of a higher level language. This can be used to implement 16 // typical C/C++ TBAA, but it can also be used to implement custom alias 17 // analysis behavior for other languages. 18 // 19 // The current metadata format is very simple. TBAA MDNodes have up to 20 // three fields, e.g.: 21 // !0 = metadata !{ metadata !"an example type tree" } 22 // !1 = metadata !{ metadata !"int", metadata !0 } 23 // !2 = metadata !{ metadata !"float", metadata !0 } 24 // !3 = metadata !{ metadata !"const float", metadata !2, i64 1 } 25 // 26 // The first field is an identity field. It can be any value, usually 27 // an MDString, which uniquely identifies the type. The most important 28 // name in the tree is the name of the root node. Two trees with 29 // different root node names are entirely disjoint, even if they 30 // have leaves with common names. 31 // 32 // The second field identifies the type's parent node in the tree, or 33 // is null or omitted for a root node. A type is considered to alias 34 // all of its descendants and all of its ancestors in the tree. Also, 35 // a type is considered to alias all types in other trees, so that 36 // bitcode produced from multiple front-ends is handled conservatively. 37 // 38 // If the third field is present, it's an integer which if equal to 1 39 // indicates that the type is "constant" (meaning pointsToConstantMemory 40 // should return true; see 41 // http://llvm.org/docs/AliasAnalysis.html#OtherItfs). 42 // 43 // TODO: The current metadata format doesn't support struct 44 // fields. For example: 45 // struct X { 46 // double d; 47 // int i; 48 // }; 49 // void foo(struct X *x, struct X *y, double *p) { 50 // *x = *y; 51 // *p = 0.0; 52 // } 53 // Struct X has a double member, so the store to *x can alias the store to *p. 54 // Currently it's not possible to precisely describe all the things struct X 55 // aliases, so struct assignments must use conservative TBAA nodes. There's 56 // no scheme for attaching metadata to @llvm.memcpy yet either. 57 // 58 //===----------------------------------------------------------------------===// 59 60 #include "llvm/Analysis/Passes.h" 61 #include "llvm/Analysis/AliasAnalysis.h" 62 #include "llvm/IR/Constants.h" 63 #include "llvm/IR/LLVMContext.h" 64 #include "llvm/IR/Metadata.h" 65 #include "llvm/IR/Module.h" 66 #include "llvm/Pass.h" 67 #include "llvm/Support/CommandLine.h" 68 using namespace llvm; 69 70 // A handy option for disabling TBAA functionality. The same effect can also be 71 // achieved by stripping the !tbaa tags from IR, but this option is sometimes 72 // more convenient. 73 static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true)); 74 static cl::opt<bool> EnableStructPathTBAA("struct-path-tbaa", cl::init(false)); 75 76 namespace { 77 /// TBAANode - This is a simple wrapper around an MDNode which provides a 78 /// higher-level interface by hiding the details of how alias analysis 79 /// information is encoded in its operands. 80 class TBAANode { 81 const MDNode *Node; 82 83 public: 84 TBAANode() : Node(0) {} 85 explicit TBAANode(const MDNode *N) : Node(N) {} 86 87 /// getNode - Get the MDNode for this TBAANode. 88 const MDNode *getNode() const { return Node; } 89 90 /// getParent - Get this TBAANode's Alias tree parent. 91 TBAANode getParent() const { 92 if (Node->getNumOperands() < 2) 93 return TBAANode(); 94 MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1)); 95 if (!P) 96 return TBAANode(); 97 // Ok, this node has a valid parent. Return it. 98 return TBAANode(P); 99 } 100 101 /// TypeIsImmutable - Test if this TBAANode represents a type for objects 102 /// which are not modified (by any means) in the context where this 103 /// AliasAnalysis is relevant. 104 bool TypeIsImmutable() const { 105 if (Node->getNumOperands() < 3) 106 return false; 107 ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(2)); 108 if (!CI) 109 return false; 110 return CI->getValue()[0]; 111 } 112 }; 113 114 /// This is a simple wrapper around an MDNode which provides a 115 /// higher-level interface by hiding the details of how alias analysis 116 /// information is encoded in its operands. 117 class TBAAStructTagNode { 118 /// This node should be created with createTBAAStructTagNode. 119 const MDNode *Node; 120 121 public: 122 TBAAStructTagNode() : Node(0) {} 123 explicit TBAAStructTagNode(const MDNode *N) : Node(N) {} 124 125 /// Get the MDNode for this TBAAStructTagNode. 126 const MDNode *getNode() const { return Node; } 127 128 const MDNode *getBaseType() const { 129 return dyn_cast_or_null<MDNode>(Node->getOperand(0)); 130 } 131 const MDNode *getAccessType() const { 132 return dyn_cast_or_null<MDNode>(Node->getOperand(1)); 133 } 134 uint64_t getOffset() const { 135 return cast<ConstantInt>(Node->getOperand(2))->getZExtValue(); 136 } 137 /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for 138 /// objects which are not modified (by any means) in the context where this 139 /// AliasAnalysis is relevant. 140 bool TypeIsImmutable() const { 141 if (Node->getNumOperands() < 4) 142 return false; 143 ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(3)); 144 if (!CI) 145 return false; 146 return CI->getValue()[0]; 147 } 148 }; 149 150 /// This is a simple wrapper around an MDNode which provides a 151 /// higher-level interface by hiding the details of how alias analysis 152 /// information is encoded in its operands. 153 class TBAAStructTypeNode { 154 /// This node should be created with createTBAAStructTypeNode. 155 const MDNode *Node; 156 157 public: 158 TBAAStructTypeNode() : Node(0) {} 159 explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {} 160 161 /// Get the MDNode for this TBAAStructTypeNode. 162 const MDNode *getNode() const { return Node; } 163 164 /// Get this TBAAStructTypeNode's field in the type DAG with 165 /// given offset. Update the offset to be relative to the field type. 166 TBAAStructTypeNode getParent(uint64_t &Offset) const { 167 // Parent can be omitted for the root node. 168 if (Node->getNumOperands() < 2) 169 return TBAAStructTypeNode(); 170 171 // Special handling for a scalar type node. 172 if (Node->getNumOperands() <= 3) { 173 MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1)); 174 if (!P) 175 return TBAAStructTypeNode(); 176 return TBAAStructTypeNode(P); 177 } 178 179 // Assume the offsets are in order. We return the previous field if 180 // the current offset is bigger than the given offset. 181 unsigned TheIdx = 0; 182 for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) { 183 uint64_t Cur = cast<ConstantInt>(Node->getOperand(Idx + 1))-> 184 getZExtValue(); 185 if (Cur > Offset) { 186 assert(Idx >= 3 && 187 "TBAAStructTypeNode::getParent should have an offset match!"); 188 TheIdx = Idx - 2; 189 break; 190 } 191 } 192 // Move along the last field. 193 if (TheIdx == 0) 194 TheIdx = Node->getNumOperands() - 2; 195 uint64_t Cur = cast<ConstantInt>(Node->getOperand(TheIdx + 1))-> 196 getZExtValue(); 197 Offset -= Cur; 198 MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx)); 199 if (!P) 200 return TBAAStructTypeNode(); 201 return TBAAStructTypeNode(P); 202 } 203 }; 204 } 205 206 namespace { 207 /// TypeBasedAliasAnalysis - This is a simple alias analysis 208 /// implementation that uses TypeBased to answer queries. 209 class TypeBasedAliasAnalysis : public ImmutablePass, 210 public AliasAnalysis { 211 public: 212 static char ID; // Class identification, replacement for typeinfo 213 TypeBasedAliasAnalysis() : ImmutablePass(ID) { 214 initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry()); 215 } 216 217 virtual void initializePass() { 218 InitializeAliasAnalysis(this); 219 } 220 221 /// getAdjustedAnalysisPointer - This method is used when a pass implements 222 /// an analysis interface through multiple inheritance. If needed, it 223 /// should override this to adjust the this pointer as needed for the 224 /// specified pass info. 225 virtual void *getAdjustedAnalysisPointer(const void *PI) { 226 if (PI == &AliasAnalysis::ID) 227 return (AliasAnalysis*)this; 228 return this; 229 } 230 231 bool Aliases(const MDNode *A, const MDNode *B) const; 232 bool PathAliases(const MDNode *A, const MDNode *B) const; 233 234 private: 235 virtual void getAnalysisUsage(AnalysisUsage &AU) const; 236 virtual AliasResult alias(const Location &LocA, const Location &LocB); 237 virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal); 238 virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS); 239 virtual ModRefBehavior getModRefBehavior(const Function *F); 240 virtual ModRefResult getModRefInfo(ImmutableCallSite CS, 241 const Location &Loc); 242 virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, 243 ImmutableCallSite CS2); 244 }; 245 } // End of anonymous namespace 246 247 // Register this pass... 248 char TypeBasedAliasAnalysis::ID = 0; 249 INITIALIZE_AG_PASS(TypeBasedAliasAnalysis, AliasAnalysis, "tbaa", 250 "Type-Based Alias Analysis", false, true, false) 251 252 ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() { 253 return new TypeBasedAliasAnalysis(); 254 } 255 256 void 257 TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { 258 AU.setPreservesAll(); 259 AliasAnalysis::getAnalysisUsage(AU); 260 } 261 262 /// Aliases - Test whether the type represented by A may alias the 263 /// type represented by B. 264 bool 265 TypeBasedAliasAnalysis::Aliases(const MDNode *A, 266 const MDNode *B) const { 267 if (EnableStructPathTBAA) 268 return PathAliases(A, B); 269 270 // Keep track of the root node for A and B. 271 TBAANode RootA, RootB; 272 273 // Climb the tree from A to see if we reach B. 274 for (TBAANode T(A); ; ) { 275 if (T.getNode() == B) 276 // B is an ancestor of A. 277 return true; 278 279 RootA = T; 280 T = T.getParent(); 281 if (!T.getNode()) 282 break; 283 } 284 285 // Climb the tree from B to see if we reach A. 286 for (TBAANode T(B); ; ) { 287 if (T.getNode() == A) 288 // A is an ancestor of B. 289 return true; 290 291 RootB = T; 292 T = T.getParent(); 293 if (!T.getNode()) 294 break; 295 } 296 297 // Neither node is an ancestor of the other. 298 299 // If they have different roots, they're part of different potentially 300 // unrelated type systems, so we must be conservative. 301 if (RootA.getNode() != RootB.getNode()) 302 return true; 303 304 // If they have the same root, then we've proved there's no alias. 305 return false; 306 } 307 308 /// Test whether the struct-path tag represented by A may alias the 309 /// struct-path tag represented by B. 310 bool 311 TypeBasedAliasAnalysis::PathAliases(const MDNode *A, 312 const MDNode *B) const { 313 // Keep track of the root node for A and B. 314 TBAAStructTypeNode RootA, RootB; 315 TBAAStructTagNode TagA(A), TagB(B); 316 317 // TODO: We need to check if AccessType of TagA encloses AccessType of 318 // TagB to support aggregate AccessType. If yes, return true. 319 320 // Start from the base type of A, follow the edge with the correct offset in 321 // the type DAG and adjust the offset until we reach the base type of B or 322 // until we reach the Root node. 323 // Compare the adjusted offset once we have the same base. 324 325 // Climb the type DAG from base type of A to see if we reach base type of B. 326 const MDNode *BaseA = TagA.getBaseType(); 327 const MDNode *BaseB = TagB.getBaseType(); 328 uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset(); 329 for (TBAAStructTypeNode T(BaseA); ; ) { 330 if (T.getNode() == BaseB) 331 // Base type of A encloses base type of B, check if the offsets match. 332 return OffsetA == OffsetB; 333 334 RootA = T; 335 // Follow the edge with the correct offset, OffsetA will be adjusted to 336 // be relative to the field type. 337 T = T.getParent(OffsetA); 338 if (!T.getNode()) 339 break; 340 } 341 342 // Reset OffsetA and climb the type DAG from base type of B to see if we reach 343 // base type of A. 344 OffsetA = TagA.getOffset(); 345 for (TBAAStructTypeNode T(BaseB); ; ) { 346 if (T.getNode() == BaseA) 347 // Base type of B encloses base type of A, check if the offsets match. 348 return OffsetA == OffsetB; 349 350 RootB = T; 351 // Follow the edge with the correct offset, OffsetB will be adjusted to 352 // be relative to the field type. 353 T = T.getParent(OffsetB); 354 if (!T.getNode()) 355 break; 356 } 357 358 // Neither node is an ancestor of the other. 359 360 // If they have different roots, they're part of different potentially 361 // unrelated type systems, so we must be conservative. 362 if (RootA.getNode() != RootB.getNode()) 363 return true; 364 365 // If they have the same root, then we've proved there's no alias. 366 return false; 367 } 368 369 AliasAnalysis::AliasResult 370 TypeBasedAliasAnalysis::alias(const Location &LocA, 371 const Location &LocB) { 372 if (!EnableTBAA) 373 return AliasAnalysis::alias(LocA, LocB); 374 375 // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must 376 // be conservative. 377 const MDNode *AM = LocA.TBAATag; 378 if (!AM) return AliasAnalysis::alias(LocA, LocB); 379 const MDNode *BM = LocB.TBAATag; 380 if (!BM) return AliasAnalysis::alias(LocA, LocB); 381 382 // If they may alias, chain to the next AliasAnalysis. 383 if (Aliases(AM, BM)) 384 return AliasAnalysis::alias(LocA, LocB); 385 386 // Otherwise return a definitive result. 387 return NoAlias; 388 } 389 390 bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc, 391 bool OrLocal) { 392 if (!EnableTBAA) 393 return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); 394 395 const MDNode *M = Loc.TBAATag; 396 if (!M) return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); 397 398 // If this is an "immutable" type, we can assume the pointer is pointing 399 // to constant memory. 400 if ((!EnableStructPathTBAA && TBAANode(M).TypeIsImmutable()) || 401 (EnableStructPathTBAA && TBAAStructTagNode(M).TypeIsImmutable())) 402 return true; 403 404 return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); 405 } 406 407 AliasAnalysis::ModRefBehavior 408 TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { 409 if (!EnableTBAA) 410 return AliasAnalysis::getModRefBehavior(CS); 411 412 ModRefBehavior Min = UnknownModRefBehavior; 413 414 // If this is an "immutable" type, we can assume the call doesn't write 415 // to memory. 416 if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) 417 if ((!EnableStructPathTBAA && TBAANode(M).TypeIsImmutable()) || 418 (EnableStructPathTBAA && TBAAStructTagNode(M).TypeIsImmutable())) 419 Min = OnlyReadsMemory; 420 421 return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); 422 } 423 424 AliasAnalysis::ModRefBehavior 425 TypeBasedAliasAnalysis::getModRefBehavior(const Function *F) { 426 // Functions don't have metadata. Just chain to the next implementation. 427 return AliasAnalysis::getModRefBehavior(F); 428 } 429 430 AliasAnalysis::ModRefResult 431 TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS, 432 const Location &Loc) { 433 if (!EnableTBAA) 434 return AliasAnalysis::getModRefInfo(CS, Loc); 435 436 if (const MDNode *L = Loc.TBAATag) 437 if (const MDNode *M = 438 CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) 439 if (!Aliases(L, M)) 440 return NoModRef; 441 442 return AliasAnalysis::getModRefInfo(CS, Loc); 443 } 444 445 AliasAnalysis::ModRefResult 446 TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, 447 ImmutableCallSite CS2) { 448 if (!EnableTBAA) 449 return AliasAnalysis::getModRefInfo(CS1, CS2); 450 451 if (const MDNode *M1 = 452 CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) 453 if (const MDNode *M2 = 454 CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) 455 if (!Aliases(M1, M2)) 456 return NoModRef; 457 458 return AliasAnalysis::getModRefInfo(CS1, CS2); 459 } 460 461 MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { 462 if (!A || !B) 463 return NULL; 464 465 if (A == B) 466 return A; 467 468 // For struct-path aware TBAA, we use the access type of the tag. 469 if (EnableStructPathTBAA) { 470 A = cast_or_null<MDNode>(A->getOperand(1)); 471 if (!A) return 0; 472 B = cast_or_null<MDNode>(B->getOperand(1)); 473 if (!B) return 0; 474 } 475 476 SmallVector<MDNode *, 4> PathA; 477 MDNode *T = A; 478 while (T) { 479 PathA.push_back(T); 480 T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0; 481 } 482 483 SmallVector<MDNode *, 4> PathB; 484 T = B; 485 while (T) { 486 PathB.push_back(T); 487 T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0; 488 } 489 490 int IA = PathA.size() - 1; 491 int IB = PathB.size() - 1; 492 493 MDNode *Ret = 0; 494 while (IA >= 0 && IB >=0) { 495 if (PathA[IA] == PathB[IB]) 496 Ret = PathA[IA]; 497 else 498 break; 499 --IA; 500 --IB; 501 } 502 if (!EnableStructPathTBAA) 503 return Ret; 504 505 if (!Ret) 506 return 0; 507 // We need to convert from a type node to a tag node. 508 Type *Int64 = IntegerType::get(A->getContext(), 64); 509 Value *Ops[3] = { Ret, Ret, ConstantInt::get(Int64, 0) }; 510 return MDNode::get(A->getContext(), Ops); 511 } 512