1 //===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the TypeBasedAliasAnalysis pass, which implements 11 // metadata-based TBAA. 12 // 13 // In LLVM IR, memory does not have types, so LLVM's own type system is not 14 // suitable for doing TBAA. Instead, metadata is added to the IR to describe 15 // a type system of a higher level language. This can be used to implement 16 // typical C/C++ TBAA, but it can also be used to implement custom alias 17 // analysis behavior for other languages. 18 // 19 // We now support two types of metadata format: scalar TBAA and struct-path 20 // aware TBAA. After all testing cases are upgraded to use struct-path aware 21 // TBAA and we can auto-upgrade existing bc files, the support for scalar TBAA 22 // can be dropped. 23 // 24 // The scalar TBAA metadata format is very simple. TBAA MDNodes have up to 25 // three fields, e.g.: 26 // !0 = metadata !{ metadata !"an example type tree" } 27 // !1 = metadata !{ metadata !"int", metadata !0 } 28 // !2 = metadata !{ metadata !"float", metadata !0 } 29 // !3 = metadata !{ metadata !"const float", metadata !2, i64 1 } 30 // 31 // The first field is an identity field. It can be any value, usually 32 // an MDString, which uniquely identifies the type. The most important 33 // name in the tree is the name of the root node. Two trees with 34 // different root node names are entirely disjoint, even if they 35 // have leaves with common names. 36 // 37 // The second field identifies the type's parent node in the tree, or 38 // is null or omitted for a root node. A type is considered to alias 39 // all of its descendants and all of its ancestors in the tree. 
Also, 40 // a type is considered to alias all types in other trees, so that 41 // bitcode produced from multiple front-ends is handled conservatively. 42 // 43 // If the third field is present, it's an integer which if equal to 1 44 // indicates that the type is "constant" (meaning pointsToConstantMemory 45 // should return true; see 46 // http://llvm.org/docs/AliasAnalysis.html#OtherItfs). 47 // 48 // With struct-path aware TBAA, the MDNodes attached to an instruction using 49 // "!tbaa" are called path tag nodes. 50 // 51 // The path tag node has 4 fields with the last field being optional. 52 // 53 // The first field is the base type node, it can be a struct type node 54 // or a scalar type node. The second field is the access type node, it 55 // must be a scalar type node. The third field is the offset into the base type. 56 // The last field has the same meaning as the last field of our scalar TBAA: 57 // it's an integer which if equal to 1 indicates that the access is "constant". 58 // 59 // The struct type node has a name and a list of pairs, one pair for each member 60 // of the struct. The first element of each pair is a type node (a struct type 61 // node or a scalar type node), specifying the type of the member, the second 62 // element of each pair is the offset of the member. 63 // 64 // Given an example 65 // typedef struct { 66 // short s; 67 // } A; 68 // typedef struct { 69 // uint16_t s; 70 // A a; 71 // } B; 72 // 73 // For an access to B.a.s, we attach !5 (a path tag node) to the load/store 74 // instruction. The base type is !4 (struct B), the access type is !2 (scalar 75 // type short) and the offset is 4. 
76 // 77 // !0 = metadata !{metadata !"Simple C/C++ TBAA"} 78 // !1 = metadata !{metadata !"omnipotent char", metadata !0} // Scalar type node 79 // !2 = metadata !{metadata !"short", metadata !1} // Scalar type node 80 // !3 = metadata !{metadata !"A", metadata !2, i64 0} // Struct type node 81 // !4 = metadata !{metadata !"B", metadata !2, i64 0, metadata !3, i64 4} 82 // // Struct type node 83 // !5 = metadata !{metadata !4, metadata !2, i64 4} // Path tag node 84 // 85 // The struct type nodes and the scalar type nodes form a type DAG. 86 // Root (!0) 87 // char (!1) -- edge to Root 88 // short (!2) -- edge to char 89 // A (!3) -- edge with offset 0 to short 90 // B (!4) -- edge with offset 0 to short and edge with offset 4 to A 91 // 92 // To check if two tags (tagX and tagY) can alias, we start from the base type 93 // of tagX, follow the edge with the correct offset in the type DAG and adjust 94 // the offset until we reach the base type of tagY or until we reach the Root 95 // node. 96 // If we reach the base type of tagY, compare the adjusted offset with 97 // offset of tagY, return Alias if the offsets are the same, return NoAlias 98 // otherwise. 99 // If we reach the Root node, perform the above starting from base type of tagY 100 // to see if we reach base type of tagX. 101 // 102 // If they have different roots, they're part of different potentially 103 // unrelated type systems, so we return Alias to be conservative. 104 // If neither node is an ancestor of the other and they have the same root, 105 // then we say NoAlias. 106 // 107 // TODO: The current metadata format doesn't support struct 108 // fields. For example: 109 // struct X { 110 // double d; 111 // int i; 112 // }; 113 // void foo(struct X *x, struct X *y, double *p) { 114 // *x = *y; 115 // *p = 0.0; 116 // } 117 // Struct X has a double member, so the store to *x can alias the store to *p. 
118 // Currently it's not possible to precisely describe all the things struct X 119 // aliases, so struct assignments must use conservative TBAA nodes. There's 120 // no scheme for attaching metadata to @llvm.memcpy yet either. 121 // 122 //===----------------------------------------------------------------------===// 123 124 #include "llvm/Analysis/TypeBasedAliasAnalysis.h" 125 #include "llvm/ADT/SetVector.h" 126 #include "llvm/IR/Constants.h" 127 #include "llvm/IR/LLVMContext.h" 128 #include "llvm/IR/Module.h" 129 #include "llvm/Support/CommandLine.h" 130 using namespace llvm; 131 132 // A handy option for disabling TBAA functionality. The same effect can also be 133 // achieved by stripping the !tbaa tags from IR, but this option is sometimes 134 // more convenient. 135 static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true)); 136 137 namespace { 138 /// TBAANode - This is a simple wrapper around an MDNode which provides a 139 /// higher-level interface by hiding the details of how alias analysis 140 /// information is encoded in its operands. 141 class TBAANode { 142 const MDNode *Node; 143 144 public: 145 TBAANode() : Node(nullptr) {} 146 explicit TBAANode(const MDNode *N) : Node(N) {} 147 148 /// getNode - Get the MDNode for this TBAANode. 149 const MDNode *getNode() const { return Node; } 150 151 /// getParent - Get this TBAANode's Alias tree parent. 152 TBAANode getParent() const { 153 if (Node->getNumOperands() < 2) 154 return TBAANode(); 155 MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1)); 156 if (!P) 157 return TBAANode(); 158 // Ok, this node has a valid parent. Return it. 159 return TBAANode(P); 160 } 161 162 /// TypeIsImmutable - Test if this TBAANode represents a type for objects 163 /// which are not modified (by any means) in the context where this 164 /// AliasAnalysis is relevant. 
165 bool TypeIsImmutable() const { 166 if (Node->getNumOperands() < 3) 167 return false; 168 ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(2)); 169 if (!CI) 170 return false; 171 return CI->getValue()[0]; 172 } 173 }; 174 175 /// This is a simple wrapper around an MDNode which provides a 176 /// higher-level interface by hiding the details of how alias analysis 177 /// information is encoded in its operands. 178 class TBAAStructTagNode { 179 /// This node should be created with createTBAAStructTagNode. 180 const MDNode *Node; 181 182 public: 183 explicit TBAAStructTagNode(const MDNode *N) : Node(N) {} 184 185 /// Get the MDNode for this TBAAStructTagNode. 186 const MDNode *getNode() const { return Node; } 187 188 const MDNode *getBaseType() const { 189 return dyn_cast_or_null<MDNode>(Node->getOperand(0)); 190 } 191 const MDNode *getAccessType() const { 192 return dyn_cast_or_null<MDNode>(Node->getOperand(1)); 193 } 194 uint64_t getOffset() const { 195 return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue(); 196 } 197 /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for 198 /// objects which are not modified (by any means) in the context where this 199 /// AliasAnalysis is relevant. 200 bool TypeIsImmutable() const { 201 if (Node->getNumOperands() < 4) 202 return false; 203 ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(3)); 204 if (!CI) 205 return false; 206 return CI->getValue()[0]; 207 } 208 }; 209 210 /// This is a simple wrapper around an MDNode which provides a 211 /// higher-level interface by hiding the details of how alias analysis 212 /// information is encoded in its operands. 213 class TBAAStructTypeNode { 214 /// This node should be created with createTBAAStructTypeNode. 215 const MDNode *Node; 216 217 public: 218 TBAAStructTypeNode() : Node(nullptr) {} 219 explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {} 220 221 /// Get the MDNode for this TBAAStructTypeNode. 
222 const MDNode *getNode() const { return Node; } 223 224 /// Get this TBAAStructTypeNode's field in the type DAG with 225 /// given offset. Update the offset to be relative to the field type. 226 TBAAStructTypeNode getParent(uint64_t &Offset) const { 227 // Parent can be omitted for the root node. 228 if (Node->getNumOperands() < 2) 229 return TBAAStructTypeNode(); 230 231 // Fast path for a scalar type node and a struct type node with a single 232 // field. 233 if (Node->getNumOperands() <= 3) { 234 uint64_t Cur = Node->getNumOperands() == 2 235 ? 0 236 : mdconst::extract<ConstantInt>(Node->getOperand(2)) 237 ->getZExtValue(); 238 Offset -= Cur; 239 MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1)); 240 if (!P) 241 return TBAAStructTypeNode(); 242 return TBAAStructTypeNode(P); 243 } 244 245 // Assume the offsets are in order. We return the previous field if 246 // the current offset is bigger than the given offset. 247 unsigned TheIdx = 0; 248 for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) { 249 uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(Idx + 1)) 250 ->getZExtValue(); 251 if (Cur > Offset) { 252 assert(Idx >= 3 && 253 "TBAAStructTypeNode::getParent should have an offset match!"); 254 TheIdx = Idx - 2; 255 break; 256 } 257 } 258 // Move along the last field. 259 if (TheIdx == 0) 260 TheIdx = Node->getNumOperands() - 2; 261 uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(TheIdx + 1)) 262 ->getZExtValue(); 263 Offset -= Cur; 264 MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx)); 265 if (!P) 266 return TBAAStructTypeNode(); 267 return TBAAStructTypeNode(P); 268 } 269 }; 270 } 271 272 /// Check the first operand of the tbaa tag node, if it is a MDNode, we treat 273 /// it as struct-path aware TBAA format, otherwise, we treat it as scalar TBAA 274 /// format. 
275 static bool isStructPathTBAA(const MDNode *MD) { 276 // Anonymous TBAA root starts with a MDNode and dragonegg uses it as 277 // a TBAA tag. 278 return isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3; 279 } 280 281 AliasResult TypeBasedAAResult::alias(const MemoryLocation &LocA, 282 const MemoryLocation &LocB) { 283 if (!EnableTBAA) 284 return AAResultBase::alias(LocA, LocB); 285 286 // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must 287 // be conservative. 288 const MDNode *AM = LocA.AATags.TBAA; 289 if (!AM) 290 return AAResultBase::alias(LocA, LocB); 291 const MDNode *BM = LocB.AATags.TBAA; 292 if (!BM) 293 return AAResultBase::alias(LocA, LocB); 294 295 // If they may alias, chain to the next AliasAnalysis. 296 if (Aliases(AM, BM)) 297 return AAResultBase::alias(LocA, LocB); 298 299 // Otherwise return a definitive result. 300 return NoAlias; 301 } 302 303 bool TypeBasedAAResult::pointsToConstantMemory(const MemoryLocation &Loc, 304 bool OrLocal) { 305 if (!EnableTBAA) 306 return AAResultBase::pointsToConstantMemory(Loc, OrLocal); 307 308 const MDNode *M = Loc.AATags.TBAA; 309 if (!M) 310 return AAResultBase::pointsToConstantMemory(Loc, OrLocal); 311 312 // If this is an "immutable" type, we can assume the pointer is pointing 313 // to constant memory. 314 if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) || 315 (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable())) 316 return true; 317 318 return AAResultBase::pointsToConstantMemory(Loc, OrLocal); 319 } 320 321 FunctionModRefBehavior 322 TypeBasedAAResult::getModRefBehavior(ImmutableCallSite CS) { 323 if (!EnableTBAA) 324 return AAResultBase::getModRefBehavior(CS); 325 326 FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior; 327 328 // If this is an "immutable" type, we can assume the call doesn't write 329 // to memory. 
330 if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) 331 if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) || 332 (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable())) 333 Min = FMRB_OnlyReadsMemory; 334 335 return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min); 336 } 337 338 FunctionModRefBehavior TypeBasedAAResult::getModRefBehavior(const Function *F) { 339 // Functions don't have metadata. Just chain to the next implementation. 340 return AAResultBase::getModRefBehavior(F); 341 } 342 343 ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS, 344 const MemoryLocation &Loc) { 345 if (!EnableTBAA) 346 return AAResultBase::getModRefInfo(CS, Loc); 347 348 if (const MDNode *L = Loc.AATags.TBAA) 349 if (const MDNode *M = 350 CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) 351 if (!Aliases(L, M)) 352 return MRI_NoModRef; 353 354 return AAResultBase::getModRefInfo(CS, Loc); 355 } 356 357 ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS1, 358 ImmutableCallSite CS2) { 359 if (!EnableTBAA) 360 return AAResultBase::getModRefInfo(CS1, CS2); 361 362 if (const MDNode *M1 = 363 CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) 364 if (const MDNode *M2 = 365 CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) 366 if (!Aliases(M1, M2)) 367 return MRI_NoModRef; 368 369 return AAResultBase::getModRefInfo(CS1, CS2); 370 } 371 372 bool MDNode::isTBAAVtableAccess() const { 373 if (!isStructPathTBAA(this)) { 374 if (getNumOperands() < 1) 375 return false; 376 if (MDString *Tag1 = dyn_cast<MDString>(getOperand(0))) { 377 if (Tag1->getString() == "vtable pointer") 378 return true; 379 } 380 return false; 381 } 382 383 // For struct-path aware TBAA, we use the access type of the tag. 
384 if (getNumOperands() < 2) 385 return false; 386 MDNode *Tag = cast_or_null<MDNode>(getOperand(1)); 387 if (!Tag) 388 return false; 389 if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) { 390 if (Tag1->getString() == "vtable pointer") 391 return true; 392 } 393 return false; 394 } 395 396 MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { 397 if (!A || !B) 398 return nullptr; 399 400 if (A == B) 401 return A; 402 403 // For struct-path aware TBAA, we use the access type of the tag. 404 bool StructPath = isStructPathTBAA(A) && isStructPathTBAA(B); 405 if (StructPath) { 406 A = cast_or_null<MDNode>(A->getOperand(1)); 407 if (!A) 408 return nullptr; 409 B = cast_or_null<MDNode>(B->getOperand(1)); 410 if (!B) 411 return nullptr; 412 } 413 414 SmallSetVector<MDNode *, 4> PathA; 415 MDNode *T = A; 416 while (T) { 417 if (PathA.count(T)) 418 report_fatal_error("Cycle found in TBAA metadata."); 419 PathA.insert(T); 420 T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) 421 : nullptr; 422 } 423 424 SmallSetVector<MDNode *, 4> PathB; 425 T = B; 426 while (T) { 427 if (PathB.count(T)) 428 report_fatal_error("Cycle found in TBAA metadata."); 429 PathB.insert(T); 430 T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) 431 : nullptr; 432 } 433 434 int IA = PathA.size() - 1; 435 int IB = PathB.size() - 1; 436 437 MDNode *Ret = nullptr; 438 while (IA >= 0 && IB >= 0) { 439 if (PathA[IA] == PathB[IB]) 440 Ret = PathA[IA]; 441 else 442 break; 443 --IA; 444 --IB; 445 } 446 if (!StructPath) 447 return Ret; 448 449 if (!Ret) 450 return nullptr; 451 // We need to convert from a type node to a tag node. 
452 Type *Int64 = IntegerType::get(A->getContext(), 64); 453 Metadata *Ops[3] = {Ret, Ret, 454 ConstantAsMetadata::get(ConstantInt::get(Int64, 0))}; 455 return MDNode::get(A->getContext(), Ops); 456 } 457 458 void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const { 459 if (Merge) 460 N.TBAA = 461 MDNode::getMostGenericTBAA(N.TBAA, getMetadata(LLVMContext::MD_tbaa)); 462 else 463 N.TBAA = getMetadata(LLVMContext::MD_tbaa); 464 465 if (Merge) 466 N.Scope = MDNode::getMostGenericAliasScope( 467 N.Scope, getMetadata(LLVMContext::MD_alias_scope)); 468 else 469 N.Scope = getMetadata(LLVMContext::MD_alias_scope); 470 471 if (Merge) 472 N.NoAlias = 473 MDNode::intersect(N.NoAlias, getMetadata(LLVMContext::MD_noalias)); 474 else 475 N.NoAlias = getMetadata(LLVMContext::MD_noalias); 476 } 477 478 /// Aliases - Test whether the type represented by A may alias the 479 /// type represented by B. 480 bool TypeBasedAAResult::Aliases(const MDNode *A, const MDNode *B) const { 481 // Make sure that both MDNodes are struct-path aware. 482 if (isStructPathTBAA(A) && isStructPathTBAA(B)) 483 return PathAliases(A, B); 484 485 // Keep track of the root node for A and B. 486 TBAANode RootA, RootB; 487 488 // Climb the tree from A to see if we reach B. 489 for (TBAANode T(A);;) { 490 if (T.getNode() == B) 491 // B is an ancestor of A. 492 return true; 493 494 RootA = T; 495 T = T.getParent(); 496 if (!T.getNode()) 497 break; 498 } 499 500 // Climb the tree from B to see if we reach A. 501 for (TBAANode T(B);;) { 502 if (T.getNode() == A) 503 // A is an ancestor of B. 504 return true; 505 506 RootB = T; 507 T = T.getParent(); 508 if (!T.getNode()) 509 break; 510 } 511 512 // Neither node is an ancestor of the other. 513 514 // If they have different roots, they're part of different potentially 515 // unrelated type systems, so we must be conservative. 
516 if (RootA.getNode() != RootB.getNode()) 517 return true; 518 519 // If they have the same root, then we've proved there's no alias. 520 return false; 521 } 522 523 /// Test whether the struct-path tag represented by A may alias the 524 /// struct-path tag represented by B. 525 bool TypeBasedAAResult::PathAliases(const MDNode *A, const MDNode *B) const { 526 // Verify that both input nodes are struct-path aware. 527 assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware."); 528 assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware."); 529 530 // Keep track of the root node for A and B. 531 TBAAStructTypeNode RootA, RootB; 532 TBAAStructTagNode TagA(A), TagB(B); 533 534 // TODO: We need to check if AccessType of TagA encloses AccessType of 535 // TagB to support aggregate AccessType. If yes, return true. 536 537 // Start from the base type of A, follow the edge with the correct offset in 538 // the type DAG and adjust the offset until we reach the base type of B or 539 // until we reach the Root node. 540 // Compare the adjusted offset once we have the same base. 541 542 // Climb the type DAG from base type of A to see if we reach base type of B. 543 const MDNode *BaseA = TagA.getBaseType(); 544 const MDNode *BaseB = TagB.getBaseType(); 545 uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset(); 546 for (TBAAStructTypeNode T(BaseA);;) { 547 if (T.getNode() == BaseB) 548 // Base type of A encloses base type of B, check if the offsets match. 549 return OffsetA == OffsetB; 550 551 RootA = T; 552 // Follow the edge with the correct offset, OffsetA will be adjusted to 553 // be relative to the field type. 554 T = T.getParent(OffsetA); 555 if (!T.getNode()) 556 break; 557 } 558 559 // Reset OffsetA and climb the type DAG from base type of B to see if we reach 560 // base type of A. 
561 OffsetA = TagA.getOffset(); 562 for (TBAAStructTypeNode T(BaseB);;) { 563 if (T.getNode() == BaseA) 564 // Base type of B encloses base type of A, check if the offsets match. 565 return OffsetA == OffsetB; 566 567 RootB = T; 568 // Follow the edge with the correct offset, OffsetB will be adjusted to 569 // be relative to the field type. 570 T = T.getParent(OffsetB); 571 if (!T.getNode()) 572 break; 573 } 574 575 // Neither node is an ancestor of the other. 576 577 // If they have different roots, they're part of different potentially 578 // unrelated type systems, so we must be conservative. 579 if (RootA.getNode() != RootB.getNode()) 580 return true; 581 582 // If they have the same root, then we've proved there's no alias. 583 return false; 584 } 585 586 char TypeBasedAA::PassID; 587 588 TypeBasedAAResult TypeBasedAA::run(Function &F, AnalysisManager<Function> &AM) { 589 return TypeBasedAAResult(); 590 } 591 592 char TypeBasedAAWrapperPass::ID = 0; 593 INITIALIZE_PASS(TypeBasedAAWrapperPass, "tbaa", "Type-Based Alias Analysis", 594 false, true) 595 596 ImmutablePass *llvm::createTypeBasedAAWrapperPass() { 597 return new TypeBasedAAWrapperPass(); 598 } 599 600 TypeBasedAAWrapperPass::TypeBasedAAWrapperPass() : ImmutablePass(ID) { 601 initializeTypeBasedAAWrapperPassPass(*PassRegistry::getPassRegistry()); 602 } 603 604 bool TypeBasedAAWrapperPass::doInitialization(Module &M) { 605 Result.reset(new TypeBasedAAResult()); 606 return false; 607 } 608 609 bool TypeBasedAAWrapperPass::doFinalization(Module &M) { 610 Result.reset(); 611 return false; 612 } 613 614 void TypeBasedAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 615 AU.setPreservesAll(); 616 } 617