1 //===- ValueEnumerator.cpp - Number values and types for bitcode writer ---===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the ValueEnumerator class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "ValueEnumerator.h" 15 #include "llvm/ADT/DenseMap.h" 16 #include "llvm/ADT/SmallVector.h" 17 #include "llvm/Config/llvm-config.h" 18 #include "llvm/IR/Argument.h" 19 #include "llvm/IR/Attributes.h" 20 #include "llvm/IR/BasicBlock.h" 21 #include "llvm/IR/Constant.h" 22 #include "llvm/IR/DebugInfoMetadata.h" 23 #include "llvm/IR/DerivedTypes.h" 24 #include "llvm/IR/Function.h" 25 #include "llvm/IR/GlobalAlias.h" 26 #include "llvm/IR/GlobalIFunc.h" 27 #include "llvm/IR/GlobalObject.h" 28 #include "llvm/IR/GlobalValue.h" 29 #include "llvm/IR/GlobalVariable.h" 30 #include "llvm/IR/Instruction.h" 31 #include "llvm/IR/Instructions.h" 32 #include "llvm/IR/Metadata.h" 33 #include "llvm/IR/Module.h" 34 #include "llvm/IR/Type.h" 35 #include "llvm/IR/Use.h" 36 #include "llvm/IR/UseListOrder.h" 37 #include "llvm/IR/User.h" 38 #include "llvm/IR/Value.h" 39 #include "llvm/IR/ValueSymbolTable.h" 40 #include "llvm/Support/Casting.h" 41 #include "llvm/Support/Compiler.h" 42 #include "llvm/Support/Debug.h" 43 #include "llvm/Support/MathExtras.h" 44 #include "llvm/Support/raw_ostream.h" 45 #include <algorithm> 46 #include <cassert> 47 #include <cstddef> 48 #include <iterator> 49 #include <tuple> 50 #include <utility> 51 #include <vector> 52 53 using namespace llvm; 54 55 namespace { 56 57 struct OrderMap { 58 DenseMap<const Value *, std::pair<unsigned, bool>> IDs; 59 unsigned LastGlobalConstantID = 0; 60 unsigned LastGlobalValueID = 0; 61 62 OrderMap() = default; 63 64 bool isGlobalConstant(unsigned ID) const { 65 return ID <= LastGlobalConstantID; 66 } 67 68 bool isGlobalValue(unsigned ID) const { 69 return ID <= LastGlobalValueID && !isGlobalConstant(ID); 70 } 71 72 unsigned size() const { return IDs.size(); } 73 std::pair<unsigned, bool> &operator[](const Value *V) { return IDs[V]; } 74 75 std::pair<unsigned, bool> lookup(const Value *V) const { 76 return IDs.lookup(V); 77 } 78 79 void index(const Value *V) { 80 // Explicitly sequence get-size and insert-value operations to avoid UB. 81 unsigned ID = IDs.size() + 1; 82 IDs[V].first = ID; 83 } 84 }; 85 86 } // end anonymous namespace 87 88 static void orderValue(const Value *V, OrderMap &OM) { 89 if (OM.lookup(V).first) 90 return; 91 92 if (const Constant *C = dyn_cast<Constant>(V)) 93 if (C->getNumOperands() && !isa<GlobalValue>(C)) 94 for (const Value *Op : C->operands()) 95 if (!isa<BasicBlock>(Op) && !isa<GlobalValue>(Op)) 96 orderValue(Op, OM); 97 98 // Note: we cannot cache this lookup above, since inserting into the map 99 // changes the map's size, and thus affects the other IDs. 100 OM.index(V); 101 } 102 103 static OrderMap orderModule(const Module &M) { 104 // This needs to match the order used by ValueEnumerator::ValueEnumerator() 105 // and ValueEnumerator::incorporateFunction(). 106 OrderMap OM; 107 108 // In the reader, initializers of GlobalValues are set *after* all the 109 // globals have been read. Rather than awkwardly modeling this behaviour 110 // directly in predictValueUseListOrderImpl(), just assign IDs to 111 // initializers of GlobalValues before GlobalValues themselves to model this 112 // implicitly. 113 for (const GlobalVariable &G : M.globals()) 114 if (G.hasInitializer()) 115 if (!isa<GlobalValue>(G.getInitializer())) 116 orderValue(G.getInitializer(), OM); 117 for (const GlobalAlias &A : M.aliases()) 118 if (!isa<GlobalValue>(A.getAliasee())) 119 orderValue(A.getAliasee(), OM); 120 for (const GlobalIFunc &I : M.ifuncs()) 121 if (!isa<GlobalValue>(I.getResolver())) 122 orderValue(I.getResolver(), OM); 123 for (const Function &F : M) { 124 for (const Use &U : F.operands()) 125 if (!isa<GlobalValue>(U.get())) 126 orderValue(U.get(), OM); 127 } 128 OM.LastGlobalConstantID = OM.size(); 129 130 // Initializers of GlobalValues are processed in 131 // BitcodeReader::ResolveGlobalAndAliasInits(). Match the order there rather 132 // than ValueEnumerator, and match the code in predictValueUseListOrderImpl() 133 // by giving IDs in reverse order. 134 // 135 // Since GlobalValues never reference each other directly (just through 136 // initializers), their relative IDs only matter for determining order of 137 // uses in their initializers. 138 for (const Function &F : M) 139 orderValue(&F, OM); 140 for (const GlobalAlias &A : M.aliases()) 141 orderValue(&A, OM); 142 for (const GlobalIFunc &I : M.ifuncs()) 143 orderValue(&I, OM); 144 for (const GlobalVariable &G : M.globals()) 145 orderValue(&G, OM); 146 OM.LastGlobalValueID = OM.size(); 147 148 for (const Function &F : M) { 149 if (F.isDeclaration()) 150 continue; 151 // Here we need to match the union of ValueEnumerator::incorporateFunction() 152 // and WriteFunction(). Basic blocks are implicitly declared before 153 // anything else (by declaring their size). 154 for (const BasicBlock &BB : F) 155 orderValue(&BB, OM); 156 for (const Argument &A : F.args()) 157 orderValue(&A, OM); 158 for (const BasicBlock &BB : F) 159 for (const Instruction &I : BB) 160 for (const Value *Op : I.operands()) 161 if ((isa<Constant>(*Op) && !isa<GlobalValue>(*Op)) || 162 isa<InlineAsm>(*Op)) 163 orderValue(Op, OM); 164 for (const BasicBlock &BB : F) 165 for (const Instruction &I : BB) 166 orderValue(&I, OM); 167 } 168 return OM; 169 } 170 171 static void predictValueUseListOrderImpl(const Value *V, const Function *F, 172 unsigned ID, const OrderMap &OM, 173 UseListOrderStack &Stack) { 174 // Predict use-list order for this one. 175 using Entry = std::pair<const Use *, unsigned>; 176 SmallVector<Entry, 64> List; 177 for (const Use &U : V->uses()) 178 // Check if this user will be serialized. 179 if (OM.lookup(U.getUser()).first) 180 List.push_back(std::make_pair(&U, List.size())); 181 182 if (List.size() < 2) 183 // We may have lost some users. 184 return; 185 186 bool IsGlobalValue = OM.isGlobalValue(ID); 187 llvm::sort(List.begin(), List.end(), [&](const Entry &L, const Entry &R) { 188 const Use *LU = L.first; 189 const Use *RU = R.first; 190 if (LU == RU) 191 return false; 192 193 auto LID = OM.lookup(LU->getUser()).first; 194 auto RID = OM.lookup(RU->getUser()).first; 195 196 // Global values are processed in reverse order. 197 // 198 // Moreover, initializers of GlobalValues are set *after* all the globals 199 // have been read (despite having earlier IDs). Rather than awkwardly 200 // modeling this behaviour here, orderModule() has assigned IDs to 201 // initializers of GlobalValues before GlobalValues themselves. 202 if (OM.isGlobalValue(LID) && OM.isGlobalValue(RID)) 203 return LID < RID; 204 205 // If ID is 4, then expect: 7 6 5 1 2 3. 206 if (LID < RID) { 207 if (RID <= ID) 208 if (!IsGlobalValue) // GlobalValue uses don't get reversed. 209 return true; 210 return false; 211 } 212 if (RID < LID) { 213 if (LID <= ID) 214 if (!IsGlobalValue) // GlobalValue uses don't get reversed. 215 return false; 216 return true; 217 } 218 219 // LID and RID are equal, so we have different operands of the same user. 220 // Assume operands are added in order for all instructions. 221 if (LID <= ID) 222 if (!IsGlobalValue) // GlobalValue uses don't get reversed. 223 return LU->getOperandNo() < RU->getOperandNo(); 224 return LU->getOperandNo() > RU->getOperandNo(); 225 }); 226 227 if (std::is_sorted( 228 List.begin(), List.end(), 229 [](const Entry &L, const Entry &R) { return L.second < R.second; })) 230 // Order is already correct. 231 return; 232 233 // Store the shuffle. 234 Stack.emplace_back(V, F, List.size()); 235 assert(List.size() == Stack.back().Shuffle.size() && "Wrong size"); 236 for (size_t I = 0, E = List.size(); I != E; ++I) 237 Stack.back().Shuffle[I] = List[I].second; 238 } 239 240 static void predictValueUseListOrder(const Value *V, const Function *F, 241 OrderMap &OM, UseListOrderStack &Stack) { 242 auto &IDPair = OM[V]; 243 assert(IDPair.first && "Unmapped value"); 244 if (IDPair.second) 245 // Already predicted. 246 return; 247 248 // Do the actual prediction. 249 IDPair.second = true; 250 if (!V->use_empty() && std::next(V->use_begin()) != V->use_end()) 251 predictValueUseListOrderImpl(V, F, IDPair.first, OM, Stack); 252 253 // Recursive descent into constants. 254 if (const Constant *C = dyn_cast<Constant>(V)) 255 if (C->getNumOperands()) // Visit GlobalValues. 256 for (const Value *Op : C->operands()) 257 if (isa<Constant>(Op)) // Visit GlobalValues. 258 predictValueUseListOrder(Op, F, OM, Stack); 259 } 260 261 static UseListOrderStack predictUseListOrder(const Module &M) { 262 OrderMap OM = orderModule(M); 263 264 // Use-list orders need to be serialized after all the users have been added 265 // to a value, or else the shuffles will be incomplete. Store them per 266 // function in a stack. 267 // 268 // Aside from function order, the order of values doesn't matter much here. 269 UseListOrderStack Stack; 270 271 // We want to visit the functions backward now so we can list function-local 272 // constants in the last Function they're used in. Module-level constants 273 // have already been visited above. 274 for (auto I = M.rbegin(), E = M.rend(); I != E; ++I) { 275 const Function &F = *I; 276 if (F.isDeclaration()) 277 continue; 278 for (const BasicBlock &BB : F) 279 predictValueUseListOrder(&BB, &F, OM, Stack); 280 for (const Argument &A : F.args()) 281 predictValueUseListOrder(&A, &F, OM, Stack); 282 for (const BasicBlock &BB : F) 283 for (const Instruction &I : BB) 284 for (const Value *Op : I.operands()) 285 if (isa<Constant>(*Op) || isa<InlineAsm>(*Op)) // Visit GlobalValues. 286 predictValueUseListOrder(Op, &F, OM, Stack); 287 for (const BasicBlock &BB : F) 288 for (const Instruction &I : BB) 289 predictValueUseListOrder(&I, &F, OM, Stack); 290 } 291 292 // Visit globals last, since the module-level use-list block will be seen 293 // before the function bodies are processed. 294 for (const GlobalVariable &G : M.globals()) 295 predictValueUseListOrder(&G, nullptr, OM, Stack); 296 for (const Function &F : M) 297 predictValueUseListOrder(&F, nullptr, OM, Stack); 298 for (const GlobalAlias &A : M.aliases()) 299 predictValueUseListOrder(&A, nullptr, OM, Stack); 300 for (const GlobalIFunc &I : M.ifuncs()) 301 predictValueUseListOrder(&I, nullptr, OM, Stack); 302 for (const GlobalVariable &G : M.globals()) 303 if (G.hasInitializer()) 304 predictValueUseListOrder(G.getInitializer(), nullptr, OM, Stack); 305 for (const GlobalAlias &A : M.aliases()) 306 predictValueUseListOrder(A.getAliasee(), nullptr, OM, Stack); 307 for (const GlobalIFunc &I : M.ifuncs()) 308 predictValueUseListOrder(I.getResolver(), nullptr, OM, Stack); 309 for (const Function &F : M) { 310 for (const Use &U : F.operands()) 311 predictValueUseListOrder(U.get(), nullptr, OM, Stack); 312 } 313 314 return Stack; 315 } 316 317 static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) { 318 return V.first->getType()->isIntOrIntVectorTy(); 319 } 320 321 ValueEnumerator::ValueEnumerator(const Module &M, 322 bool ShouldPreserveUseListOrder) 323 : ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) { 324 if (ShouldPreserveUseListOrder) 325 UseListOrders = predictUseListOrder(M); 326 327 // Enumerate the global variables. 328 for (const GlobalVariable &GV : M.globals()) 329 EnumerateValue(&GV); 330 331 // Enumerate the functions. 332 for (const Function & F : M) { 333 EnumerateValue(&F); 334 EnumerateAttributes(F.getAttributes()); 335 } 336 337 // Enumerate the aliases. 338 for (const GlobalAlias &GA : M.aliases()) 339 EnumerateValue(&GA); 340 341 // Enumerate the ifuncs. 342 for (const GlobalIFunc &GIF : M.ifuncs()) 343 EnumerateValue(&GIF); 344 345 // Remember what is the cutoff between globalvalue's and other constants. 346 unsigned FirstConstant = Values.size(); 347 348 // Enumerate the global variable initializers and attributes. 349 for (const GlobalVariable &GV : M.globals()) { 350 if (GV.hasInitializer()) 351 EnumerateValue(GV.getInitializer()); 352 if (GV.hasAttributes()) 353 EnumerateAttributes(GV.getAttributesAsList(AttributeList::FunctionIndex)); 354 } 355 356 // Enumerate the aliasees. 357 for (const GlobalAlias &GA : M.aliases()) 358 EnumerateValue(GA.getAliasee()); 359 360 // Enumerate the ifunc resolvers. 361 for (const GlobalIFunc &GIF : M.ifuncs()) 362 EnumerateValue(GIF.getResolver()); 363 364 // Enumerate any optional Function data. 365 for (const Function &F : M) 366 for (const Use &U : F.operands()) 367 EnumerateValue(U.get()); 368 369 // Enumerate the metadata type. 370 // 371 // TODO: Move this to ValueEnumerator::EnumerateOperandType() once bitcode 372 // only encodes the metadata type when it's used as a value. 373 EnumerateType(Type::getMetadataTy(M.getContext())); 374 375 // Insert constants and metadata that are named at module level into the slot 376 // pool so that the module symbol table can refer to them... 377 EnumerateValueSymbolTable(M.getValueSymbolTable()); 378 EnumerateNamedMetadata(M); 379 380 SmallVector<std::pair<unsigned, MDNode *>, 8> MDs; 381 for (const GlobalVariable &GV : M.globals()) { 382 MDs.clear(); 383 GV.getAllMetadata(MDs); 384 for (const auto &I : MDs) 385 // FIXME: Pass GV to EnumerateMetadata and arrange for the bitcode writer 386 // to write metadata to the global variable's own metadata block 387 // (PR28134). 388 EnumerateMetadata(nullptr, I.second); 389 } 390 391 // Enumerate types used by function bodies and argument lists. 392 for (const Function &F : M) { 393 for (const Argument &A : F.args()) 394 EnumerateType(A.getType()); 395 396 // Enumerate metadata attached to this function. 397 MDs.clear(); 398 F.getAllMetadata(MDs); 399 for (const auto &I : MDs) 400 EnumerateMetadata(F.isDeclaration() ? nullptr : &F, I.second); 401 402 for (const BasicBlock &BB : F) 403 for (const Instruction &I : BB) { 404 for (const Use &Op : I.operands()) { 405 auto *MD = dyn_cast<MetadataAsValue>(&Op); 406 if (!MD) { 407 EnumerateOperandType(Op); 408 continue; 409 } 410 411 // Local metadata is enumerated during function-incorporation. 412 if (isa<LocalAsMetadata>(MD->getMetadata())) 413 continue; 414 415 EnumerateMetadata(&F, MD->getMetadata()); 416 } 417 EnumerateType(I.getType()); 418 if (const CallInst *CI = dyn_cast<CallInst>(&I)) 419 EnumerateAttributes(CI->getAttributes()); 420 else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) 421 EnumerateAttributes(II->getAttributes()); 422 423 // Enumerate metadata attached with this instruction. 424 MDs.clear(); 425 I.getAllMetadataOtherThanDebugLoc(MDs); 426 for (unsigned i = 0, e = MDs.size(); i != e; ++i) 427 EnumerateMetadata(&F, MDs[i].second); 428 429 // Don't enumerate the location directly -- it has a special record 430 // type -- but enumerate its operands. 431 if (DILocation *L = I.getDebugLoc()) 432 for (const Metadata *Op : L->operands()) 433 EnumerateMetadata(&F, Op); 434 } 435 } 436 437 // Optimize constant ordering. 438 OptimizeConstants(FirstConstant, Values.size()); 439 440 // Organize metadata ordering. 441 organizeMetadata(); 442 } 443 444 unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const { 445 InstructionMapType::const_iterator I = InstructionMap.find(Inst); 446 assert(I != InstructionMap.end() && "Instruction is not mapped!"); 447 return I->second; 448 } 449 450 unsigned ValueEnumerator::getComdatID(const Comdat *C) const { 451 unsigned ComdatID = Comdats.idFor(C); 452 assert(ComdatID && "Comdat not found!"); 453 return ComdatID; 454 } 455 456 void ValueEnumerator::setInstructionID(const Instruction *I) { 457 InstructionMap[I] = InstructionCount++; 458 } 459 460 unsigned ValueEnumerator::getValueID(const Value *V) const { 461 if (auto *MD = dyn_cast<MetadataAsValue>(V)) 462 return getMetadataID(MD->getMetadata()); 463 464 ValueMapType::const_iterator I = ValueMap.find(V); 465 assert(I != ValueMap.end() && "Value not in slotcalculator!"); 466 return I->second-1; 467 } 468 469 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 470 LLVM_DUMP_METHOD void ValueEnumerator::dump() const { 471 print(dbgs(), ValueMap, "Default"); 472 dbgs() << '\n'; 473 print(dbgs(), MetadataMap, "MetaData"); 474 dbgs() << '\n'; 475 } 476 #endif 477 478 void ValueEnumerator::print(raw_ostream &OS, const ValueMapType &Map, 479 const char *Name) const { 480 OS << "Map Name: " << Name << "\n"; 481 OS << "Size: " << Map.size() << "\n"; 482 for (ValueMapType::const_iterator I = Map.begin(), 483 E = Map.end(); I != E; ++I) { 484 const Value *V = I->first; 485 if (V->hasName()) 486 OS << "Value: " << V->getName(); 487 else 488 OS << "Value: [null]\n"; 489 V->print(errs()); 490 errs() << '\n'; 491 492 OS << " Uses(" << V->getNumUses() << "):"; 493 for (const Use &U : V->uses()) { 494 if (&U != &*V->use_begin()) 495 OS << ","; 496 if(U->hasName()) 497 OS << " " << U->getName(); 498 else 499 OS << " [null]"; 500 501 } 502 OS << "\n\n"; 503 } 504 } 505 506 void ValueEnumerator::print(raw_ostream &OS, const MetadataMapType &Map, 507 const char *Name) const { 508 OS << "Map Name: " << Name << "\n"; 509 OS << "Size: " << Map.size() << "\n"; 510 for (auto I = Map.begin(), E = Map.end(); I != E; ++I) { 511 const Metadata *MD = I->first; 512 OS << "Metadata: slot = " << I->second.ID << "\n"; 513 OS << "Metadata: function = " << I->second.F << "\n"; 514 MD->print(OS); 515 OS << "\n"; 516 } 517 } 518 519 /// OptimizeConstants - Reorder constant pool for denser encoding. 520 void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) { 521 if (CstStart == CstEnd || CstStart+1 == CstEnd) return; 522 523 if (ShouldPreserveUseListOrder) 524 // Optimizing constants makes the use-list order difficult to predict. 525 // Disable it for now when trying to preserve the order. 526 return; 527 528 std::stable_sort(Values.begin() + CstStart, Values.begin() + CstEnd, 529 [this](const std::pair<const Value *, unsigned> &LHS, 530 const std::pair<const Value *, unsigned> &RHS) { 531 // Sort by plane. 532 if (LHS.first->getType() != RHS.first->getType()) 533 return getTypeID(LHS.first->getType()) < getTypeID(RHS.first->getType()); 534 // Then by frequency. 535 return LHS.second > RHS.second; 536 }); 537 538 // Ensure that integer and vector of integer constants are at the start of the 539 // constant pool. This is important so that GEP structure indices come before 540 // gep constant exprs. 541 std::stable_partition(Values.begin() + CstStart, Values.begin() + CstEnd, 542 isIntOrIntVectorValue); 543 544 // Rebuild the modified portion of ValueMap. 545 for (; CstStart != CstEnd; ++CstStart) 546 ValueMap[Values[CstStart].first] = CstStart+1; 547 } 548 549 /// EnumerateValueSymbolTable - Insert all of the values in the specified symbol 550 /// table into the values table. 551 void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) { 552 for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end(); 553 VI != VE; ++VI) 554 EnumerateValue(VI->getValue()); 555 } 556 557 /// Insert all of the values referenced by named metadata in the specified 558 /// module. 559 void ValueEnumerator::EnumerateNamedMetadata(const Module &M) { 560 for (const auto &I : M.named_metadata()) 561 EnumerateNamedMDNode(&I); 562 } 563 564 void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) { 565 for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) 566 EnumerateMetadata(nullptr, MD->getOperand(i)); 567 } 568 569 unsigned ValueEnumerator::getMetadataFunctionID(const Function *F) const { 570 return F ? getValueID(F) + 1 : 0; 571 } 572 573 void ValueEnumerator::EnumerateMetadata(const Function *F, const Metadata *MD) { 574 EnumerateMetadata(getMetadataFunctionID(F), MD); 575 } 576 577 void ValueEnumerator::EnumerateFunctionLocalMetadata( 578 const Function &F, const LocalAsMetadata *Local) { 579 EnumerateFunctionLocalMetadata(getMetadataFunctionID(&F), Local); 580 } 581 582 void ValueEnumerator::dropFunctionFromMetadata( 583 MetadataMapType::value_type &FirstMD) { 584 SmallVector<const MDNode *, 64> Worklist; 585 auto push = [&Worklist](MetadataMapType::value_type &MD) { 586 auto &Entry = MD.second; 587 588 // Nothing to do if this metadata isn't tagged. 589 if (!Entry.F) 590 return; 591 592 // Drop the function tag. 593 Entry.F = 0; 594 595 // If this is has an ID and is an MDNode, then its operands have entries as 596 // well. We need to drop the function from them too. 597 if (Entry.ID) 598 if (auto *N = dyn_cast<MDNode>(MD.first)) 599 Worklist.push_back(N); 600 }; 601 push(FirstMD); 602 while (!Worklist.empty()) 603 for (const Metadata *Op : Worklist.pop_back_val()->operands()) { 604 if (!Op) 605 continue; 606 auto MD = MetadataMap.find(Op); 607 if (MD != MetadataMap.end()) 608 push(*MD); 609 } 610 } 611 612 void ValueEnumerator::EnumerateMetadata(unsigned F, const Metadata *MD) { 613 // It's vital for reader efficiency that uniqued subgraphs are done in 614 // post-order; it's expensive when their operands have forward references. 615 // If a distinct node is referenced from a uniqued node, it'll be delayed 616 // until the uniqued subgraph has been completely traversed. 617 SmallVector<const MDNode *, 32> DelayedDistinctNodes; 618 619 // Start by enumerating MD, and then work through its transitive operands in 620 // post-order. This requires a depth-first search. 621 SmallVector<std::pair<const MDNode *, MDNode::op_iterator>, 32> Worklist; 622 if (const MDNode *N = enumerateMetadataImpl(F, MD)) 623 Worklist.push_back(std::make_pair(N, N->op_begin())); 624 625 while (!Worklist.empty()) { 626 const MDNode *N = Worklist.back().first; 627 628 // Enumerate operands until we hit a new node. We need to traverse these 629 // nodes' operands before visiting the rest of N's operands. 630 MDNode::op_iterator I = std::find_if( 631 Worklist.back().second, N->op_end(), 632 [&](const Metadata *MD) { return enumerateMetadataImpl(F, MD); }); 633 if (I != N->op_end()) { 634 auto *Op = cast<MDNode>(*I); 635 Worklist.back().second = ++I; 636 637 // Delay traversing Op if it's a distinct node and N is uniqued. 638 if (Op->isDistinct() && !N->isDistinct()) 639 DelayedDistinctNodes.push_back(Op); 640 else 641 Worklist.push_back(std::make_pair(Op, Op->op_begin())); 642 continue; 643 } 644 645 // All the operands have been visited. Now assign an ID. 646 Worklist.pop_back(); 647 MDs.push_back(N); 648 MetadataMap[N].ID = MDs.size(); 649 650 // Flush out any delayed distinct nodes; these are all the distinct nodes 651 // that are leaves in last uniqued subgraph. 652 if (Worklist.empty() || Worklist.back().first->isDistinct()) { 653 for (const MDNode *N : DelayedDistinctNodes) 654 Worklist.push_back(std::make_pair(N, N->op_begin())); 655 DelayedDistinctNodes.clear(); 656 } 657 } 658 } 659 660 const MDNode *ValueEnumerator::enumerateMetadataImpl(unsigned F, const Metadata *MD) { 661 if (!MD) 662 return nullptr; 663 664 assert( 665 (isa<MDNode>(MD) || isa<MDString>(MD) || isa<ConstantAsMetadata>(MD)) && 666 "Invalid metadata kind"); 667 668 auto Insertion = MetadataMap.insert(std::make_pair(MD, MDIndex(F))); 669 MDIndex &Entry = Insertion.first->second; 670 if (!Insertion.second) { 671 // Already mapped. If F doesn't match the function tag, drop it. 672 if (Entry.hasDifferentFunction(F)) 673 dropFunctionFromMetadata(*Insertion.first); 674 return nullptr; 675 } 676 677 // Don't assign IDs to metadata nodes. 678 if (auto *N = dyn_cast<MDNode>(MD)) 679 return N; 680 681 // Save the metadata. 682 MDs.push_back(MD); 683 Entry.ID = MDs.size(); 684 685 // Enumerate the constant, if any. 686 if (auto *C = dyn_cast<ConstantAsMetadata>(MD)) 687 EnumerateValue(C->getValue()); 688 689 return nullptr; 690 } 691 692 /// EnumerateFunctionLocalMetadataa - Incorporate function-local metadata 693 /// information reachable from the metadata. 694 void ValueEnumerator::EnumerateFunctionLocalMetadata( 695 unsigned F, const LocalAsMetadata *Local) { 696 assert(F && "Expected a function"); 697 698 // Check to see if it's already in! 699 MDIndex &Index = MetadataMap[Local]; 700 if (Index.ID) { 701 assert(Index.F == F && "Expected the same function"); 702 return; 703 } 704 705 MDs.push_back(Local); 706 Index.F = F; 707 Index.ID = MDs.size(); 708 709 EnumerateValue(Local->getValue()); 710 } 711 712 static unsigned getMetadataTypeOrder(const Metadata *MD) { 713 // Strings are emitted in bulk and must come first. 714 if (isa<MDString>(MD)) 715 return 0; 716 717 // ConstantAsMetadata doesn't reference anything. We may as well shuffle it 718 // to the front since we can detect it. 719 auto *N = dyn_cast<MDNode>(MD); 720 if (!N) 721 return 1; 722 723 // The reader is fast forward references for distinct node operands, but slow 724 // when uniqued operands are unresolved. 725 return N->isDistinct() ? 2 : 3; 726 } 727 728 void ValueEnumerator::organizeMetadata() { 729 assert(MetadataMap.size() == MDs.size() && 730 "Metadata map and vector out of sync"); 731 732 if (MDs.empty()) 733 return; 734 735 // Copy out the index information from MetadataMap in order to choose a new 736 // order. 737 SmallVector<MDIndex, 64> Order; 738 Order.reserve(MetadataMap.size()); 739 for (const Metadata *MD : MDs) 740 Order.push_back(MetadataMap.lookup(MD)); 741 742 // Partition: 743 // - by function, then 744 // - by isa<MDString> 745 // and then sort by the original/current ID. Since the IDs are guaranteed to 746 // be unique, the result of std::sort will be deterministic. There's no need 747 // for std::stable_sort. 748 llvm::sort(Order.begin(), Order.end(), [this](MDIndex LHS, MDIndex RHS) { 749 return std::make_tuple(LHS.F, getMetadataTypeOrder(LHS.get(MDs)), LHS.ID) < 750 std::make_tuple(RHS.F, getMetadataTypeOrder(RHS.get(MDs)), RHS.ID); 751 }); 752 753 // Rebuild MDs, index the metadata ranges for each function in FunctionMDs, 754 // and fix up MetadataMap. 755 std::vector<const Metadata *> OldMDs = std::move(MDs); 756 MDs.reserve(OldMDs.size()); 757 for (unsigned I = 0, E = Order.size(); I != E && !Order[I].F; ++I) { 758 auto *MD = Order[I].get(OldMDs); 759 MDs.push_back(MD); 760 MetadataMap[MD].ID = I + 1; 761 if (isa<MDString>(MD)) 762 ++NumMDStrings; 763 } 764 765 // Return early if there's nothing for the functions. 766 if (MDs.size() == Order.size()) 767 return; 768 769 // Build the function metadata ranges. 770 MDRange R; 771 FunctionMDs.reserve(OldMDs.size()); 772 unsigned PrevF = 0; 773 for (unsigned I = MDs.size(), E = Order.size(), ID = MDs.size(); I != E; 774 ++I) { 775 unsigned F = Order[I].F; 776 if (!PrevF) { 777 PrevF = F; 778 } else if (PrevF != F) { 779 R.Last = FunctionMDs.size(); 780 std::swap(R, FunctionMDInfo[PrevF]); 781 R.First = FunctionMDs.size(); 782 783 ID = MDs.size(); 784 PrevF = F; 785 } 786 787 auto *MD = Order[I].get(OldMDs); 788 FunctionMDs.push_back(MD); 789 MetadataMap[MD].ID = ++ID; 790 if (isa<MDString>(MD)) 791 ++R.NumStrings; 792 } 793 R.Last = FunctionMDs.size(); 794 FunctionMDInfo[PrevF] = R; 795 } 796 797 void ValueEnumerator::incorporateFunctionMetadata(const Function &F) { 798 NumModuleMDs = MDs.size(); 799 800 auto R = FunctionMDInfo.lookup(getValueID(&F) + 1); 801 NumMDStrings = R.NumStrings; 802 MDs.insert(MDs.end(), FunctionMDs.begin() + R.First, 803 FunctionMDs.begin() + R.Last); 804 } 805 806 void ValueEnumerator::EnumerateValue(const Value *V) { 807 assert(!V->getType()->isVoidTy() && "Can't insert void values!"); 808 assert(!isa<MetadataAsValue>(V) && "EnumerateValue doesn't handle Metadata!"); 809 810 // Check to see if it's already in! 811 unsigned &ValueID = ValueMap[V]; 812 if (ValueID) { 813 // Increment use count. 814 Values[ValueID-1].second++; 815 return; 816 } 817 818 if (auto *GO = dyn_cast<GlobalObject>(V)) 819 if (const Comdat *C = GO->getComdat()) 820 Comdats.insert(C); 821 822 // Enumerate the type of this value. 823 EnumerateType(V->getType()); 824 825 if (const Constant *C = dyn_cast<Constant>(V)) { 826 if (isa<GlobalValue>(C)) { 827 // Initializers for globals are handled explicitly elsewhere. 828 } else if (C->getNumOperands()) { 829 // If a constant has operands, enumerate them. This makes sure that if a 830 // constant has uses (for example an array of const ints), that they are 831 // inserted also. 832 833 // We prefer to enumerate them with values before we enumerate the user 834 // itself. This makes it more likely that we can avoid forward references 835 // in the reader. We know that there can be no cycles in the constants 836 // graph that don't go through a global variable. 837 for (User::const_op_iterator I = C->op_begin(), E = C->op_end(); 838 I != E; ++I) 839 if (!isa<BasicBlock>(*I)) // Don't enumerate BB operand to BlockAddress. 840 EnumerateValue(*I); 841 842 // Finally, add the value. Doing this could make the ValueID reference be 843 // dangling, don't reuse it. 844 Values.push_back(std::make_pair(V, 1U)); 845 ValueMap[V] = Values.size(); 846 return; 847 } 848 } 849 850 // Add the value. 851 Values.push_back(std::make_pair(V, 1U)); 852 ValueID = Values.size(); 853 } 854 855 856 void ValueEnumerator::EnumerateType(Type *Ty) { 857 unsigned *TypeID = &TypeMap[Ty]; 858 859 // We've already seen this type. 860 if (*TypeID) 861 return; 862 863 // If it is a non-anonymous struct, mark the type as being visited so that we 864 // don't recursively visit it. This is safe because we allow forward 865 // references of these in the bitcode reader. 866 if (StructType *STy = dyn_cast<StructType>(Ty)) 867 if (!STy->isLiteral()) 868 *TypeID = ~0U; 869 870 // Enumerate all of the subtypes before we enumerate this type. This ensures 871 // that the type will be enumerated in an order that can be directly built. 872 for (Type *SubTy : Ty->subtypes()) 873 EnumerateType(SubTy); 874 875 // Refresh the TypeID pointer in case the table rehashed. 876 TypeID = &TypeMap[Ty]; 877 878 // Check to see if we got the pointer another way. This can happen when 879 // enumerating recursive types that hit the base case deeper than they start. 880 // 881 // If this is actually a struct that we are treating as forward ref'able, 882 // then emit the definition now that all of its contents are available. 883 if (*TypeID && *TypeID != ~0U) 884 return; 885 886 // Add this type now that its contents are all happily enumerated. 887 Types.push_back(Ty); 888 889 *TypeID = Types.size(); 890 } 891 892 // Enumerate the types for the specified value. If the value is a constant, 893 // walk through it, enumerating the types of the constant. 894 void ValueEnumerator::EnumerateOperandType(const Value *V) { 895 EnumerateType(V->getType()); 896 897 assert(!isa<MetadataAsValue>(V) && "Unexpected metadata operand"); 898 899 const Constant *C = dyn_cast<Constant>(V); 900 if (!C) 901 return; 902 903 // If this constant is already enumerated, ignore it, we know its type must 904 // be enumerated. 905 if (ValueMap.count(C)) 906 return; 907 908 // This constant may have operands, make sure to enumerate the types in 909 // them. 910 for (const Value *Op : C->operands()) { 911 // Don't enumerate basic blocks here, this happens as operands to 912 // blockaddress. 913 if (isa<BasicBlock>(Op)) 914 continue; 915 916 EnumerateOperandType(Op); 917 } 918 } 919 920 void ValueEnumerator::EnumerateAttributes(AttributeList PAL) { 921 if (PAL.isEmpty()) return; // null is always 0. 922 923 // Do a lookup. 924 unsigned &Entry = AttributeListMap[PAL]; 925 if (Entry == 0) { 926 // Never saw this before, add it. 927 AttributeLists.push_back(PAL); 928 Entry = AttributeLists.size(); 929 } 930 931 // Do lookups for all attribute groups. 932 for (unsigned i = PAL.index_begin(), e = PAL.index_end(); i != e; ++i) { 933 AttributeSet AS = PAL.getAttributes(i); 934 if (!AS.hasAttributes()) 935 continue; 936 IndexAndAttrSet Pair = {i, AS}; 937 unsigned &Entry = AttributeGroupMap[Pair]; 938 if (Entry == 0) { 939 AttributeGroups.push_back(Pair); 940 Entry = AttributeGroups.size(); 941 } 942 } 943 } 944 945 void ValueEnumerator::incorporateFunction(const Function &F) { 946 InstructionCount = 0; 947 NumModuleValues = Values.size(); 948 949 // Add global metadata to the function block. This doesn't include 950 // LocalAsMetadata. 951 incorporateFunctionMetadata(F); 952 953 // Adding function arguments to the value table. 954 for (const auto &I : F.args()) 955 EnumerateValue(&I); 956 957 FirstFuncConstantID = Values.size(); 958 959 // Add all function-level constants to the value table. 960 for (const BasicBlock &BB : F) { 961 for (const Instruction &I : BB) 962 for (const Use &OI : I.operands()) { 963 if ((isa<Constant>(OI) && !isa<GlobalValue>(OI)) || isa<InlineAsm>(OI)) 964 EnumerateValue(OI); 965 } 966 BasicBlocks.push_back(&BB); 967 ValueMap[&BB] = BasicBlocks.size(); 968 } 969 970 // Optimize the constant layout. 971 OptimizeConstants(FirstFuncConstantID, Values.size()); 972 973 // Add the function's parameter attributes so they are available for use in 974 // the function's instruction. 975 EnumerateAttributes(F.getAttributes()); 976 977 FirstInstID = Values.size(); 978 979 SmallVector<LocalAsMetadata *, 8> FnLocalMDVector; 980 // Add all of the instructions. 981 for (const BasicBlock &BB : F) { 982 for (const Instruction &I : BB) { 983 for (const Use &OI : I.operands()) { 984 if (auto *MD = dyn_cast<MetadataAsValue>(&OI)) 985 if (auto *Local = dyn_cast<LocalAsMetadata>(MD->getMetadata())) 986 // Enumerate metadata after the instructions they might refer to. 987 FnLocalMDVector.push_back(Local); 988 } 989 990 if (!I.getType()->isVoidTy()) 991 EnumerateValue(&I); 992 } 993 } 994 995 // Add all of the function-local metadata. 996 for (unsigned i = 0, e = FnLocalMDVector.size(); i != e; ++i) { 997 // At this point, every local values have been incorporated, we shouldn't 998 // have a metadata operand that references a value that hasn't been seen. 999 assert(ValueMap.count(FnLocalMDVector[i]->getValue()) && 1000 "Missing value for metadata operand"); 1001 EnumerateFunctionLocalMetadata(F, FnLocalMDVector[i]); 1002 } 1003 } 1004 1005 void ValueEnumerator::purgeFunction() { 1006 /// Remove purged values from the ValueMap. 1007 for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i) 1008 ValueMap.erase(Values[i].first); 1009 for (unsigned i = NumModuleMDs, e = MDs.size(); i != e; ++i) 1010 MetadataMap.erase(MDs[i]); 1011 for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i) 1012 ValueMap.erase(BasicBlocks[i]); 1013 1014 Values.resize(NumModuleValues); 1015 MDs.resize(NumModuleMDs); 1016 BasicBlocks.clear(); 1017 NumMDStrings = 0; 1018 } 1019 1020 static void IncorporateFunctionInfoGlobalBBIDs(const Function *F, 1021 DenseMap<const BasicBlock*, unsigned> &IDMap) { 1022 unsigned Counter = 0; 1023 for (const BasicBlock &BB : *F) 1024 IDMap[&BB] = ++Counter; 1025 } 1026 1027 /// getGlobalBasicBlockID - This returns the function-specific ID for the 1028 /// specified basic block. This is relatively expensive information, so it 1029 /// should only be used by rare constructs such as address-of-label. 1030 unsigned ValueEnumerator::getGlobalBasicBlockID(const BasicBlock *BB) const { 1031 unsigned &Idx = GlobalBasicBlockIDs[BB]; 1032 if (Idx != 0) 1033 return Idx-1; 1034 1035 IncorporateFunctionInfoGlobalBBIDs(BB->getParent(), GlobalBasicBlockIDs); 1036 return getGlobalBasicBlockID(BB); 1037 } 1038 1039 uint64_t ValueEnumerator::computeBitsRequiredForTypeIndicies() const { 1040 return Log2_32_Ceil(getTypes().size() + 1); 1041 } 1042