1 //===-- ValueEnumerator.cpp - Number values and types for bitcode writer --===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the ValueEnumerator class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "ValueEnumerator.h" 15 #include "llvm/ADT/STLExtras.h" 16 #include "llvm/ADT/SmallPtrSet.h" 17 #include "llvm/IR/Constants.h" 18 #include "llvm/IR/DebugInfoMetadata.h" 19 #include "llvm/IR/DerivedTypes.h" 20 #include "llvm/IR/Instructions.h" 21 #include "llvm/IR/Module.h" 22 #include "llvm/IR/ValueSymbolTable.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include <algorithm> 26 using namespace llvm; 27 28 namespace llvm_2_9_func { 29 30 static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) { 31 return V.first->getType()->isIntOrIntVectorTy(); 32 } 33 34 /// ValueEnumerator - Enumerate module-level information. 35 ValueEnumerator::ValueEnumerator(const llvm::Module &M) 36 : HasMDString(false), HasDILocation(false) { 37 // Enumerate the global variables. 38 for (llvm::Module::const_global_iterator I = M.global_begin(), E = M.global_end(); 39 I != E; ++I) 40 EnumerateValue(&*I); 41 42 // Enumerate the functions. 43 for (llvm::Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) { 44 EnumerateValue(&*I); 45 EnumerateAttributes(cast<Function>(I)->getAttributes()); 46 } 47 48 // Enumerate the aliases. 49 for (llvm::Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end(); 50 I != E; ++I) 51 EnumerateValue(&*I); 52 53 // Remember what is the cutoff between globalvalue's and other constants. 54 unsigned FirstConstant = Values.size(); 55 56 // Enumerate the global variable initializers. 57 for (llvm::Module::const_global_iterator I = M.global_begin(), E = M.global_end(); 58 I != E; ++I) 59 if (I->hasInitializer()) 60 EnumerateValue(I->getInitializer()); 61 62 // Enumerate the aliasees. 63 for (llvm::Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end(); 64 I != E; ++I) 65 EnumerateValue(I->getAliasee()); 66 67 // Enumerate the metadata type. 68 // 69 // TODO: Move this to ValueEnumerator::EnumerateOperandType() once bitcode 70 // only encodes the metadata type when it's used as a value. 71 EnumerateType(Type::getMetadataTy(M.getContext())); 72 73 // Insert constants and metadata that are named at module level into the slot 74 // pool so that the module symbol table can refer to them... 75 EnumerateValueSymbolTable(M.getValueSymbolTable()); 76 EnumerateNamedMetadata(M); 77 78 SmallVector<std::pair<unsigned, MDNode *>, 8> MDs; 79 80 // Enumerate types used by function bodies and argument lists. 81 for (const Function &F : M) { 82 for (const Argument &A : F.args()) 83 EnumerateType(A.getType()); 84 85 for (const BasicBlock &BB : F) 86 for (const Instruction &I : BB) { 87 for (const Use &Op : I.operands()) { 88 auto *MD = dyn_cast<MetadataAsValue>(&Op); 89 if (!MD) { 90 EnumerateOperandType(Op); 91 continue; 92 } 93 94 // Local metadata is enumerated during function-incorporation. 95 if (isa<LocalAsMetadata>(MD->getMetadata())) 96 continue; 97 98 EnumerateMetadata(MD->getMetadata()); 99 } 100 EnumerateType(I.getType()); 101 if (const CallInst *CI = dyn_cast<CallInst>(&I)) 102 EnumerateAttributes(CI->getAttributes()); 103 else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) 104 EnumerateAttributes(II->getAttributes()); 105 106 // Enumerate metadata attached with this instruction. 107 MDs.clear(); 108 I.getAllMetadataOtherThanDebugLoc(MDs); 109 for (unsigned i = 0, e = MDs.size(); i != e; ++i) 110 EnumerateMetadata(MDs[i].second); 111 112 // Don't enumerate the location directly -- it has a special record 113 // type -- but enumerate its operands. 114 if (DILocation *L = I.getDebugLoc()) 115 EnumerateMDNodeOperands(L); 116 } 117 } 118 119 // Optimize constant ordering. 120 OptimizeConstants(FirstConstant, Values.size()); 121 } 122 123 unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const { 124 InstructionMapType::const_iterator I = InstructionMap.find(Inst); 125 assert(I != InstructionMap.end() && "Instruction is not mapped!"); 126 return I->second; 127 } 128 129 void ValueEnumerator::setInstructionID(const Instruction *I) { 130 InstructionMap[I] = InstructionCount++; 131 } 132 133 unsigned ValueEnumerator::getValueID(const Value *V) const { 134 if (auto *MD = dyn_cast<MetadataAsValue>(V)) 135 return getMetadataID(MD->getMetadata()); 136 137 ValueMapType::const_iterator I = ValueMap.find(V); 138 assert(I != ValueMap.end() && "Value not in slotcalculator!"); 139 return I->second-1; 140 } 141 142 void ValueEnumerator::dump() const { 143 print(dbgs(), ValueMap, "Default"); 144 dbgs() << '\n'; 145 print(dbgs(), MDValueMap, "MetaData"); 146 dbgs() << '\n'; 147 } 148 149 void ValueEnumerator::print(raw_ostream &OS, const ValueMapType &Map, 150 const char *Name) const { 151 152 OS << "Map Name: " << Name << "\n"; 153 OS << "Size: " << Map.size() << "\n"; 154 for (ValueMapType::const_iterator I = Map.begin(), 155 E = Map.end(); I != E; ++I) { 156 157 const Value *V = I->first; 158 if (V->hasName()) 159 OS << "Value: " << V->getName(); 160 else 161 OS << "Value: [null]\n"; 162 V->dump(); 163 164 OS << " Uses(" << std::distance(V->use_begin(),V->use_end()) << "):"; 165 for (const Use &U : V->uses()) { 166 if (&U != &*V->use_begin()) 167 OS << ","; 168 if(U->hasName()) 169 OS << " " << U->getName(); 170 else 171 OS << " [null]"; 172 173 } 174 OS << "\n\n"; 175 } 176 } 177 178 void ValueEnumerator::print(llvm::raw_ostream &OS, const MetadataMapType &Map, 179 const char *Name) const { 180 181 OS << "Map Name: " << Name << "\n"; 182 OS << "Size: " << Map.size() << "\n"; 183 for (auto I = Map.begin(), E = Map.end(); I != E; ++I) { 184 const llvm::Metadata *MD = I->first; 185 OS << "Metadata: slot = " << I->second << "\n"; 186 MD->print(OS); 187 } 188 } 189 190 191 // Optimize constant ordering. 192 namespace { 193 struct CstSortPredicate { 194 ValueEnumerator &VE; 195 explicit CstSortPredicate(ValueEnumerator &ve) : VE(ve) {} 196 bool operator()(const std::pair<const Value*, unsigned> &LHS, 197 const std::pair<const Value*, unsigned> &RHS) { 198 // Sort by plane. 199 if (LHS.first->getType() != RHS.first->getType()) 200 return VE.getTypeID(LHS.first->getType()) < 201 VE.getTypeID(RHS.first->getType()); 202 // Then by frequency. 203 return LHS.second > RHS.second; 204 } 205 }; 206 } 207 208 /// OptimizeConstants - Reorder constant pool for denser encoding. 209 void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) { 210 if (CstStart == CstEnd || CstStart+1 == CstEnd) return; 211 212 CstSortPredicate P(*this); 213 std::stable_sort(Values.begin()+CstStart, Values.begin()+CstEnd, P); 214 215 // Ensure that integer and vector of integer constants are at the start of the 216 // constant pool. This is important so that GEP structure indices come before 217 // gep constant exprs. 218 std::partition(Values.begin()+CstStart, Values.begin()+CstEnd, 219 isIntOrIntVectorValue); 220 221 // Rebuild the modified portion of ValueMap. 222 for (; CstStart != CstEnd; ++CstStart) 223 ValueMap[Values[CstStart].first] = CstStart+1; 224 } 225 226 227 /// EnumerateValueSymbolTable - Insert all of the values in the specified symbol 228 /// table into the values table. 229 void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) { 230 for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end(); 231 VI != VE; ++VI) 232 EnumerateValue(VI->getValue()); 233 } 234 235 /// EnumerateNamedMetadata - Insert all of the values referenced by 236 /// named metadata in the specified module. 237 void ValueEnumerator::EnumerateNamedMetadata(const llvm::Module &M) { 238 for (llvm::Module::const_named_metadata_iterator I = M.named_metadata_begin(), 239 E = M.named_metadata_end(); 240 I != E; ++I) 241 EnumerateNamedMDNode(&*I); 242 } 243 244 void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) { 245 for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) 246 EnumerateMetadata(MD->getOperand(i)); 247 } 248 249 /// EnumerateMDNodeOperands - Enumerate all non-function-local values 250 /// and types referenced by the given MDNode. 251 void ValueEnumerator::EnumerateMDNodeOperands(const MDNode *N) { 252 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 253 Metadata *MD = N->getOperand(i); 254 if (!MD) 255 continue; 256 assert(!isa<LocalAsMetadata>(MD) && "MDNodes cannot be function-local"); 257 EnumerateMetadata(MD); 258 } 259 } 260 261 void ValueEnumerator::EnumerateMetadata(const llvm::Metadata *MD) { 262 assert( 263 (isa<MDNode>(MD) || isa<MDString>(MD) || isa<ConstantAsMetadata>(MD)) && 264 "Invalid metadata kind"); 265 266 // Insert a dummy ID to block the co-recursive call to 267 // EnumerateMDNodeOperands() from re-visiting MD in a cyclic graph. 268 // 269 // Return early if there's already an ID. 270 if (!MDValueMap.insert(std::make_pair(MD, 0)).second) 271 return; 272 273 // Visit operands first to minimize RAUW. 274 if (auto *N = dyn_cast<MDNode>(MD)) 275 EnumerateMDNodeOperands(N); 276 else if (auto *C = dyn_cast<ConstantAsMetadata>(MD)) 277 EnumerateValue(C->getValue()); 278 279 HasMDString |= isa<MDString>(MD); 280 HasDILocation |= isa<DILocation>(MD); 281 282 // Replace the dummy ID inserted above with the correct one. MDValueMap may 283 // have changed by inserting operands, so we need a fresh lookup here. 284 MDs.push_back(MD); 285 MDValueMap[MD] = MDs.size(); 286 } 287 288 /// EnumerateFunctionLocalMetadataa - Incorporate function-local metadata 289 /// information reachable from the metadata. 290 void ValueEnumerator::EnumerateFunctionLocalMetadata( 291 const llvm::LocalAsMetadata *Local) { 292 // Check to see if it's already in! 293 unsigned &MDValueID = MDValueMap[Local]; 294 if (MDValueID) 295 return; 296 297 MDs.push_back(Local); 298 MDValueID = MDs.size(); 299 300 EnumerateValue(Local->getValue()); 301 302 // Also, collect all function-local metadata for easy access. 303 FunctionLocalMDs.push_back(Local); 304 } 305 306 void ValueEnumerator::EnumerateValue(const Value *V) { 307 assert(!V->getType()->isVoidTy() && "Can't insert void values!"); 308 assert(!isa<MetadataAsValue>(V) && "EnumerateValue doesn't handle Metadata!"); 309 310 // Check to see if it's already in! 311 unsigned &ValueID = ValueMap[V]; 312 if (ValueID) { 313 // Increment use count. 314 Values[ValueID-1].second++; 315 return; 316 } 317 318 // Enumerate the type of this value. 319 EnumerateType(V->getType()); 320 321 if (const Constant *C = dyn_cast<Constant>(V)) { 322 if (isa<GlobalValue>(C)) { 323 // Initializers for globals are handled explicitly elsewhere. 324 } else if (C->getNumOperands()) { 325 // If a constant has operands, enumerate them. This makes sure that if a 326 // constant has uses (for example an array of const ints), that they are 327 // inserted also. 328 329 // We prefer to enumerate them with values before we enumerate the user 330 // itself. This makes it more likely that we can avoid forward references 331 // in the reader. We know that there can be no cycles in the constants 332 // graph that don't go through a global variable. 333 for (User::const_op_iterator I = C->op_begin(), E = C->op_end(); 334 I != E; ++I) 335 if (!isa<BasicBlock>(*I)) // Don't enumerate BB operand to BlockAddress. 336 EnumerateValue(*I); 337 338 // Finally, add the value. Doing this could make the ValueID reference be 339 // dangling, don't reuse it. 340 Values.push_back(std::make_pair(V, 1U)); 341 ValueMap[V] = Values.size(); 342 return; 343 } else if (const ConstantDataSequential *CDS = 344 dyn_cast<ConstantDataSequential>(C)) { 345 // For our legacy handling of the new ConstantDataSequential type, we 346 // need to enumerate the individual elements, as well as mark the 347 // outer constant as used. 348 for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) 349 EnumerateValue(CDS->getElementAsConstant(i)); 350 Values.push_back(std::make_pair(V, 1U)); 351 ValueMap[V] = Values.size(); 352 return; 353 } 354 } 355 356 // Add the value. 357 Values.push_back(std::make_pair(V, 1U)); 358 ValueID = Values.size(); 359 } 360 361 362 void ValueEnumerator::EnumerateType(Type *Ty) { 363 unsigned *TypeID = &TypeMap[Ty]; 364 365 // We've already seen this type. 366 if (*TypeID) 367 return; 368 369 // If it is a non-anonymous struct, mark the type as being visited so that we 370 // don't recursively visit it. This is safe because we allow forward 371 // references of these in the bitcode reader. 372 if (StructType *STy = dyn_cast<StructType>(Ty)) 373 if (!STy->isLiteral()) 374 *TypeID = ~0U; 375 376 // Enumerate all of the subtypes before we enumerate this type. This ensures 377 // that the type will be enumerated in an order that can be directly built. 378 for (Type *SubTy : Ty->subtypes()) 379 EnumerateType(SubTy); 380 381 // Refresh the TypeID pointer in case the table rehashed. 382 TypeID = &TypeMap[Ty]; 383 384 // Check to see if we got the pointer another way. This can happen when 385 // enumerating recursive types that hit the base case deeper than they start. 386 // 387 // If this is actually a struct that we are treating as forward ref'able, 388 // then emit the definition now that all of its contents are available. 389 if (*TypeID && *TypeID != ~0U) 390 return; 391 392 // Add this type now that its contents are all happily enumerated. 393 Types.push_back(Ty); 394 395 *TypeID = Types.size(); 396 } 397 398 // Enumerate the types for the specified value. If the value is a constant, 399 // walk through it, enumerating the types of the constant. 400 void ValueEnumerator::EnumerateOperandType(const Value *V) { 401 EnumerateType(V->getType()); 402 403 if (auto *MD = dyn_cast<MetadataAsValue>(V)) { 404 assert(!isa<LocalAsMetadata>(MD->getMetadata()) && 405 "Function-local metadata should be left for later"); 406 407 EnumerateMetadata(MD->getMetadata()); 408 return; 409 } 410 411 const Constant *C = dyn_cast<Constant>(V); 412 if (!C) 413 return; 414 415 // If this constant is already enumerated, ignore it, we know its type must 416 // be enumerated. 417 if (ValueMap.count(C)) 418 return; 419 420 // This constant may have operands, make sure to enumerate the types in 421 // them. 422 for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) { 423 const Value *Op = C->getOperand(i); 424 425 // Don't enumerate basic blocks here, this happens as operands to 426 // blockaddress. 427 if (isa<BasicBlock>(Op)) 428 continue; 429 430 EnumerateOperandType(Op); 431 } 432 } 433 434 void ValueEnumerator::EnumerateAttributes(AttributeSet PAL) { 435 if (PAL.isEmpty()) return; // null is always 0. 436 437 // Do a lookup. 438 unsigned &Entry = AttributeMap[PAL]; 439 if (Entry == 0) { 440 // Never saw this before, add it. 441 Attribute.push_back(PAL); 442 Entry = Attribute.size(); 443 } 444 445 // Do lookups for all attribute groups. 446 for (unsigned i = 0, e = PAL.getNumSlots(); i != e; ++i) { 447 AttributeSet AS = PAL.getSlotAttributes(i); 448 unsigned &Entry = AttributeGroupMap[AS]; 449 if (Entry == 0) { 450 AttributeGroups.push_back(AS); 451 Entry = AttributeGroups.size(); 452 } 453 } 454 } 455 456 void ValueEnumerator::incorporateFunction(const Function &F) { 457 InstructionCount = 0; 458 NumModuleValues = Values.size(); 459 NumModuleMDs = MDs.size(); 460 461 // Adding function arguments to the value table. 462 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); 463 I != E; ++I) 464 EnumerateValue(&*I); 465 466 FirstFuncConstantID = Values.size(); 467 468 // Add all function-level constants to the value table. 469 for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { 470 for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) 471 for (User::const_op_iterator OI = I->op_begin(), E = I->op_end(); 472 OI != E; ++OI) { 473 if ((isa<Constant>(*OI) && !isa<GlobalValue>(*OI)) || 474 isa<InlineAsm>(*OI)) 475 EnumerateValue(*OI); 476 } 477 BasicBlocks.push_back(&*BB); 478 ValueMap[&*BB] = BasicBlocks.size(); 479 } 480 481 // Optimize the constant layout. 482 OptimizeConstants(FirstFuncConstantID, Values.size()); 483 484 // Add the function's parameter attributes so they are available for use in 485 // the function's instruction. 486 EnumerateAttributes(F.getAttributes()); 487 488 FirstInstID = Values.size(); 489 490 SmallVector<llvm::LocalAsMetadata *, 8> FnLocalMDVector; 491 // Add all of the instructions. 492 for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { 493 for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) { 494 for (User::const_op_iterator OI = I->op_begin(), E = I->op_end(); 495 OI != E; ++OI) { 496 if (auto *MD = dyn_cast<llvm::MetadataAsValue>(&*OI)) 497 if (auto *Local = dyn_cast<LocalAsMetadata>(MD->getMetadata())) 498 // Enumerate metadata after the instructions they might refer to. 499 FnLocalMDVector.push_back(Local); 500 } 501 502 if (!I->getType()->isVoidTy()) 503 EnumerateValue(&*I); 504 } 505 } 506 507 // Add all of the function-local metadata. 508 for (unsigned i = 0, e = FnLocalMDVector.size(); i != e; ++i) 509 EnumerateFunctionLocalMetadata(FnLocalMDVector[i]); 510 } 511 512 void ValueEnumerator::purgeFunction() { 513 /// Remove purged values from the ValueMap. 514 for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i) 515 ValueMap.erase(Values[i].first); 516 for (unsigned i = NumModuleMDs, e = MDs.size(); i != e; ++i) 517 MDValueMap.erase(MDs[i]); 518 for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i) 519 ValueMap.erase(BasicBlocks[i]); 520 521 Values.resize(NumModuleValues); 522 MDs.resize(NumModuleMDs); 523 BasicBlocks.clear(); 524 FunctionLocalMDs.clear(); 525 } 526 527 static void IncorporateFunctionInfoGlobalBBIDs(const Function *F, 528 DenseMap<const BasicBlock*, unsigned> &IDMap) { 529 unsigned Counter = 0; 530 for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) 531 IDMap[&*BB] = ++Counter; 532 } 533 534 /// getGlobalBasicBlockID - This returns the function-specific ID for the 535 /// specified basic block. This is relatively expensive information, so it 536 /// should only be used by rare constructs such as address-of-label. 537 unsigned ValueEnumerator::getGlobalBasicBlockID(const BasicBlock *BB) const { 538 unsigned &Idx = GlobalBasicBlockIDs[BB]; 539 if (Idx != 0) 540 return Idx-1; 541 542 IncorporateFunctionInfoGlobalBBIDs(BB->getParent(), GlobalBasicBlockIDs); 543 return getGlobalBasicBlockID(BB); 544 } 545 546 } // end llvm_2_9_func namespace 547