1 //===- InputFile.cpp ------------------------------------------ *- C++ --*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "InputFile.h" 11 12 #include "FormatUtil.h" 13 #include "LinePrinter.h" 14 15 #include "llvm/BinaryFormat/Magic.h" 16 #include "llvm/DebugInfo/CodeView/CodeView.h" 17 #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" 18 #include "llvm/DebugInfo/CodeView/StringsAndChecksums.h" 19 #include "llvm/DebugInfo/PDB/Native/DbiStream.h" 20 #include "llvm/DebugInfo/PDB/Native/NativeSession.h" 21 #include "llvm/DebugInfo/PDB/Native/PDBFile.h" 22 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" 23 #include "llvm/DebugInfo/PDB/Native/RawError.h" 24 #include "llvm/DebugInfo/PDB/Native/TpiStream.h" 25 #include "llvm/DebugInfo/PDB/PDB.h" 26 #include "llvm/Object/COFF.h" 27 #include "llvm/Support/FileSystem.h" 28 #include "llvm/Support/FormatVariadic.h" 29 30 using namespace llvm; 31 using namespace llvm::codeview; 32 using namespace llvm::object; 33 using namespace llvm::pdb; 34 35 InputFile::InputFile() {} 36 InputFile::~InputFile() {} 37 38 static Expected<ModuleDebugStreamRef> 39 getModuleDebugStream(PDBFile &File, StringRef &ModuleName, uint32_t Index) { 40 ExitOnError Err("Unexpected error: "); 41 42 auto &Dbi = Err(File.getPDBDbiStream()); 43 const auto &Modules = Dbi.modules(); 44 auto Modi = Modules.getModuleDescriptor(Index); 45 46 ModuleName = Modi.getModuleName(); 47 48 uint16_t ModiStream = Modi.getModuleStreamIndex(); 49 if (ModiStream == kInvalidStreamIndex) 50 return make_error<RawError>(raw_error_code::no_stream, 51 "Module stream not present"); 52 53 auto ModStreamData = File.createIndexedStream(ModiStream); 54 55 ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData)); 56 if (auto EC = ModS.reload()) 57 return make_error<RawError>(raw_error_code::corrupt_file, 58 "Invalid module stream"); 59 60 return std::move(ModS); 61 } 62 63 static inline bool isCodeViewDebugSubsection(object::SectionRef Section, 64 StringRef Name, 65 BinaryStreamReader &Reader) { 66 StringRef SectionName, Contents; 67 if (Section.getName(SectionName)) 68 return false; 69 70 if (SectionName != Name) 71 return false; 72 73 if (Section.getContents(Contents)) 74 return false; 75 76 Reader = BinaryStreamReader(Contents, support::little); 77 uint32_t Magic; 78 if (Reader.bytesRemaining() < sizeof(uint32_t)) 79 return false; 80 cantFail(Reader.readInteger(Magic)); 81 if (Magic != COFF::DEBUG_SECTION_MAGIC) 82 return false; 83 return true; 84 } 85 86 static inline bool isDebugSSection(object::SectionRef Section, 87 DebugSubsectionArray &Subsections) { 88 BinaryStreamReader Reader; 89 if (!isCodeViewDebugSubsection(Section, ".debug$S", Reader)) 90 return false; 91 92 cantFail(Reader.readArray(Subsections, Reader.bytesRemaining())); 93 return true; 94 } 95 96 static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) { 97 BinaryStreamReader Reader; 98 if (!isCodeViewDebugSubsection(Section, ".debug$T", Reader) && 99 !isCodeViewDebugSubsection(Section, ".debug$P", Reader)) 100 return false; 101 cantFail(Reader.readArray(Types, Reader.bytesRemaining())); 102 return true; 103 } 104 105 static std::string formatChecksumKind(FileChecksumKind Kind) { 106 switch (Kind) { 107 RETURN_CASE(FileChecksumKind, None, "None"); 108 RETURN_CASE(FileChecksumKind, MD5, "MD5"); 109 RETURN_CASE(FileChecksumKind, SHA1, "SHA-1"); 110 RETURN_CASE(FileChecksumKind, SHA256, "SHA-256"); 111 } 112 return formatUnknownEnum(Kind); 113 } 114 115 static const DebugStringTableSubsectionRef &extractStringTable(PDBFile &File) { 116 return cantFail(File.getStringTable()).getStringTable(); 117 } 118 119 template <typename... Args> 120 static void formatInternal(LinePrinter &Printer, bool Append, Args &&... args) { 121 if (Append) 122 Printer.format(std::forward<Args>(args)...); 123 else 124 Printer.formatLine(std::forward<Args>(args)...); 125 } 126 127 SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) { 128 if (!File) 129 return; 130 131 if (File->isPdb()) 132 initializeForPdb(GroupIndex); 133 else { 134 Name = ".debug$S"; 135 uint32_t I = 0; 136 for (const auto &S : File->obj().sections()) { 137 DebugSubsectionArray SS; 138 if (!isDebugSSection(S, SS)) 139 continue; 140 141 if (!SC.hasChecksums() || !SC.hasStrings()) 142 SC.initialize(SS); 143 144 if (I == GroupIndex) 145 Subsections = SS; 146 147 if (SC.hasChecksums() && SC.hasStrings()) 148 break; 149 } 150 rebuildChecksumMap(); 151 } 152 } 153 154 StringRef SymbolGroup::name() const { return Name; } 155 156 void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) { 157 Subsections = SS; 158 } 159 160 void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); } 161 162 void SymbolGroup::initializeForPdb(uint32_t Modi) { 163 assert(File && File->isPdb()); 164 165 // PDB always uses the same string table, but each module has its own 166 // checksums. So we only set the strings if they're not already set. 167 if (!SC.hasStrings()) 168 SC.setStrings(extractStringTable(File->pdb())); 169 170 SC.resetChecksums(); 171 auto MDS = getModuleDebugStream(File->pdb(), Name, Modi); 172 if (!MDS) { 173 consumeError(MDS.takeError()); 174 return; 175 } 176 177 DebugStream = std::make_shared<ModuleDebugStreamRef>(std::move(*MDS)); 178 Subsections = DebugStream->getSubsectionsArray(); 179 SC.initialize(Subsections); 180 rebuildChecksumMap(); 181 } 182 183 void SymbolGroup::rebuildChecksumMap() { 184 if (!SC.hasChecksums()) 185 return; 186 187 for (const auto &Entry : SC.checksums()) { 188 auto S = SC.strings().getString(Entry.FileNameOffset); 189 if (!S) 190 continue; 191 ChecksumsByFile[*S] = Entry; 192 } 193 } 194 195 const ModuleDebugStreamRef &SymbolGroup::getPdbModuleStream() const { 196 assert(File && File->isPdb() && DebugStream); 197 return *DebugStream; 198 } 199 200 Expected<StringRef> SymbolGroup::getNameFromStringTable(uint32_t Offset) const { 201 return SC.strings().getString(Offset); 202 } 203 204 void SymbolGroup::formatFromFileName(LinePrinter &Printer, StringRef File, 205 bool Append) const { 206 auto FC = ChecksumsByFile.find(File); 207 if (FC == ChecksumsByFile.end()) { 208 formatInternal(Printer, Append, "- (no checksum) {0}", File); 209 return; 210 } 211 212 formatInternal(Printer, Append, "- ({0}: {1}) {2}", 213 formatChecksumKind(FC->getValue().Kind), 214 toHex(FC->getValue().Checksum), File); 215 } 216 217 void SymbolGroup::formatFromChecksumsOffset(LinePrinter &Printer, 218 uint32_t Offset, 219 bool Append) const { 220 if (!SC.hasChecksums()) { 221 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); 222 return; 223 } 224 225 auto Iter = SC.checksums().getArray().at(Offset); 226 if (Iter == SC.checksums().getArray().end()) { 227 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); 228 return; 229 } 230 231 uint32_t FO = Iter->FileNameOffset; 232 auto ExpectedFile = getNameFromStringTable(FO); 233 if (!ExpectedFile) { 234 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); 235 consumeError(ExpectedFile.takeError()); 236 return; 237 } 238 if (Iter->Kind == FileChecksumKind::None) { 239 formatInternal(Printer, Append, "{0} (no checksum)", *ExpectedFile); 240 } else { 241 formatInternal(Printer, Append, "{0} ({1}: {2})", *ExpectedFile, 242 formatChecksumKind(Iter->Kind), toHex(Iter->Checksum)); 243 } 244 } 245 246 Expected<InputFile> InputFile::open(StringRef Path, bool AllowUnknownFile) { 247 InputFile IF; 248 if (!llvm::sys::fs::exists(Path)) 249 return make_error<StringError>(formatv("File {0} not found", Path), 250 inconvertibleErrorCode()); 251 252 file_magic Magic; 253 if (auto EC = identify_magic(Path, Magic)) 254 return make_error<StringError>( 255 formatv("Unable to identify file type for file {0}", Path), EC); 256 257 if (Magic == file_magic::coff_object) { 258 Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Path); 259 if (!BinaryOrErr) 260 return BinaryOrErr.takeError(); 261 262 IF.CoffObject = std::move(*BinaryOrErr); 263 IF.PdbOrObj = llvm::cast<COFFObjectFile>(IF.CoffObject.getBinary()); 264 return std::move(IF); 265 } 266 267 if (Magic == file_magic::pdb) { 268 std::unique_ptr<IPDBSession> Session; 269 if (auto Err = loadDataForPDB(PDB_ReaderType::Native, Path, Session)) 270 return std::move(Err); 271 272 IF.PdbSession.reset(static_cast<NativeSession *>(Session.release())); 273 IF.PdbOrObj = &IF.PdbSession->getPDBFile(); 274 275 return std::move(IF); 276 } 277 278 if (!AllowUnknownFile) 279 return make_error<StringError>( 280 formatv("File {0} is not a supported file type", Path), 281 inconvertibleErrorCode()); 282 283 auto Result = MemoryBuffer::getFile(Path, -1LL, false); 284 if (!Result) 285 return make_error<StringError>( 286 formatv("File {0} could not be opened", Path), Result.getError()); 287 288 IF.UnknownFile = std::move(*Result); 289 IF.PdbOrObj = IF.UnknownFile.get(); 290 return std::move(IF); 291 } 292 293 PDBFile &InputFile::pdb() { 294 assert(isPdb()); 295 return *PdbOrObj.get<PDBFile *>(); 296 } 297 298 const PDBFile &InputFile::pdb() const { 299 assert(isPdb()); 300 return *PdbOrObj.get<PDBFile *>(); 301 } 302 303 object::COFFObjectFile &InputFile::obj() { 304 assert(isObj()); 305 return *PdbOrObj.get<object::COFFObjectFile *>(); 306 } 307 308 const object::COFFObjectFile &InputFile::obj() const { 309 assert(isObj()); 310 return *PdbOrObj.get<object::COFFObjectFile *>(); 311 } 312 313 MemoryBuffer &InputFile::unknown() { 314 assert(isUnknown()); 315 return *PdbOrObj.get<MemoryBuffer *>(); 316 } 317 318 const MemoryBuffer &InputFile::unknown() const { 319 assert(isUnknown()); 320 return *PdbOrObj.get<MemoryBuffer *>(); 321 } 322 323 StringRef InputFile::getFilePath() const { 324 if (isPdb()) 325 return pdb().getFilePath(); 326 if (isObj()) 327 return obj().getFileName(); 328 assert(isUnknown()); 329 return unknown().getBufferIdentifier(); 330 } 331 332 bool InputFile::hasTypes() const { 333 if (isPdb()) 334 return pdb().hasPDBTpiStream(); 335 336 for (const auto &Section : obj().sections()) { 337 CVTypeArray Types; 338 if (isDebugTSection(Section, Types)) 339 return true; 340 } 341 return false; 342 } 343 344 bool InputFile::hasIds() const { 345 if (isObj()) 346 return false; 347 return pdb().hasPDBIpiStream(); 348 } 349 350 bool InputFile::isPdb() const { return PdbOrObj.is<PDBFile *>(); } 351 352 bool InputFile::isObj() const { 353 return PdbOrObj.is<object::COFFObjectFile *>(); 354 } 355 356 bool InputFile::isUnknown() const { return PdbOrObj.is<MemoryBuffer *>(); } 357 358 codeview::LazyRandomTypeCollection & 359 InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) { 360 if (Types && Kind == kTypes) 361 return *Types; 362 if (Ids && Kind == kIds) 363 return *Ids; 364 365 if (Kind == kIds) { 366 assert(isPdb() && pdb().hasPDBIpiStream()); 367 } 368 369 // If the collection was already initialized, we should have just returned it 370 // in step 1. 371 if (isPdb()) { 372 TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types; 373 auto &Stream = cantFail((Kind == kIds) ? pdb().getPDBIpiStream() 374 : pdb().getPDBTpiStream()); 375 376 auto &Array = Stream.typeArray(); 377 uint32_t Count = Stream.getNumTypeRecords(); 378 auto Offsets = Stream.getTypeIndexOffsets(); 379 Collection = 380 llvm::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets); 381 return *Collection; 382 } 383 384 assert(isObj()); 385 assert(Kind == kTypes); 386 assert(!Types); 387 388 for (const auto &Section : obj().sections()) { 389 CVTypeArray Records; 390 if (!isDebugTSection(Section, Records)) 391 continue; 392 393 Types = llvm::make_unique<LazyRandomTypeCollection>(Records, 100); 394 return *Types; 395 } 396 397 Types = llvm::make_unique<LazyRandomTypeCollection>(100); 398 return *Types; 399 } 400 401 codeview::LazyRandomTypeCollection &InputFile::types() { 402 return getOrCreateTypeCollection(kTypes); 403 } 404 405 codeview::LazyRandomTypeCollection &InputFile::ids() { 406 // Object files have only one type stream that contains both types and ids. 407 // Similarly, some PDBs don't contain an IPI stream, and for those both types 408 // and IDs are in the same stream. 409 if (isObj() || !pdb().hasPDBIpiStream()) 410 return types(); 411 412 return getOrCreateTypeCollection(kIds); 413 } 414 415 iterator_range<SymbolGroupIterator> InputFile::symbol_groups() { 416 return make_range<SymbolGroupIterator>(symbol_groups_begin(), 417 symbol_groups_end()); 418 } 419 420 SymbolGroupIterator InputFile::symbol_groups_begin() { 421 return SymbolGroupIterator(*this); 422 } 423 424 SymbolGroupIterator InputFile::symbol_groups_end() { 425 return SymbolGroupIterator(); 426 } 427 428 SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {} 429 430 SymbolGroupIterator::SymbolGroupIterator(InputFile &File) : Value(&File) { 431 if (File.isObj()) { 432 SectionIter = File.obj().section_begin(); 433 scanToNextDebugS(); 434 } 435 } 436 437 bool SymbolGroupIterator::operator==(const SymbolGroupIterator &R) const { 438 bool E = isEnd(); 439 bool RE = R.isEnd(); 440 if (E || RE) 441 return E == RE; 442 443 if (Value.File != R.Value.File) 444 return false; 445 return Index == R.Index; 446 } 447 448 const SymbolGroup &SymbolGroupIterator::operator*() const { 449 assert(!isEnd()); 450 return Value; 451 } 452 SymbolGroup &SymbolGroupIterator::operator*() { 453 assert(!isEnd()); 454 return Value; 455 } 456 457 SymbolGroupIterator &SymbolGroupIterator::operator++() { 458 assert(Value.File && !isEnd()); 459 ++Index; 460 if (isEnd()) 461 return *this; 462 463 if (Value.File->isPdb()) { 464 Value.updatePdbModi(Index); 465 return *this; 466 } 467 468 scanToNextDebugS(); 469 return *this; 470 } 471 472 void SymbolGroupIterator::scanToNextDebugS() { 473 assert(SectionIter.hasValue()); 474 auto End = Value.File->obj().section_end(); 475 auto &Iter = *SectionIter; 476 assert(!isEnd()); 477 478 while (++Iter != End) { 479 DebugSubsectionArray SS; 480 SectionRef SR = *Iter; 481 if (!isDebugSSection(SR, SS)) 482 continue; 483 484 Value.updateDebugS(SS); 485 return; 486 } 487 } 488 489 bool SymbolGroupIterator::isEnd() const { 490 if (!Value.File) 491 return true; 492 if (Value.File->isPdb()) { 493 auto &Dbi = cantFail(Value.File->pdb().getPDBDbiStream()); 494 uint32_t Count = Dbi.modules().getModuleCount(); 495 assert(Index <= Count); 496 return Index == Count; 497 } 498 499 assert(SectionIter.hasValue()); 500 return *SectionIter == Value.File->obj().section_end(); 501 } 502