Home | History | Annotate | Download | only in llvm-pdbutil
      1 //===- InputFile.cpp ------------------------------------------ *- C++ --*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 #include "InputFile.h"
     11 
     12 #include "FormatUtil.h"
     13 #include "LinePrinter.h"
     14 
     15 #include "llvm/BinaryFormat/Magic.h"
     16 #include "llvm/DebugInfo/CodeView/CodeView.h"
     17 #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
     18 #include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
     19 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
     20 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
     21 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
     22 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
     23 #include "llvm/DebugInfo/PDB/Native/RawError.h"
     24 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
     25 #include "llvm/DebugInfo/PDB/PDB.h"
     26 #include "llvm/Object/COFF.h"
     27 #include "llvm/Support/FileSystem.h"
     28 #include "llvm/Support/FormatVariadic.h"
     29 
     30 using namespace llvm;
     31 using namespace llvm::codeview;
     32 using namespace llvm::object;
     33 using namespace llvm::pdb;
     34 
     35 InputFile::InputFile() {}
     36 InputFile::~InputFile() {}
     37 
     38 static Expected<ModuleDebugStreamRef>
     39 getModuleDebugStream(PDBFile &File, StringRef &ModuleName, uint32_t Index) {
     40   ExitOnError Err("Unexpected error: ");
     41 
     42   auto &Dbi = Err(File.getPDBDbiStream());
     43   const auto &Modules = Dbi.modules();
     44   auto Modi = Modules.getModuleDescriptor(Index);
     45 
     46   ModuleName = Modi.getModuleName();
     47 
     48   uint16_t ModiStream = Modi.getModuleStreamIndex();
     49   if (ModiStream == kInvalidStreamIndex)
     50     return make_error<RawError>(raw_error_code::no_stream,
     51                                 "Module stream not present");
     52 
     53   auto ModStreamData = File.createIndexedStream(ModiStream);
     54 
     55   ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
     56   if (auto EC = ModS.reload())
     57     return make_error<RawError>(raw_error_code::corrupt_file,
     58                                 "Invalid module stream");
     59 
     60   return std::move(ModS);
     61 }
     62 
     63 static inline bool isCodeViewDebugSubsection(object::SectionRef Section,
     64                                              StringRef Name,
     65                                              BinaryStreamReader &Reader) {
     66   StringRef SectionName, Contents;
     67   if (Section.getName(SectionName))
     68     return false;
     69 
     70   if (SectionName != Name)
     71     return false;
     72 
     73   if (Section.getContents(Contents))
     74     return false;
     75 
     76   Reader = BinaryStreamReader(Contents, support::little);
     77   uint32_t Magic;
     78   if (Reader.bytesRemaining() < sizeof(uint32_t))
     79     return false;
     80   cantFail(Reader.readInteger(Magic));
     81   if (Magic != COFF::DEBUG_SECTION_MAGIC)
     82     return false;
     83   return true;
     84 }
     85 
     86 static inline bool isDebugSSection(object::SectionRef Section,
     87                                    DebugSubsectionArray &Subsections) {
     88   BinaryStreamReader Reader;
     89   if (!isCodeViewDebugSubsection(Section, ".debug$S", Reader))
     90     return false;
     91 
     92   cantFail(Reader.readArray(Subsections, Reader.bytesRemaining()));
     93   return true;
     94 }
     95 
     96 static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) {
     97   BinaryStreamReader Reader;
     98   if (!isCodeViewDebugSubsection(Section, ".debug$T", Reader) &&
     99       !isCodeViewDebugSubsection(Section, ".debug$P", Reader))
    100     return false;
    101   cantFail(Reader.readArray(Types, Reader.bytesRemaining()));
    102   return true;
    103 }
    104 
    105 static std::string formatChecksumKind(FileChecksumKind Kind) {
    106   switch (Kind) {
    107     RETURN_CASE(FileChecksumKind, None, "None");
    108     RETURN_CASE(FileChecksumKind, MD5, "MD5");
    109     RETURN_CASE(FileChecksumKind, SHA1, "SHA-1");
    110     RETURN_CASE(FileChecksumKind, SHA256, "SHA-256");
    111   }
    112   return formatUnknownEnum(Kind);
    113 }
    114 
    115 static const DebugStringTableSubsectionRef &extractStringTable(PDBFile &File) {
    116   return cantFail(File.getStringTable()).getStringTable();
    117 }
    118 
    119 template <typename... Args>
    120 static void formatInternal(LinePrinter &Printer, bool Append, Args &&... args) {
    121   if (Append)
    122     Printer.format(std::forward<Args>(args)...);
    123   else
    124     Printer.formatLine(std::forward<Args>(args)...);
    125 }
    126 
    127 SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) {
    128   if (!File)
    129     return;
    130 
    131   if (File->isPdb())
    132     initializeForPdb(GroupIndex);
    133   else {
    134     Name = ".debug$S";
    135     uint32_t I = 0;
    136     for (const auto &S : File->obj().sections()) {
    137       DebugSubsectionArray SS;
    138       if (!isDebugSSection(S, SS))
    139         continue;
    140 
    141       if (!SC.hasChecksums() || !SC.hasStrings())
    142         SC.initialize(SS);
    143 
    144       if (I == GroupIndex)
    145         Subsections = SS;
    146 
    147       if (SC.hasChecksums() && SC.hasStrings())
    148         break;
    149     }
    150     rebuildChecksumMap();
    151   }
    152 }
    153 
    154 StringRef SymbolGroup::name() const { return Name; }
    155 
    156 void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) {
    157   Subsections = SS;
    158 }
    159 
    160 void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); }
    161 
    162 void SymbolGroup::initializeForPdb(uint32_t Modi) {
    163   assert(File && File->isPdb());
    164 
    165   // PDB always uses the same string table, but each module has its own
    166   // checksums.  So we only set the strings if they're not already set.
    167   if (!SC.hasStrings())
    168     SC.setStrings(extractStringTable(File->pdb()));
    169 
    170   SC.resetChecksums();
    171   auto MDS = getModuleDebugStream(File->pdb(), Name, Modi);
    172   if (!MDS) {
    173     consumeError(MDS.takeError());
    174     return;
    175   }
    176 
    177   DebugStream = std::make_shared<ModuleDebugStreamRef>(std::move(*MDS));
    178   Subsections = DebugStream->getSubsectionsArray();
    179   SC.initialize(Subsections);
    180   rebuildChecksumMap();
    181 }
    182 
    183 void SymbolGroup::rebuildChecksumMap() {
    184   if (!SC.hasChecksums())
    185     return;
    186 
    187   for (const auto &Entry : SC.checksums()) {
    188     auto S = SC.strings().getString(Entry.FileNameOffset);
    189     if (!S)
    190       continue;
    191     ChecksumsByFile[*S] = Entry;
    192   }
    193 }
    194 
    195 const ModuleDebugStreamRef &SymbolGroup::getPdbModuleStream() const {
    196   assert(File && File->isPdb() && DebugStream);
    197   return *DebugStream;
    198 }
    199 
    200 Expected<StringRef> SymbolGroup::getNameFromStringTable(uint32_t Offset) const {
    201   return SC.strings().getString(Offset);
    202 }
    203 
    204 void SymbolGroup::formatFromFileName(LinePrinter &Printer, StringRef File,
    205                                      bool Append) const {
    206   auto FC = ChecksumsByFile.find(File);
    207   if (FC == ChecksumsByFile.end()) {
    208     formatInternal(Printer, Append, "- (no checksum) {0}", File);
    209     return;
    210   }
    211 
    212   formatInternal(Printer, Append, "- ({0}: {1}) {2}",
    213                  formatChecksumKind(FC->getValue().Kind),
    214                  toHex(FC->getValue().Checksum), File);
    215 }
    216 
    217 void SymbolGroup::formatFromChecksumsOffset(LinePrinter &Printer,
    218                                             uint32_t Offset,
    219                                             bool Append) const {
    220   if (!SC.hasChecksums()) {
    221     formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
    222     return;
    223   }
    224 
    225   auto Iter = SC.checksums().getArray().at(Offset);
    226   if (Iter == SC.checksums().getArray().end()) {
    227     formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
    228     return;
    229   }
    230 
    231   uint32_t FO = Iter->FileNameOffset;
    232   auto ExpectedFile = getNameFromStringTable(FO);
    233   if (!ExpectedFile) {
    234     formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
    235     consumeError(ExpectedFile.takeError());
    236     return;
    237   }
    238   if (Iter->Kind == FileChecksumKind::None) {
    239     formatInternal(Printer, Append, "{0} (no checksum)", *ExpectedFile);
    240   } else {
    241     formatInternal(Printer, Append, "{0} ({1}: {2})", *ExpectedFile,
    242                    formatChecksumKind(Iter->Kind), toHex(Iter->Checksum));
    243   }
    244 }
    245 
    246 Expected<InputFile> InputFile::open(StringRef Path, bool AllowUnknownFile) {
    247   InputFile IF;
    248   if (!llvm::sys::fs::exists(Path))
    249     return make_error<StringError>(formatv("File {0} not found", Path),
    250                                    inconvertibleErrorCode());
    251 
    252   file_magic Magic;
    253   if (auto EC = identify_magic(Path, Magic))
    254     return make_error<StringError>(
    255         formatv("Unable to identify file type for file {0}", Path), EC);
    256 
    257   if (Magic == file_magic::coff_object) {
    258     Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Path);
    259     if (!BinaryOrErr)
    260       return BinaryOrErr.takeError();
    261 
    262     IF.CoffObject = std::move(*BinaryOrErr);
    263     IF.PdbOrObj = llvm::cast<COFFObjectFile>(IF.CoffObject.getBinary());
    264     return std::move(IF);
    265   }
    266 
    267   if (Magic == file_magic::pdb) {
    268     std::unique_ptr<IPDBSession> Session;
    269     if (auto Err = loadDataForPDB(PDB_ReaderType::Native, Path, Session))
    270       return std::move(Err);
    271 
    272     IF.PdbSession.reset(static_cast<NativeSession *>(Session.release()));
    273     IF.PdbOrObj = &IF.PdbSession->getPDBFile();
    274 
    275     return std::move(IF);
    276   }
    277 
    278   if (!AllowUnknownFile)
    279     return make_error<StringError>(
    280         formatv("File {0} is not a supported file type", Path),
    281         inconvertibleErrorCode());
    282 
    283   auto Result = MemoryBuffer::getFile(Path, -1LL, false);
    284   if (!Result)
    285     return make_error<StringError>(
    286         formatv("File {0} could not be opened", Path), Result.getError());
    287 
    288   IF.UnknownFile = std::move(*Result);
    289   IF.PdbOrObj = IF.UnknownFile.get();
    290   return std::move(IF);
    291 }
    292 
    293 PDBFile &InputFile::pdb() {
    294   assert(isPdb());
    295   return *PdbOrObj.get<PDBFile *>();
    296 }
    297 
    298 const PDBFile &InputFile::pdb() const {
    299   assert(isPdb());
    300   return *PdbOrObj.get<PDBFile *>();
    301 }
    302 
    303 object::COFFObjectFile &InputFile::obj() {
    304   assert(isObj());
    305   return *PdbOrObj.get<object::COFFObjectFile *>();
    306 }
    307 
    308 const object::COFFObjectFile &InputFile::obj() const {
    309   assert(isObj());
    310   return *PdbOrObj.get<object::COFFObjectFile *>();
    311 }
    312 
    313 MemoryBuffer &InputFile::unknown() {
    314   assert(isUnknown());
    315   return *PdbOrObj.get<MemoryBuffer *>();
    316 }
    317 
    318 const MemoryBuffer &InputFile::unknown() const {
    319   assert(isUnknown());
    320   return *PdbOrObj.get<MemoryBuffer *>();
    321 }
    322 
    323 StringRef InputFile::getFilePath() const {
    324   if (isPdb())
    325     return pdb().getFilePath();
    326   if (isObj())
    327     return obj().getFileName();
    328   assert(isUnknown());
    329   return unknown().getBufferIdentifier();
    330 }
    331 
    332 bool InputFile::hasTypes() const {
    333   if (isPdb())
    334     return pdb().hasPDBTpiStream();
    335 
    336   for (const auto &Section : obj().sections()) {
    337     CVTypeArray Types;
    338     if (isDebugTSection(Section, Types))
    339       return true;
    340   }
    341   return false;
    342 }
    343 
    344 bool InputFile::hasIds() const {
    345   if (isObj())
    346     return false;
    347   return pdb().hasPDBIpiStream();
    348 }
    349 
    350 bool InputFile::isPdb() const { return PdbOrObj.is<PDBFile *>(); }
    351 
    352 bool InputFile::isObj() const {
    353   return PdbOrObj.is<object::COFFObjectFile *>();
    354 }
    355 
    356 bool InputFile::isUnknown() const { return PdbOrObj.is<MemoryBuffer *>(); }
    357 
    358 codeview::LazyRandomTypeCollection &
    359 InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) {
    360   if (Types && Kind == kTypes)
    361     return *Types;
    362   if (Ids && Kind == kIds)
    363     return *Ids;
    364 
    365   if (Kind == kIds) {
    366     assert(isPdb() && pdb().hasPDBIpiStream());
    367   }
    368 
    369   // If the collection was already initialized, we should have just returned it
    370   // in step 1.
    371   if (isPdb()) {
    372     TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types;
    373     auto &Stream = cantFail((Kind == kIds) ? pdb().getPDBIpiStream()
    374                                            : pdb().getPDBTpiStream());
    375 
    376     auto &Array = Stream.typeArray();
    377     uint32_t Count = Stream.getNumTypeRecords();
    378     auto Offsets = Stream.getTypeIndexOffsets();
    379     Collection =
    380         llvm::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets);
    381     return *Collection;
    382   }
    383 
    384   assert(isObj());
    385   assert(Kind == kTypes);
    386   assert(!Types);
    387 
    388   for (const auto &Section : obj().sections()) {
    389     CVTypeArray Records;
    390     if (!isDebugTSection(Section, Records))
    391       continue;
    392 
    393     Types = llvm::make_unique<LazyRandomTypeCollection>(Records, 100);
    394     return *Types;
    395   }
    396 
    397   Types = llvm::make_unique<LazyRandomTypeCollection>(100);
    398   return *Types;
    399 }
    400 
    401 codeview::LazyRandomTypeCollection &InputFile::types() {
    402   return getOrCreateTypeCollection(kTypes);
    403 }
    404 
    405 codeview::LazyRandomTypeCollection &InputFile::ids() {
    406   // Object files have only one type stream that contains both types and ids.
    407   // Similarly, some PDBs don't contain an IPI stream, and for those both types
    408   // and IDs are in the same stream.
    409   if (isObj() || !pdb().hasPDBIpiStream())
    410     return types();
    411 
    412   return getOrCreateTypeCollection(kIds);
    413 }
    414 
    415 iterator_range<SymbolGroupIterator> InputFile::symbol_groups() {
    416   return make_range<SymbolGroupIterator>(symbol_groups_begin(),
    417                                          symbol_groups_end());
    418 }
    419 
    420 SymbolGroupIterator InputFile::symbol_groups_begin() {
    421   return SymbolGroupIterator(*this);
    422 }
    423 
    424 SymbolGroupIterator InputFile::symbol_groups_end() {
    425   return SymbolGroupIterator();
    426 }
    427 
    428 SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {}
    429 
    430 SymbolGroupIterator::SymbolGroupIterator(InputFile &File) : Value(&File) {
    431   if (File.isObj()) {
    432     SectionIter = File.obj().section_begin();
    433     scanToNextDebugS();
    434   }
    435 }
    436 
    437 bool SymbolGroupIterator::operator==(const SymbolGroupIterator &R) const {
    438   bool E = isEnd();
    439   bool RE = R.isEnd();
    440   if (E || RE)
    441     return E == RE;
    442 
    443   if (Value.File != R.Value.File)
    444     return false;
    445   return Index == R.Index;
    446 }
    447 
    448 const SymbolGroup &SymbolGroupIterator::operator*() const {
    449   assert(!isEnd());
    450   return Value;
    451 }
    452 SymbolGroup &SymbolGroupIterator::operator*() {
    453   assert(!isEnd());
    454   return Value;
    455 }
    456 
    457 SymbolGroupIterator &SymbolGroupIterator::operator++() {
    458   assert(Value.File && !isEnd());
    459   ++Index;
    460   if (isEnd())
    461     return *this;
    462 
    463   if (Value.File->isPdb()) {
    464     Value.updatePdbModi(Index);
    465     return *this;
    466   }
    467 
    468   scanToNextDebugS();
    469   return *this;
    470 }
    471 
    472 void SymbolGroupIterator::scanToNextDebugS() {
    473   assert(SectionIter.hasValue());
    474   auto End = Value.File->obj().section_end();
    475   auto &Iter = *SectionIter;
    476   assert(!isEnd());
    477 
    478   while (++Iter != End) {
    479     DebugSubsectionArray SS;
    480     SectionRef SR = *Iter;
    481     if (!isDebugSSection(SR, SS))
    482       continue;
    483 
    484     Value.updateDebugS(SS);
    485     return;
    486   }
    487 }
    488 
    489 bool SymbolGroupIterator::isEnd() const {
    490   if (!Value.File)
    491     return true;
    492   if (Value.File->isPdb()) {
    493     auto &Dbi = cantFail(Value.File->pdb().getPDBDbiStream());
    494     uint32_t Count = Dbi.modules().getModuleCount();
    495     assert(Index <= Count);
    496     return Index == Count;
    497   }
    498 
    499   assert(SectionIter.hasValue());
    500   return *SectionIter == Value.File->obj().section_end();
    501 }
    502