Home | History | Annotate | Download | only in Lex
      1 //===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements the PTHLexer interface.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "clang/Lex/PTHLexer.h"
     15 #include "clang/Basic/FileManager.h"
     16 #include "clang/Basic/FileSystemStatCache.h"
     17 #include "clang/Basic/IdentifierTable.h"
     18 #include "clang/Basic/TokenKinds.h"
     19 #include "clang/Lex/LexDiagnostic.h"
     20 #include "clang/Lex/PTHManager.h"
     21 #include "clang/Lex/Preprocessor.h"
     22 #include "clang/Lex/Token.h"
     23 #include "llvm/ADT/StringExtras.h"
     24 #include "llvm/ADT/StringMap.h"
     25 #include "llvm/Support/EndianStream.h"
     26 #include "llvm/Support/MemoryBuffer.h"
     27 #include "llvm/Support/OnDiskHashTable.h"
     28 #include <memory>
     29 #include <system_error>
     30 using namespace clang;
     31 
     32 static const unsigned StoredTokenSize = 1 + 1 + 2 + 4 + 4;
     33 
     34 //===----------------------------------------------------------------------===//
     35 // PTHLexer methods.
     36 //===----------------------------------------------------------------------===//
     37 
     38 PTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D,
     39                    const unsigned char *ppcond, PTHManager &PM)
     40   : PreprocessorLexer(&PP, FID), TokBuf(D), CurPtr(D), LastHashTokPtr(nullptr),
     41     PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) {
     42 
     43   FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID);
     44 }
     45 
     46 bool PTHLexer::Lex(Token& Tok) {
     47   //===--------------------------------------==//
     48   // Read the raw token data.
     49   //===--------------------------------------==//
     50   using namespace llvm::support;
     51 
     52   // Shadow CurPtr into an automatic variable.
     53   const unsigned char *CurPtrShadow = CurPtr;
     54 
     55   // Read in the data for the token.
     56   unsigned Word0 = endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
     57   uint32_t IdentifierID =
     58       endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
     59   uint32_t FileOffset =
     60       endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
     61 
     62   tok::TokenKind TKind = (tok::TokenKind) (Word0 & 0xFF);
     63   Token::TokenFlags TFlags = (Token::TokenFlags) ((Word0 >> 8) & 0xFF);
     64   uint32_t Len = Word0 >> 16;
     65 
     66   CurPtr = CurPtrShadow;
     67 
     68   //===--------------------------------------==//
     69   // Construct the token itself.
     70   //===--------------------------------------==//
     71 
     72   Tok.startToken();
     73   Tok.setKind(TKind);
     74   Tok.setFlag(TFlags);
     75   assert(!LexingRawMode);
     76   Tok.setLocation(FileStartLoc.getLocWithOffset(FileOffset));
     77   Tok.setLength(Len);
     78 
     79   // Handle identifiers.
     80   if (Tok.isLiteral()) {
     81     Tok.setLiteralData((const char*) (PTHMgr.SpellingBase + IdentifierID));
     82   }
     83   else if (IdentifierID) {
     84     MIOpt.ReadToken();
     85     IdentifierInfo *II = PTHMgr.GetIdentifierInfo(IdentifierID-1);
     86 
     87     Tok.setIdentifierInfo(II);
     88 
     89     // Change the kind of this identifier to the appropriate token kind, e.g.
     90     // turning "for" into a keyword.
     91     Tok.setKind(II->getTokenID());
     92 
     93     if (II->isHandleIdentifierCase())
     94       return PP->HandleIdentifier(Tok);
     95 
     96     return true;
     97   }
     98 
     99   //===--------------------------------------==//
    100   // Process the token.
    101   //===--------------------------------------==//
    102   if (TKind == tok::eof) {
    103     // Save the end-of-file token.
    104     EofToken = Tok;
    105 
    106     assert(!ParsingPreprocessorDirective);
    107     assert(!LexingRawMode);
    108 
    109     return LexEndOfFile(Tok);
    110   }
    111 
    112   if (TKind == tok::hash && Tok.isAtStartOfLine()) {
    113     LastHashTokPtr = CurPtr - StoredTokenSize;
    114     assert(!LexingRawMode);
    115     PP->HandleDirective(Tok);
    116 
    117     return false;
    118   }
    119 
    120   if (TKind == tok::eod) {
    121     assert(ParsingPreprocessorDirective);
    122     ParsingPreprocessorDirective = false;
    123     return true;
    124   }
    125 
    126   MIOpt.ReadToken();
    127   return true;
    128 }
    129 
    130 bool PTHLexer::LexEndOfFile(Token &Result) {
    131   // If we hit the end of the file while parsing a preprocessor directive,
    132   // end the preprocessor directive first.  The next token returned will
    133   // then be the end of file.
    134   if (ParsingPreprocessorDirective) {
    135     ParsingPreprocessorDirective = false; // Done parsing the "line".
    136     return true;  // Have a token.
    137   }
    138 
    139   assert(!LexingRawMode);
    140 
    141   // If we are in a #if directive, emit an error.
    142   while (!ConditionalStack.empty()) {
    143     if (PP->getCodeCompletionFileLoc() != FileStartLoc)
    144       PP->Diag(ConditionalStack.back().IfLoc,
    145                diag::err_pp_unterminated_conditional);
    146     ConditionalStack.pop_back();
    147   }
    148 
    149   // Finally, let the preprocessor handle this.
    150   return PP->HandleEndOfFile(Result);
    151 }
    152 
    153 // FIXME: We can just grab the last token instead of storing a copy
    154 // into EofToken.
    155 void PTHLexer::getEOF(Token& Tok) {
    156   assert(EofToken.is(tok::eof));
    157   Tok = EofToken;
    158 }
    159 
    160 void PTHLexer::DiscardToEndOfLine() {
    161   assert(ParsingPreprocessorDirective && ParsingFilename == false &&
    162          "Must be in a preprocessing directive!");
    163 
    164   // We assume that if the preprocessor wishes to discard to the end of
    165   // the line that it also means to end the current preprocessor directive.
    166   ParsingPreprocessorDirective = false;
    167 
    168   // Skip tokens by only peeking at their token kind and the flags.
    169   // We don't need to actually reconstruct full tokens from the token buffer.
    170   // This saves some copies and it also reduces IdentifierInfo* lookup.
    171   const unsigned char* p = CurPtr;
    172   while (1) {
    173     // Read the token kind.  Are we at the end of the file?
    174     tok::TokenKind x = (tok::TokenKind) (uint8_t) *p;
    175     if (x == tok::eof) break;
    176 
    177     // Read the token flags.  Are we at the start of the next line?
    178     Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1];
    179     if (y & Token::StartOfLine) break;
    180 
    181     // Skip to the next token.
    182     p += StoredTokenSize;
    183   }
    184 
    185   CurPtr = p;
    186 }
    187 
    188 /// SkipBlock - Used by Preprocessor to skip the current conditional block.
    189 bool PTHLexer::SkipBlock() {
    190   using namespace llvm::support;
    191   assert(CurPPCondPtr && "No cached PP conditional information.");
    192   assert(LastHashTokPtr && "No known '#' token.");
    193 
    194   const unsigned char *HashEntryI = nullptr;
    195   uint32_t TableIdx;
    196 
    197   do {
    198     // Read the token offset from the side-table.
    199     uint32_t Offset = endian::readNext<uint32_t, little, aligned>(CurPPCondPtr);
    200 
    201     // Read the target table index from the side-table.
    202     TableIdx = endian::readNext<uint32_t, little, aligned>(CurPPCondPtr);
    203 
    204     // Compute the actual memory address of the '#' token data for this entry.
    205     HashEntryI = TokBuf + Offset;
    206 
    207     // Optmization: "Sibling jumping".  #if...#else...#endif blocks can
    208     //  contain nested blocks.  In the side-table we can jump over these
    209     //  nested blocks instead of doing a linear search if the next "sibling"
    210     //  entry is not at a location greater than LastHashTokPtr.
    211     if (HashEntryI < LastHashTokPtr && TableIdx) {
    212       // In the side-table we are still at an entry for a '#' token that
    213       // is earlier than the last one we saw.  Check if the location we would
    214       // stride gets us closer.
    215       const unsigned char* NextPPCondPtr =
    216         PPCond + TableIdx*(sizeof(uint32_t)*2);
    217       assert(NextPPCondPtr >= CurPPCondPtr);
    218       // Read where we should jump to.
    219       const unsigned char *HashEntryJ =
    220           TokBuf + endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
    221 
    222       if (HashEntryJ <= LastHashTokPtr) {
    223         // Jump directly to the next entry in the side table.
    224         HashEntryI = HashEntryJ;
    225         TableIdx = endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
    226         CurPPCondPtr = NextPPCondPtr;
    227       }
    228     }
    229   }
    230   while (HashEntryI < LastHashTokPtr);
    231   assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'");
    232   assert(TableIdx && "No jumping from #endifs.");
    233 
    234   // Update our side-table iterator.
    235   const unsigned char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2);
    236   assert(NextPPCondPtr >= CurPPCondPtr);
    237   CurPPCondPtr = NextPPCondPtr;
    238 
    239   // Read where we should jump to.
    240   HashEntryI =
    241       TokBuf + endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
    242   uint32_t NextIdx = endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
    243 
    244   // By construction NextIdx will be zero if this is a #endif.  This is useful
    245   // to know to obviate lexing another token.
    246   bool isEndif = NextIdx == 0;
    247 
    248   // This case can occur when we see something like this:
    249   //
    250   //  #if ...
    251   //   /* a comment or nothing */
    252   //  #elif
    253   //
    254   // If we are skipping the first #if block it will be the case that CurPtr
    255   // already points 'elif'.  Just return.
    256 
    257   if (CurPtr > HashEntryI) {
    258     assert(CurPtr == HashEntryI + StoredTokenSize);
    259     // Did we reach a #endif?  If so, go ahead and consume that token as well.
    260     if (isEndif)
    261       CurPtr += StoredTokenSize * 2;
    262     else
    263       LastHashTokPtr = HashEntryI;
    264 
    265     return isEndif;
    266   }
    267 
    268   // Otherwise, we need to advance.  Update CurPtr to point to the '#' token.
    269   CurPtr = HashEntryI;
    270 
    271   // Update the location of the last observed '#'.  This is useful if we
    272   // are skipping multiple blocks.
    273   LastHashTokPtr = CurPtr;
    274 
    275   // Skip the '#' token.
    276   assert(((tok::TokenKind)*CurPtr) == tok::hash);
    277   CurPtr += StoredTokenSize;
    278 
    279   // Did we reach a #endif?  If so, go ahead and consume that token as well.
    280   if (isEndif) {
    281     CurPtr += StoredTokenSize * 2;
    282   }
    283 
    284   return isEndif;
    285 }
    286 
    287 SourceLocation PTHLexer::getSourceLocation() {
    288   // getSourceLocation is not on the hot path.  It is used to get the location
    289   // of the next token when transitioning back to this lexer when done
    290   // handling a #included file.  Just read the necessary data from the token
    291   // data buffer to construct the SourceLocation object.
    292   // NOTE: This is a virtual function; hence it is defined out-of-line.
    293   using namespace llvm::support;
    294 
    295   const unsigned char *OffsetPtr = CurPtr + (StoredTokenSize - 4);
    296   uint32_t Offset = endian::readNext<uint32_t, little, aligned>(OffsetPtr);
    297   return FileStartLoc.getLocWithOffset(Offset);
    298 }
    299 
    300 //===----------------------------------------------------------------------===//
    301 // PTH file lookup: map from strings to file data.
    302 //===----------------------------------------------------------------------===//
    303 
    304 /// PTHFileLookup - This internal data structure is used by the PTHManager
    305 ///  to map from FileEntry objects managed by FileManager to offsets within
    306 ///  the PTH file.
    307 namespace {
    308 class PTHFileData {
    309   const uint32_t TokenOff;
    310   const uint32_t PPCondOff;
    311 public:
    312   PTHFileData(uint32_t tokenOff, uint32_t ppCondOff)
    313     : TokenOff(tokenOff), PPCondOff(ppCondOff) {}
    314 
    315   uint32_t getTokenOffset() const { return TokenOff; }
    316   uint32_t getPPCondOffset() const { return PPCondOff; }
    317 };
    318 
    319 
    320 class PTHFileLookupCommonTrait {
    321 public:
    322   typedef std::pair<unsigned char, const char*> internal_key_type;
    323   typedef unsigned hash_value_type;
    324   typedef unsigned offset_type;
    325 
    326   static hash_value_type ComputeHash(internal_key_type x) {
    327     return llvm::HashString(x.second);
    328   }
    329 
    330   static std::pair<unsigned, unsigned>
    331   ReadKeyDataLength(const unsigned char*& d) {
    332     using namespace llvm::support;
    333     unsigned keyLen =
    334         (unsigned)endian::readNext<uint16_t, little, unaligned>(d);
    335     unsigned dataLen = (unsigned) *(d++);
    336     return std::make_pair(keyLen, dataLen);
    337   }
    338 
    339   static internal_key_type ReadKey(const unsigned char* d, unsigned) {
    340     unsigned char k = *(d++); // Read the entry kind.
    341     return std::make_pair(k, (const char*) d);
    342   }
    343 };
    344 
    345 class PTHFileLookupTrait : public PTHFileLookupCommonTrait {
    346 public:
    347   typedef const FileEntry* external_key_type;
    348   typedef PTHFileData      data_type;
    349 
    350   static internal_key_type GetInternalKey(const FileEntry* FE) {
    351     return std::make_pair((unsigned char) 0x1, FE->getName());
    352   }
    353 
    354   static bool EqualKey(internal_key_type a, internal_key_type b) {
    355     return a.first == b.first && strcmp(a.second, b.second) == 0;
    356   }
    357 
    358   static PTHFileData ReadData(const internal_key_type& k,
    359                               const unsigned char* d, unsigned) {
    360     assert(k.first == 0x1 && "Only file lookups can match!");
    361     using namespace llvm::support;
    362     uint32_t x = endian::readNext<uint32_t, little, unaligned>(d);
    363     uint32_t y = endian::readNext<uint32_t, little, unaligned>(d);
    364     return PTHFileData(x, y);
    365   }
    366 };
    367 
    368 class PTHStringLookupTrait {
    369 public:
    370   typedef uint32_t data_type;
    371   typedef const std::pair<const char*, unsigned> external_key_type;
    372   typedef external_key_type internal_key_type;
    373   typedef uint32_t hash_value_type;
    374   typedef unsigned offset_type;
    375 
    376   static bool EqualKey(const internal_key_type& a,
    377                        const internal_key_type& b) {
    378     return (a.second == b.second) ? memcmp(a.first, b.first, a.second) == 0
    379                                   : false;
    380   }
    381 
    382   static hash_value_type ComputeHash(const internal_key_type& a) {
    383     return llvm::HashString(StringRef(a.first, a.second));
    384   }
    385 
    386   // This hopefully will just get inlined and removed by the optimizer.
    387   static const internal_key_type&
    388   GetInternalKey(const external_key_type& x) { return x; }
    389 
    390   static std::pair<unsigned, unsigned>
    391   ReadKeyDataLength(const unsigned char*& d) {
    392     using namespace llvm::support;
    393     return std::make_pair(
    394         (unsigned)endian::readNext<uint16_t, little, unaligned>(d),
    395         sizeof(uint32_t));
    396   }
    397 
    398   static std::pair<const char*, unsigned>
    399   ReadKey(const unsigned char* d, unsigned n) {
    400       assert(n >= 2 && d[n-1] == '\0');
    401       return std::make_pair((const char*) d, n-1);
    402     }
    403 
    404   static uint32_t ReadData(const internal_key_type& k, const unsigned char* d,
    405                            unsigned) {
    406     using namespace llvm::support;
    407     return endian::readNext<uint32_t, little, unaligned>(d);
    408   }
    409 };
    410 
    411 } // end anonymous namespace
    412 
    413 typedef llvm::OnDiskChainedHashTable<PTHFileLookupTrait>   PTHFileLookup;
    414 typedef llvm::OnDiskChainedHashTable<PTHStringLookupTrait> PTHStringIdLookup;
    415 
    416 //===----------------------------------------------------------------------===//
    417 // PTHManager methods.
    418 //===----------------------------------------------------------------------===//
    419 
    420 PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup,
    421                        const unsigned char* idDataTable,
    422                        IdentifierInfo** perIDCache,
    423                        void* stringIdLookup, unsigned numIds,
    424                        const unsigned char* spellingBase,
    425                        const char* originalSourceFile)
    426 : Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup),
    427   IdDataTable(idDataTable), StringIdLookup(stringIdLookup),
    428   NumIds(numIds), PP(nullptr), SpellingBase(spellingBase),
    429   OriginalSourceFile(originalSourceFile) {}
    430 
    431 PTHManager::~PTHManager() {
    432   delete Buf;
    433   delete (PTHFileLookup*) FileLookup;
    434   delete (PTHStringIdLookup*) StringIdLookup;
    435   free(PerIDCache);
    436 }
    437 
    438 static void InvalidPTH(DiagnosticsEngine &Diags, const char *Msg) {
    439   Diags.Report(Diags.getCustomDiagID(DiagnosticsEngine::Error, "%0")) << Msg;
    440 }
    441 
    442 PTHManager *PTHManager::Create(const std::string &file,
    443                                DiagnosticsEngine &Diags) {
    444   // Memory map the PTH file.
    445   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
    446       llvm::MemoryBuffer::getFile(file);
    447 
    448   if (!FileOrErr) {
    449     // FIXME: Add ec.message() to this diag.
    450     Diags.Report(diag::err_invalid_pth_file) << file;
    451     return nullptr;
    452   }
    453   std::unique_ptr<llvm::MemoryBuffer> File = std::move(FileOrErr.get());
    454 
    455   using namespace llvm::support;
    456 
    457   // Get the buffer ranges and check if there are at least three 32-bit
    458   // words at the end of the file.
    459   const unsigned char *BufBeg = (const unsigned char*)File->getBufferStart();
    460   const unsigned char *BufEnd = (const unsigned char*)File->getBufferEnd();
    461 
    462   // Check the prologue of the file.
    463   if ((BufEnd - BufBeg) < (signed)(sizeof("cfe-pth") + 4 + 4) ||
    464       memcmp(BufBeg, "cfe-pth", sizeof("cfe-pth")) != 0) {
    465     Diags.Report(diag::err_invalid_pth_file) << file;
    466     return nullptr;
    467   }
    468 
    469   // Read the PTH version.
    470   const unsigned char *p = BufBeg + (sizeof("cfe-pth"));
    471   unsigned Version = endian::readNext<uint32_t, little, aligned>(p);
    472 
    473   if (Version < PTHManager::Version) {
    474     InvalidPTH(Diags,
    475         Version < PTHManager::Version
    476         ? "PTH file uses an older PTH format that is no longer supported"
    477         : "PTH file uses a newer PTH format that cannot be read");
    478     return nullptr;
    479   }
    480 
    481   // Compute the address of the index table at the end of the PTH file.
    482   const unsigned char *PrologueOffset = p;
    483 
    484   if (PrologueOffset >= BufEnd) {
    485     Diags.Report(diag::err_invalid_pth_file) << file;
    486     return nullptr;
    487   }
    488 
    489   // Construct the file lookup table.  This will be used for mapping from
    490   // FileEntry*'s to cached tokens.
    491   const unsigned char* FileTableOffset = PrologueOffset + sizeof(uint32_t)*2;
    492   const unsigned char *FileTable =
    493       BufBeg + endian::readNext<uint32_t, little, aligned>(FileTableOffset);
    494 
    495   if (!(FileTable > BufBeg && FileTable < BufEnd)) {
    496     Diags.Report(diag::err_invalid_pth_file) << file;
    497     return nullptr; // FIXME: Proper error diagnostic?
    498   }
    499 
    500   std::unique_ptr<PTHFileLookup> FL(PTHFileLookup::Create(FileTable, BufBeg));
    501 
    502   // Warn if the PTH file is empty.  We still want to create a PTHManager
    503   // as the PTH could be used with -include-pth.
    504   if (FL->isEmpty())
    505     InvalidPTH(Diags, "PTH file contains no cached source data");
    506 
    507   // Get the location of the table mapping from persistent ids to the
    508   // data needed to reconstruct identifiers.
    509   const unsigned char* IDTableOffset = PrologueOffset + sizeof(uint32_t)*0;
    510   const unsigned char *IData =
    511       BufBeg + endian::readNext<uint32_t, little, aligned>(IDTableOffset);
    512 
    513   if (!(IData >= BufBeg && IData < BufEnd)) {
    514     Diags.Report(diag::err_invalid_pth_file) << file;
    515     return nullptr;
    516   }
    517 
    518   // Get the location of the hashtable mapping between strings and
    519   // persistent IDs.
    520   const unsigned char* StringIdTableOffset = PrologueOffset + sizeof(uint32_t)*1;
    521   const unsigned char *StringIdTable =
    522       BufBeg + endian::readNext<uint32_t, little, aligned>(StringIdTableOffset);
    523   if (!(StringIdTable >= BufBeg && StringIdTable < BufEnd)) {
    524     Diags.Report(diag::err_invalid_pth_file) << file;
    525     return nullptr;
    526   }
    527 
    528   std::unique_ptr<PTHStringIdLookup> SL(
    529       PTHStringIdLookup::Create(StringIdTable, BufBeg));
    530 
    531   // Get the location of the spelling cache.
    532   const unsigned char* spellingBaseOffset = PrologueOffset + sizeof(uint32_t)*3;
    533   const unsigned char *spellingBase =
    534       BufBeg + endian::readNext<uint32_t, little, aligned>(spellingBaseOffset);
    535   if (!(spellingBase >= BufBeg && spellingBase < BufEnd)) {
    536     Diags.Report(diag::err_invalid_pth_file) << file;
    537     return nullptr;
    538   }
    539 
    540   // Get the number of IdentifierInfos and pre-allocate the identifier cache.
    541   uint32_t NumIds = endian::readNext<uint32_t, little, aligned>(IData);
    542 
    543   // Pre-allocate the persistent ID -> IdentifierInfo* cache.  We use calloc()
    544   // so that we in the best case only zero out memory once when the OS returns
    545   // us new pages.
    546   IdentifierInfo **PerIDCache = nullptr;
    547 
    548   if (NumIds) {
    549     PerIDCache = (IdentifierInfo**)calloc(NumIds, sizeof(*PerIDCache));
    550     if (!PerIDCache) {
    551       InvalidPTH(Diags, "Could not allocate memory for processing PTH file");
    552       return nullptr;
    553     }
    554   }
    555 
    556   // Compute the address of the original source file.
    557   const unsigned char* originalSourceBase = PrologueOffset + sizeof(uint32_t)*4;
    558   unsigned len =
    559       endian::readNext<uint16_t, little, unaligned>(originalSourceBase);
    560   if (!len) originalSourceBase = nullptr;
    561 
    562   // Create the new PTHManager.
    563   return new PTHManager(File.release(), FL.release(), IData, PerIDCache,
    564                         SL.release(), NumIds, spellingBase,
    565                         (const char *)originalSourceBase);
    566 }
    567 
    568 IdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) {
    569   using namespace llvm::support;
    570   // Look in the PTH file for the string data for the IdentifierInfo object.
    571   const unsigned char* TableEntry = IdDataTable + sizeof(uint32_t)*PersistentID;
    572   const unsigned char *IDData =
    573       (const unsigned char *)Buf->getBufferStart() +
    574       endian::readNext<uint32_t, little, aligned>(TableEntry);
    575   assert(IDData < (const unsigned char*)Buf->getBufferEnd());
    576 
    577   // Allocate the object.
    578   std::pair<IdentifierInfo,const unsigned char*> *Mem =
    579     Alloc.Allocate<std::pair<IdentifierInfo,const unsigned char*> >();
    580 
    581   Mem->second = IDData;
    582   assert(IDData[0] != '\0');
    583   IdentifierInfo *II = new ((void*) Mem) IdentifierInfo();
    584 
    585   // Store the new IdentifierInfo in the cache.
    586   PerIDCache[PersistentID] = II;
    587   assert(II->getNameStart() && II->getNameStart()[0] != '\0');
    588   return II;
    589 }
    590 
    591 IdentifierInfo* PTHManager::get(StringRef Name) {
    592   PTHStringIdLookup& SL = *((PTHStringIdLookup*)StringIdLookup);
    593   // Double check our assumption that the last character isn't '\0'.
    594   assert(Name.empty() || Name.back() != '\0');
    595   PTHStringIdLookup::iterator I = SL.find(std::make_pair(Name.data(),
    596                                                          Name.size()));
    597   if (I == SL.end()) // No identifier found?
    598     return nullptr;
    599 
    600   // Match found.  Return the identifier!
    601   assert(*I > 0);
    602   return GetIdentifierInfo(*I-1);
    603 }
    604 
    605 PTHLexer *PTHManager::CreateLexer(FileID FID) {
    606   const FileEntry *FE = PP->getSourceManager().getFileEntryForID(FID);
    607   if (!FE)
    608     return nullptr;
    609 
    610   using namespace llvm::support;
    611 
    612   // Lookup the FileEntry object in our file lookup data structure.  It will
    613   // return a variant that indicates whether or not there is an offset within
    614   // the PTH file that contains cached tokens.
    615   PTHFileLookup& PFL = *((PTHFileLookup*)FileLookup);
    616   PTHFileLookup::iterator I = PFL.find(FE);
    617 
    618   if (I == PFL.end()) // No tokens available?
    619     return nullptr;
    620 
    621   const PTHFileData& FileData = *I;
    622 
    623   const unsigned char *BufStart = (const unsigned char *)Buf->getBufferStart();
    624   // Compute the offset of the token data within the buffer.
    625   const unsigned char* data = BufStart + FileData.getTokenOffset();
    626 
    627   // Get the location of pp-conditional table.
    628   const unsigned char* ppcond = BufStart + FileData.getPPCondOffset();
    629   uint32_t Len = endian::readNext<uint32_t, little, aligned>(ppcond);
    630   if (Len == 0) ppcond = nullptr;
    631 
    632   assert(PP && "No preprocessor set yet!");
    633   return new PTHLexer(*PP, FID, data, ppcond, *this);
    634 }
    635 
    636 //===----------------------------------------------------------------------===//
    637 // 'stat' caching.
    638 //===----------------------------------------------------------------------===//
    639 
    640 namespace {
    641 class PTHStatData {
    642 public:
    643   const bool HasData;
    644   uint64_t Size;
    645   time_t ModTime;
    646   llvm::sys::fs::UniqueID UniqueID;
    647   bool IsDirectory;
    648 
    649   PTHStatData(uint64_t Size, time_t ModTime, llvm::sys::fs::UniqueID UniqueID,
    650               bool IsDirectory)
    651       : HasData(true), Size(Size), ModTime(ModTime), UniqueID(UniqueID),
    652         IsDirectory(IsDirectory) {}
    653 
    654   PTHStatData() : HasData(false) {}
    655 };
    656 
    657 class PTHStatLookupTrait : public PTHFileLookupCommonTrait {
    658 public:
    659   typedef const char* external_key_type;  // const char*
    660   typedef PTHStatData data_type;
    661 
    662   static internal_key_type GetInternalKey(const char *path) {
    663     // The key 'kind' doesn't matter here because it is ignored in EqualKey.
    664     return std::make_pair((unsigned char) 0x0, path);
    665   }
    666 
    667   static bool EqualKey(internal_key_type a, internal_key_type b) {
    668     // When doing 'stat' lookups we don't care about the kind of 'a' and 'b',
    669     // just the paths.
    670     return strcmp(a.second, b.second) == 0;
    671   }
    672 
    673   static data_type ReadData(const internal_key_type& k, const unsigned char* d,
    674                             unsigned) {
    675 
    676     if (k.first /* File or Directory */) {
    677       bool IsDirectory = true;
    678       if (k.first == 0x1 /* File */) {
    679         IsDirectory = false;
    680         d += 4 * 2; // Skip the first 2 words.
    681       }
    682 
    683       using namespace llvm::support;
    684 
    685       uint64_t File = endian::readNext<uint64_t, little, unaligned>(d);
    686       uint64_t Device = endian::readNext<uint64_t, little, unaligned>(d);
    687       llvm::sys::fs::UniqueID UniqueID(File, Device);
    688       time_t ModTime = endian::readNext<uint64_t, little, unaligned>(d);
    689       uint64_t Size = endian::readNext<uint64_t, little, unaligned>(d);
    690       return data_type(Size, ModTime, UniqueID, IsDirectory);
    691     }
    692 
    693     // Negative stat.  Don't read anything.
    694     return data_type();
    695   }
    696 };
    697 
    698 class PTHStatCache : public FileSystemStatCache {
    699   typedef llvm::OnDiskChainedHashTable<PTHStatLookupTrait> CacheTy;
    700   CacheTy Cache;
    701 
    702 public:
    703   PTHStatCache(PTHFileLookup &FL) :
    704     Cache(FL.getNumBuckets(), FL.getNumEntries(), FL.getBuckets(),
    705           FL.getBase()) {}
    706 
    707   LookupResult getStat(const char *Path, FileData &Data, bool isFile,
    708                        std::unique_ptr<vfs::File> *F,
    709                        vfs::FileSystem &FS) override {
    710     // Do the lookup for the file's data in the PTH file.
    711     CacheTy::iterator I = Cache.find(Path);
    712 
    713     // If we don't get a hit in the PTH file just forward to 'stat'.
    714     if (I == Cache.end())
    715       return statChained(Path, Data, isFile, F, FS);
    716 
    717     const PTHStatData &D = *I;
    718 
    719     if (!D.HasData)
    720       return CacheMissing;
    721 
    722     Data.Name = Path;
    723     Data.Size = D.Size;
    724     Data.ModTime = D.ModTime;
    725     Data.UniqueID = D.UniqueID;
    726     Data.IsDirectory = D.IsDirectory;
    727     Data.IsNamedPipe = false;
    728     Data.InPCH = true;
    729 
    730     return CacheExists;
    731   }
    732 };
    733 } // end anonymous namespace
    734 
    735 FileSystemStatCache *PTHManager::createStatCache() {
    736   return new PTHStatCache(*((PTHFileLookup*) FileLookup));
    737 }
    738