Home | History | Annotate | Download | only in Lex
      1 //===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements the PTHLexer interface.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "clang/Lex/PTHLexer.h"
     15 #include "clang/Basic/FileManager.h"
     16 #include "clang/Basic/FileSystemStatCache.h"
     17 #include "clang/Basic/IdentifierTable.h"
     18 #include "clang/Basic/TokenKinds.h"
     19 #include "clang/Lex/LexDiagnostic.h"
     20 #include "clang/Lex/PTHManager.h"
     21 #include "clang/Lex/Preprocessor.h"
     22 #include "clang/Lex/Token.h"
     23 #include "llvm/ADT/StringExtras.h"
     24 #include "llvm/ADT/StringMap.h"
     25 #include "llvm/Support/EndianStream.h"
     26 #include "llvm/Support/MemoryBuffer.h"
     27 #include <memory>
     28 #include <system_error>
     29 using namespace clang;
     30 
// Size in bytes of one token record in the PTH token buffer.  PTHLexer::Lex
// reads a record as three little-endian 32-bit words: the first packs the
// token kind (1 byte), the token flags (1 byte) and the token length
// (2 bytes); the second is the identifier ID / spelling offset (4 bytes); the
// third is the token's offset within its file (4 bytes).
static const unsigned StoredTokenSize = 1 + 1 + 2 + 4 + 4;
     32 
     33 //===----------------------------------------------------------------------===//
     34 // PTHLexer methods.
     35 //===----------------------------------------------------------------------===//
     36 
     37 PTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D,
     38                    const unsigned char *ppcond, PTHManager &PM)
     39   : PreprocessorLexer(&PP, FID), TokBuf(D), CurPtr(D), LastHashTokPtr(nullptr),
     40     PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) {
     41 
     42   FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID);
     43 }
     44 
     45 bool PTHLexer::Lex(Token& Tok) {
     46   //===--------------------------------------==//
     47   // Read the raw token data.
     48   //===--------------------------------------==//
     49   using namespace llvm::support;
     50 
     51   // Shadow CurPtr into an automatic variable.
     52   const unsigned char *CurPtrShadow = CurPtr;
     53 
     54   // Read in the data for the token.
     55   unsigned Word0 = endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
     56   uint32_t IdentifierID =
     57       endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
     58   uint32_t FileOffset =
     59       endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
     60 
     61   tok::TokenKind TKind = (tok::TokenKind) (Word0 & 0xFF);
     62   Token::TokenFlags TFlags = (Token::TokenFlags) ((Word0 >> 8) & 0xFF);
     63   uint32_t Len = Word0 >> 16;
     64 
     65   CurPtr = CurPtrShadow;
     66 
     67   //===--------------------------------------==//
     68   // Construct the token itself.
     69   //===--------------------------------------==//
     70 
     71   Tok.startToken();
     72   Tok.setKind(TKind);
     73   Tok.setFlag(TFlags);
     74   assert(!LexingRawMode);
     75   Tok.setLocation(FileStartLoc.getLocWithOffset(FileOffset));
     76   Tok.setLength(Len);
     77 
     78   // Handle identifiers.
     79   if (Tok.isLiteral()) {
     80     Tok.setLiteralData((const char*) (PTHMgr.SpellingBase + IdentifierID));
     81   }
     82   else if (IdentifierID) {
     83     MIOpt.ReadToken();
     84     IdentifierInfo *II = PTHMgr.GetIdentifierInfo(IdentifierID-1);
     85 
     86     Tok.setIdentifierInfo(II);
     87 
     88     // Change the kind of this identifier to the appropriate token kind, e.g.
     89     // turning "for" into a keyword.
     90     Tok.setKind(II->getTokenID());
     91 
     92     if (II->isHandleIdentifierCase())
     93       return PP->HandleIdentifier(Tok);
     94 
     95     return true;
     96   }
     97 
     98   //===--------------------------------------==//
     99   // Process the token.
    100   //===--------------------------------------==//
    101   if (TKind == tok::eof) {
    102     // Save the end-of-file token.
    103     EofToken = Tok;
    104 
    105     assert(!ParsingPreprocessorDirective);
    106     assert(!LexingRawMode);
    107 
    108     return LexEndOfFile(Tok);
    109   }
    110 
    111   if (TKind == tok::hash && Tok.isAtStartOfLine()) {
    112     LastHashTokPtr = CurPtr - StoredTokenSize;
    113     assert(!LexingRawMode);
    114     PP->HandleDirective(Tok);
    115 
    116     return false;
    117   }
    118 
    119   if (TKind == tok::eod) {
    120     assert(ParsingPreprocessorDirective);
    121     ParsingPreprocessorDirective = false;
    122     return true;
    123   }
    124 
    125   MIOpt.ReadToken();
    126   return true;
    127 }
    128 
    129 bool PTHLexer::LexEndOfFile(Token &Result) {
    130   // If we hit the end of the file while parsing a preprocessor directive,
    131   // end the preprocessor directive first.  The next token returned will
    132   // then be the end of file.
    133   if (ParsingPreprocessorDirective) {
    134     ParsingPreprocessorDirective = false; // Done parsing the "line".
    135     return true;  // Have a token.
    136   }
    137 
    138   assert(!LexingRawMode);
    139 
    140   // If we are in a #if directive, emit an error.
    141   while (!ConditionalStack.empty()) {
    142     if (PP->getCodeCompletionFileLoc() != FileStartLoc)
    143       PP->Diag(ConditionalStack.back().IfLoc,
    144                diag::err_pp_unterminated_conditional);
    145     ConditionalStack.pop_back();
    146   }
    147 
    148   // Finally, let the preprocessor handle this.
    149   return PP->HandleEndOfFile(Result);
    150 }
    151 
    152 // FIXME: We can just grab the last token instead of storing a copy
    153 // into EofToken.
    154 void PTHLexer::getEOF(Token& Tok) {
    155   assert(EofToken.is(tok::eof));
    156   Tok = EofToken;
    157 }
    158 
    159 void PTHLexer::DiscardToEndOfLine() {
    160   assert(ParsingPreprocessorDirective && ParsingFilename == false &&
    161          "Must be in a preprocessing directive!");
    162 
    163   // We assume that if the preprocessor wishes to discard to the end of
    164   // the line that it also means to end the current preprocessor directive.
    165   ParsingPreprocessorDirective = false;
    166 
    167   // Skip tokens by only peeking at their token kind and the flags.
    168   // We don't need to actually reconstruct full tokens from the token buffer.
    169   // This saves some copies and it also reduces IdentifierInfo* lookup.
    170   const unsigned char* p = CurPtr;
    171   while (1) {
    172     // Read the token kind.  Are we at the end of the file?
    173     tok::TokenKind x = (tok::TokenKind) (uint8_t) *p;
    174     if (x == tok::eof) break;
    175 
    176     // Read the token flags.  Are we at the start of the next line?
    177     Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1];
    178     if (y & Token::StartOfLine) break;
    179 
    180     // Skip to the next token.
    181     p += StoredTokenSize;
    182   }
    183 
    184   CurPtr = p;
    185 }
    186 
/// SkipBlock - Used by Preprocessor to skip the current conditional block.
///
/// Uses the cached preprocessor-conditional side-table to find the entry for
/// the most recently lexed '#' (LastHashTokPtr) and moves CurPtr just past the
/// '#' that the entry points to.  Each side-table entry is a pair of 32-bit
/// words: the offset of a '#' token within the token buffer, and the index of
/// the side-table entry to jump to.
///
/// \returns true if the target is a #endif, in which case the two token
/// records following its '#' are consumed as well; false otherwise.
bool PTHLexer::SkipBlock() {
  using namespace llvm::support;
  assert(CurPPCondPtr && "No cached PP conditional information.");
  assert(LastHashTokPtr && "No known '#' token.");

  const unsigned char *HashEntryI = nullptr;
  uint32_t TableIdx;

  // Walk the side-table forward until we reach the entry whose '#' token is
  // the one we saw last.
  do {
    // Read the token offset from the side-table.
    uint32_t Offset = endian::readNext<uint32_t, little, aligned>(CurPPCondPtr);

    // Read the target table index from the side-table.
    TableIdx = endian::readNext<uint32_t, little, aligned>(CurPPCondPtr);

    // Compute the actual memory address of the '#' token data for this entry.
    HashEntryI = TokBuf + Offset;

    // Optimization: "Sibling jumping".  #if...#else...#endif blocks can
    //  contain nested blocks.  In the side-table we can jump over these
    //  nested blocks instead of doing a linear search if the next "sibling"
    //  entry is not at a location greater than LastHashTokPtr.
    if (HashEntryI < LastHashTokPtr && TableIdx) {
      // In the side-table we are still at an entry for a '#' token that
      // is earlier than the last one we saw.  Check if the location we would
      // stride gets us closer.
      const unsigned char* NextPPCondPtr =
        PPCond + TableIdx*(sizeof(uint32_t)*2);
      assert(NextPPCondPtr >= CurPPCondPtr);
      // Read where we should jump to.
      const unsigned char *HashEntryJ =
          TokBuf + endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);

      if (HashEntryJ <= LastHashTokPtr) {
        // Jump directly to the next entry in the side table.
        HashEntryI = HashEntryJ;
        TableIdx = endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
        CurPPCondPtr = NextPPCondPtr;
      }
    }
  }
  while (HashEntryI < LastHashTokPtr);
  assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'");
  assert(TableIdx && "No jumping from #endifs.");

  // Update our side-table iterator.
  const unsigned char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2);
  assert(NextPPCondPtr >= CurPPCondPtr);
  CurPPCondPtr = NextPPCondPtr;

  // Read where we should jump to.
  HashEntryI =
      TokBuf + endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
  uint32_t NextIdx = endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);

  // By construction NextIdx will be zero if this is a #endif.  This is useful
  // to know to obviate lexing another token.
  bool isEndif = NextIdx == 0;

  // This case can occur when we see something like this:
  //
  //  #if ...
  //   /* a comment or nothing */
  //  #elif
  //
  // If we are skipping the first #if block it will be the case that CurPtr
  // already points at 'elif'.  Just return.

  if (CurPtr > HashEntryI) {
    assert(CurPtr == HashEntryI + StoredTokenSize);
    // Did we reach a #endif?  If so, go ahead and consume that token as well.
    if (isEndif)
      CurPtr += StoredTokenSize * 2;
    else
      LastHashTokPtr = HashEntryI;

    return isEndif;
  }

  // Otherwise, we need to advance.  Update CurPtr to point to the '#' token.
  CurPtr = HashEntryI;

  // Update the location of the last observed '#'.  This is useful if we
  // are skipping multiple blocks.
  LastHashTokPtr = CurPtr;

  // Skip the '#' token.
  assert(((tok::TokenKind)*CurPtr) == tok::hash);
  CurPtr += StoredTokenSize;

  // Did we reach a #endif?  If so, go ahead and consume that token as well.
  if (isEndif) {
    CurPtr += StoredTokenSize * 2;
  }

  return isEndif;
}
    285 
    286 SourceLocation PTHLexer::getSourceLocation() {
    287   // getSourceLocation is not on the hot path.  It is used to get the location
    288   // of the next token when transitioning back to this lexer when done
    289   // handling a #included file.  Just read the necessary data from the token
    290   // data buffer to construct the SourceLocation object.
    291   // NOTE: This is a virtual function; hence it is defined out-of-line.
    292   using namespace llvm::support;
    293 
    294   const unsigned char *OffsetPtr = CurPtr + (StoredTokenSize - 4);
    295   uint32_t Offset = endian::readNext<uint32_t, little, aligned>(OffsetPtr);
    296   return FileStartLoc.getLocWithOffset(Offset);
    297 }
    298 
    299 //===----------------------------------------------------------------------===//
    300 // PTH file lookup: map from strings to file data.
    301 //===----------------------------------------------------------------------===//
    302 
    303 /// PTHFileLookup - This internal data structure is used by the PTHManager
    304 ///  to map from FileEntry objects managed by FileManager to offsets within
    305 ///  the PTH file.
    306 namespace {
    307 class PTHFileData {
    308   const uint32_t TokenOff;
    309   const uint32_t PPCondOff;
    310 public:
    311   PTHFileData(uint32_t tokenOff, uint32_t ppCondOff)
    312     : TokenOff(tokenOff), PPCondOff(ppCondOff) {}
    313 
    314   uint32_t getTokenOffset() const { return TokenOff; }
    315   uint32_t getPPCondOffset() const { return PPCondOff; }
    316 };
    317 
    318 
    319 class PTHFileLookupCommonTrait {
    320 public:
    321   typedef std::pair<unsigned char, const char*> internal_key_type;
    322   typedef unsigned hash_value_type;
    323   typedef unsigned offset_type;
    324 
    325   static hash_value_type ComputeHash(internal_key_type x) {
    326     return llvm::HashString(x.second);
    327   }
    328 
    329   static std::pair<unsigned, unsigned>
    330   ReadKeyDataLength(const unsigned char*& d) {
    331     using namespace llvm::support;
    332     unsigned keyLen =
    333         (unsigned)endian::readNext<uint16_t, little, unaligned>(d);
    334     unsigned dataLen = (unsigned) *(d++);
    335     return std::make_pair(keyLen, dataLen);
    336   }
    337 
    338   static internal_key_type ReadKey(const unsigned char* d, unsigned) {
    339     unsigned char k = *(d++); // Read the entry kind.
    340     return std::make_pair(k, (const char*) d);
    341   }
    342 };
    343 
    344 } // end anonymous namespace
    345 
    346 class PTHManager::PTHFileLookupTrait : public PTHFileLookupCommonTrait {
    347 public:
    348   typedef const FileEntry* external_key_type;
    349   typedef PTHFileData      data_type;
    350 
    351   static internal_key_type GetInternalKey(const FileEntry* FE) {
    352     return std::make_pair((unsigned char) 0x1, FE->getName());
    353   }
    354 
    355   static bool EqualKey(internal_key_type a, internal_key_type b) {
    356     return a.first == b.first && strcmp(a.second, b.second) == 0;
    357   }
    358 
    359   static PTHFileData ReadData(const internal_key_type& k,
    360                               const unsigned char* d, unsigned) {
    361     assert(k.first == 0x1 && "Only file lookups can match!");
    362     using namespace llvm::support;
    363     uint32_t x = endian::readNext<uint32_t, little, unaligned>(d);
    364     uint32_t y = endian::readNext<uint32_t, little, unaligned>(d);
    365     return PTHFileData(x, y);
    366   }
    367 };
    368 
    369 class PTHManager::PTHStringLookupTrait {
    370 public:
    371   typedef uint32_t data_type;
    372   typedef const std::pair<const char*, unsigned> external_key_type;
    373   typedef external_key_type internal_key_type;
    374   typedef uint32_t hash_value_type;
    375   typedef unsigned offset_type;
    376 
    377   static bool EqualKey(const internal_key_type& a,
    378                        const internal_key_type& b) {
    379     return (a.second == b.second) ? memcmp(a.first, b.first, a.second) == 0
    380                                   : false;
    381   }
    382 
    383   static hash_value_type ComputeHash(const internal_key_type& a) {
    384     return llvm::HashString(StringRef(a.first, a.second));
    385   }
    386 
    387   // This hopefully will just get inlined and removed by the optimizer.
    388   static const internal_key_type&
    389   GetInternalKey(const external_key_type& x) { return x; }
    390 
    391   static std::pair<unsigned, unsigned>
    392   ReadKeyDataLength(const unsigned char*& d) {
    393     using namespace llvm::support;
    394     return std::make_pair(
    395         (unsigned)endian::readNext<uint16_t, little, unaligned>(d),
    396         sizeof(uint32_t));
    397   }
    398 
    399   static std::pair<const char*, unsigned>
    400   ReadKey(const unsigned char* d, unsigned n) {
    401       assert(n >= 2 && d[n-1] == '\0');
    402       return std::make_pair((const char*) d, n-1);
    403     }
    404 
    405   static uint32_t ReadData(const internal_key_type& k, const unsigned char* d,
    406                            unsigned) {
    407     using namespace llvm::support;
    408     return endian::readNext<uint32_t, little, unaligned>(d);
    409   }
    410 };
    411 
    412 //===----------------------------------------------------------------------===//
    413 // PTHManager methods.
    414 //===----------------------------------------------------------------------===//
    415 
    416 PTHManager::PTHManager(
    417     std::unique_ptr<const llvm::MemoryBuffer> buf,
    418     std::unique_ptr<PTHFileLookup> fileLookup, const unsigned char *idDataTable,
    419     std::unique_ptr<IdentifierInfo *[], llvm::FreeDeleter> perIDCache,
    420     std::unique_ptr<PTHStringIdLookup> stringIdLookup, unsigned numIds,
    421     const unsigned char *spellingBase, const char *originalSourceFile)
    422     : Buf(std::move(buf)), PerIDCache(std::move(perIDCache)),
    423       FileLookup(std::move(fileLookup)), IdDataTable(idDataTable),
    424       StringIdLookup(std::move(stringIdLookup)), NumIds(numIds), PP(nullptr),
    425       SpellingBase(spellingBase), OriginalSourceFile(originalSourceFile) {}
    426 
    427 PTHManager::~PTHManager() {
    428 }
    429 
    430 static void InvalidPTH(DiagnosticsEngine &Diags, const char *Msg) {
    431   Diags.Report(Diags.getCustomDiagID(DiagnosticsEngine::Error, "%0")) << Msg;
    432 }
    433 
    434 PTHManager *PTHManager::Create(StringRef file, DiagnosticsEngine &Diags) {
    435   // Memory map the PTH file.
    436   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
    437       llvm::MemoryBuffer::getFile(file);
    438 
    439   if (!FileOrErr) {
    440     // FIXME: Add ec.message() to this diag.
    441     Diags.Report(diag::err_invalid_pth_file) << file;
    442     return nullptr;
    443   }
    444   std::unique_ptr<llvm::MemoryBuffer> File = std::move(FileOrErr.get());
    445 
    446   using namespace llvm::support;
    447 
    448   // Get the buffer ranges and check if there are at least three 32-bit
    449   // words at the end of the file.
    450   const unsigned char *BufBeg = (const unsigned char*)File->getBufferStart();
    451   const unsigned char *BufEnd = (const unsigned char*)File->getBufferEnd();
    452 
    453   // Check the prologue of the file.
    454   if ((BufEnd - BufBeg) < (signed)(sizeof("cfe-pth") + 4 + 4) ||
    455       memcmp(BufBeg, "cfe-pth", sizeof("cfe-pth")) != 0) {
    456     Diags.Report(diag::err_invalid_pth_file) << file;
    457     return nullptr;
    458   }
    459 
    460   // Read the PTH version.
    461   const unsigned char *p = BufBeg + (sizeof("cfe-pth"));
    462   unsigned Version = endian::readNext<uint32_t, little, aligned>(p);
    463 
    464   if (Version < PTHManager::Version) {
    465     InvalidPTH(Diags,
    466         Version < PTHManager::Version
    467         ? "PTH file uses an older PTH format that is no longer supported"
    468         : "PTH file uses a newer PTH format that cannot be read");
    469     return nullptr;
    470   }
    471 
    472   // Compute the address of the index table at the end of the PTH file.
    473   const unsigned char *PrologueOffset = p;
    474 
    475   if (PrologueOffset >= BufEnd) {
    476     Diags.Report(diag::err_invalid_pth_file) << file;
    477     return nullptr;
    478   }
    479 
    480   // Construct the file lookup table.  This will be used for mapping from
    481   // FileEntry*'s to cached tokens.
    482   const unsigned char* FileTableOffset = PrologueOffset + sizeof(uint32_t)*2;
    483   const unsigned char *FileTable =
    484       BufBeg + endian::readNext<uint32_t, little, aligned>(FileTableOffset);
    485 
    486   if (!(FileTable > BufBeg && FileTable < BufEnd)) {
    487     Diags.Report(diag::err_invalid_pth_file) << file;
    488     return nullptr; // FIXME: Proper error diagnostic?
    489   }
    490 
    491   std::unique_ptr<PTHFileLookup> FL(PTHFileLookup::Create(FileTable, BufBeg));
    492 
    493   // Warn if the PTH file is empty.  We still want to create a PTHManager
    494   // as the PTH could be used with -include-pth.
    495   if (FL->isEmpty())
    496     InvalidPTH(Diags, "PTH file contains no cached source data");
    497 
    498   // Get the location of the table mapping from persistent ids to the
    499   // data needed to reconstruct identifiers.
    500   const unsigned char* IDTableOffset = PrologueOffset + sizeof(uint32_t)*0;
    501   const unsigned char *IData =
    502       BufBeg + endian::readNext<uint32_t, little, aligned>(IDTableOffset);
    503 
    504   if (!(IData >= BufBeg && IData < BufEnd)) {
    505     Diags.Report(diag::err_invalid_pth_file) << file;
    506     return nullptr;
    507   }
    508 
    509   // Get the location of the hashtable mapping between strings and
    510   // persistent IDs.
    511   const unsigned char* StringIdTableOffset = PrologueOffset + sizeof(uint32_t)*1;
    512   const unsigned char *StringIdTable =
    513       BufBeg + endian::readNext<uint32_t, little, aligned>(StringIdTableOffset);
    514   if (!(StringIdTable >= BufBeg && StringIdTable < BufEnd)) {
    515     Diags.Report(diag::err_invalid_pth_file) << file;
    516     return nullptr;
    517   }
    518 
    519   std::unique_ptr<PTHStringIdLookup> SL(
    520       PTHStringIdLookup::Create(StringIdTable, BufBeg));
    521 
    522   // Get the location of the spelling cache.
    523   const unsigned char* spellingBaseOffset = PrologueOffset + sizeof(uint32_t)*3;
    524   const unsigned char *spellingBase =
    525       BufBeg + endian::readNext<uint32_t, little, aligned>(spellingBaseOffset);
    526   if (!(spellingBase >= BufBeg && spellingBase < BufEnd)) {
    527     Diags.Report(diag::err_invalid_pth_file) << file;
    528     return nullptr;
    529   }
    530 
    531   // Get the number of IdentifierInfos and pre-allocate the identifier cache.
    532   uint32_t NumIds = endian::readNext<uint32_t, little, aligned>(IData);
    533 
    534   // Pre-allocate the persistent ID -> IdentifierInfo* cache.  We use calloc()
    535   // so that we in the best case only zero out memory once when the OS returns
    536   // us new pages.
    537   std::unique_ptr<IdentifierInfo *[], llvm::FreeDeleter> PerIDCache;
    538 
    539   if (NumIds) {
    540     PerIDCache.reset((IdentifierInfo **)calloc(NumIds, sizeof(PerIDCache[0])));
    541     if (!PerIDCache) {
    542       InvalidPTH(Diags, "Could not allocate memory for processing PTH file");
    543       return nullptr;
    544     }
    545   }
    546 
    547   // Compute the address of the original source file.
    548   const unsigned char* originalSourceBase = PrologueOffset + sizeof(uint32_t)*4;
    549   unsigned len =
    550       endian::readNext<uint16_t, little, unaligned>(originalSourceBase);
    551   if (!len) originalSourceBase = nullptr;
    552 
    553   // Create the new PTHManager.
    554   return new PTHManager(std::move(File), std::move(FL), IData,
    555                         std::move(PerIDCache), std::move(SL), NumIds,
    556                         spellingBase, (const char *)originalSourceBase);
    557 }
    558 
    559 IdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) {
    560   using namespace llvm::support;
    561   // Look in the PTH file for the string data for the IdentifierInfo object.
    562   const unsigned char* TableEntry = IdDataTable + sizeof(uint32_t)*PersistentID;
    563   const unsigned char *IDData =
    564       (const unsigned char *)Buf->getBufferStart() +
    565       endian::readNext<uint32_t, little, aligned>(TableEntry);
    566   assert(IDData < (const unsigned char*)Buf->getBufferEnd());
    567 
    568   // Allocate the object.
    569   std::pair<IdentifierInfo,const unsigned char*> *Mem =
    570     Alloc.Allocate<std::pair<IdentifierInfo,const unsigned char*> >();
    571 
    572   Mem->second = IDData;
    573   assert(IDData[0] != '\0');
    574   IdentifierInfo *II = new ((void*) Mem) IdentifierInfo();
    575 
    576   // Store the new IdentifierInfo in the cache.
    577   PerIDCache[PersistentID] = II;
    578   assert(II->getNameStart() && II->getNameStart()[0] != '\0');
    579   return II;
    580 }
    581 
    582 IdentifierInfo* PTHManager::get(StringRef Name) {
    583   // Double check our assumption that the last character isn't '\0'.
    584   assert(Name.empty() || Name.back() != '\0');
    585   PTHStringIdLookup::iterator I =
    586       StringIdLookup->find(std::make_pair(Name.data(), Name.size()));
    587   if (I == StringIdLookup->end()) // No identifier found?
    588     return nullptr;
    589 
    590   // Match found.  Return the identifier!
    591   assert(*I > 0);
    592   return GetIdentifierInfo(*I-1);
    593 }
    594 
    595 PTHLexer *PTHManager::CreateLexer(FileID FID) {
    596   const FileEntry *FE = PP->getSourceManager().getFileEntryForID(FID);
    597   if (!FE)
    598     return nullptr;
    599 
    600   using namespace llvm::support;
    601 
    602   // Lookup the FileEntry object in our file lookup data structure.  It will
    603   // return a variant that indicates whether or not there is an offset within
    604   // the PTH file that contains cached tokens.
    605   PTHFileLookup::iterator I = FileLookup->find(FE);
    606 
    607   if (I == FileLookup->end()) // No tokens available?
    608     return nullptr;
    609 
    610   const PTHFileData& FileData = *I;
    611 
    612   const unsigned char *BufStart = (const unsigned char *)Buf->getBufferStart();
    613   // Compute the offset of the token data within the buffer.
    614   const unsigned char* data = BufStart + FileData.getTokenOffset();
    615 
    616   // Get the location of pp-conditional table.
    617   const unsigned char* ppcond = BufStart + FileData.getPPCondOffset();
    618   uint32_t Len = endian::readNext<uint32_t, little, aligned>(ppcond);
    619   if (Len == 0) ppcond = nullptr;
    620 
    621   assert(PP && "No preprocessor set yet!");
    622   return new PTHLexer(*PP, FID, data, ppcond, *this);
    623 }
    624 
    625 //===----------------------------------------------------------------------===//
    626 // 'stat' caching.
    627 //===----------------------------------------------------------------------===//
    628 
    629 namespace {
    630 class PTHStatData {
    631 public:
    632   const bool HasData;
    633   uint64_t Size;
    634   time_t ModTime;
    635   llvm::sys::fs::UniqueID UniqueID;
    636   bool IsDirectory;
    637 
    638   PTHStatData(uint64_t Size, time_t ModTime, llvm::sys::fs::UniqueID UniqueID,
    639               bool IsDirectory)
    640       : HasData(true), Size(Size), ModTime(ModTime), UniqueID(UniqueID),
    641         IsDirectory(IsDirectory) {}
    642 
    643   PTHStatData() : HasData(false) {}
    644 };
    645 
    646 class PTHStatLookupTrait : public PTHFileLookupCommonTrait {
    647 public:
    648   typedef const char* external_key_type;  // const char*
    649   typedef PTHStatData data_type;
    650 
    651   static internal_key_type GetInternalKey(const char *path) {
    652     // The key 'kind' doesn't matter here because it is ignored in EqualKey.
    653     return std::make_pair((unsigned char) 0x0, path);
    654   }
    655 
    656   static bool EqualKey(internal_key_type a, internal_key_type b) {
    657     // When doing 'stat' lookups we don't care about the kind of 'a' and 'b',
    658     // just the paths.
    659     return strcmp(a.second, b.second) == 0;
    660   }
    661 
    662   static data_type ReadData(const internal_key_type& k, const unsigned char* d,
    663                             unsigned) {
    664 
    665     if (k.first /* File or Directory */) {
    666       bool IsDirectory = true;
    667       if (k.first == 0x1 /* File */) {
    668         IsDirectory = false;
    669         d += 4 * 2; // Skip the first 2 words.
    670       }
    671 
    672       using namespace llvm::support;
    673 
    674       uint64_t File = endian::readNext<uint64_t, little, unaligned>(d);
    675       uint64_t Device = endian::readNext<uint64_t, little, unaligned>(d);
    676       llvm::sys::fs::UniqueID UniqueID(Device, File);
    677       time_t ModTime = endian::readNext<uint64_t, little, unaligned>(d);
    678       uint64_t Size = endian::readNext<uint64_t, little, unaligned>(d);
    679       return data_type(Size, ModTime, UniqueID, IsDirectory);
    680     }
    681 
    682     // Negative stat.  Don't read anything.
    683     return data_type();
    684   }
    685 };
    686 } // end anonymous namespace
    687 
    688 namespace clang {
    689 class PTHStatCache : public FileSystemStatCache {
    690   typedef llvm::OnDiskChainedHashTable<PTHStatLookupTrait> CacheTy;
    691   CacheTy Cache;
    692 
    693 public:
    694   PTHStatCache(PTHManager::PTHFileLookup &FL)
    695       : Cache(FL.getNumBuckets(), FL.getNumEntries(), FL.getBuckets(),
    696               FL.getBase()) {}
    697 
    698   LookupResult getStat(const char *Path, FileData &Data, bool isFile,
    699                        std::unique_ptr<vfs::File> *F,
    700                        vfs::FileSystem &FS) override {
    701     // Do the lookup for the file's data in the PTH file.
    702     CacheTy::iterator I = Cache.find(Path);
    703 
    704     // If we don't get a hit in the PTH file just forward to 'stat'.
    705     if (I == Cache.end())
    706       return statChained(Path, Data, isFile, F, FS);
    707 
    708     const PTHStatData &D = *I;
    709 
    710     if (!D.HasData)
    711       return CacheMissing;
    712 
    713     Data.Name = Path;
    714     Data.Size = D.Size;
    715     Data.ModTime = D.ModTime;
    716     Data.UniqueID = D.UniqueID;
    717     Data.IsDirectory = D.IsDirectory;
    718     Data.IsNamedPipe = false;
    719     Data.InPCH = true;
    720 
    721     return CacheExists;
    722   }
    723 };
    724 }
    725 
    726 std::unique_ptr<FileSystemStatCache> PTHManager::createStatCache() {
    727   return llvm::make_unique<PTHStatCache>(*FileLookup);
    728 }
    729