Home | History | Annotate | Download | only in Lex
      1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 //  This file implements the Preprocessor interface.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 //
     14 // Options to support:
     15 //   -H       - Print the name of each header file used.
     16 //   -d[DNI] - Dump various things.
     17 //   -fworking-directory - #line's with preprocessor's working dir.
     18 //   -fpreprocessed
     19 //   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
     20 //   -W*
     21 //   -w
     22 //
     23 // Messages to emit:
     24 //   "Multiple include guards may be useful for:\n"
     25 //
     26 //===----------------------------------------------------------------------===//
     27 
     28 #include "clang/Lex/Preprocessor.h"
     29 #include "clang/Basic/FileManager.h"
     30 #include "clang/Basic/FileSystemStatCache.h"
     31 #include "clang/Basic/SourceManager.h"
     32 #include "clang/Basic/TargetInfo.h"
     33 #include "clang/Lex/CodeCompletionHandler.h"
     34 #include "clang/Lex/ExternalPreprocessorSource.h"
     35 #include "clang/Lex/HeaderSearch.h"
     36 #include "clang/Lex/LexDiagnostic.h"
     37 #include "clang/Lex/LiteralSupport.h"
     38 #include "clang/Lex/MacroArgs.h"
     39 #include "clang/Lex/MacroInfo.h"
     40 #include "clang/Lex/ModuleLoader.h"
     41 #include "clang/Lex/PTHManager.h"
     42 #include "clang/Lex/Pragma.h"
     43 #include "clang/Lex/PreprocessingRecord.h"
     44 #include "clang/Lex/PreprocessorOptions.h"
     45 #include "clang/Lex/ScratchBuffer.h"
     46 #include "llvm/ADT/APFloat.h"
     47 #include "llvm/ADT/STLExtras.h"
     48 #include "llvm/ADT/SmallString.h"
     49 #include "llvm/ADT/StringExtras.h"
     50 #include "llvm/Support/Capacity.h"
     51 #include "llvm/Support/ConvertUTF.h"
     52 #include "llvm/Support/MemoryBuffer.h"
     53 #include "llvm/Support/raw_ostream.h"
     54 #include <utility>
     55 using namespace clang;
     56 
     57 template class llvm::Registry<clang::PragmaHandler>;
     58 
     59 //===----------------------------------------------------------------------===//
     60 ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
     61 
     62 Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
     63                            DiagnosticsEngine &diags, LangOptions &opts,
     64                            SourceManager &SM, HeaderSearch &Headers,
     65                            ModuleLoader &TheModuleLoader,
     66                            IdentifierInfoLookup *IILookup, bool OwnsHeaders,
     67                            TranslationUnitKind TUKind)
     68     : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), Target(nullptr),
     69       AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM),
     70       ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
     71       TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
     72       Identifiers(opts, IILookup),
     73       PragmaHandlers(new PragmaNamespace(StringRef())),
     74       IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr),
     75       CodeCompletionFile(nullptr), CodeCompletionOffset(0),
     76       LastTokenWasAt(false), ModuleImportExpectsIdentifier(false),
     77       CodeCompletionReached(0), MainFileDir(nullptr),
     78       SkipMainFilePreamble(0, true), CurPPLexer(nullptr), CurDirLookup(nullptr),
     79       CurLexerKind(CLK_Lexer), CurSubmodule(nullptr), Callbacks(nullptr),
     80       CurSubmoduleState(&NullSubmoduleState), MacroArgCache(nullptr),
     81       Record(nullptr), MIChainHead(nullptr), DeserialMIChainHead(nullptr) {
     82   OwnsHeaderSearch = OwnsHeaders;
     83 
     84   CounterValue = 0; // __COUNTER__ starts at 0.
     85 
     86   // Clear stats.
     87   NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
     88   NumIf = NumElse = NumEndif = 0;
     89   NumEnteredSourceFiles = 0;
     90   NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
     91   NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
     92   MaxIncludeStackDepth = 0;
     93   NumSkipped = 0;
     94 
     95   // Default to discarding comments.
     96   KeepComments = false;
     97   KeepMacroComments = false;
     98   SuppressIncludeNotFoundError = false;
     99 
    100   // Macro expansion is enabled.
    101   DisableMacroExpansion = false;
    102   MacroExpansionInDirectivesOverride = false;
    103   InMacroArgs = false;
    104   InMacroArgPreExpansion = false;
    105   NumCachedTokenLexers = 0;
    106   PragmasEnabled = true;
    107   ParsingIfOrElifDirective = false;
    108   PreprocessedOutput = false;
    109 
    110   CachedLexPos = 0;
    111 
    112   // We haven't read anything from the external source.
    113   ReadMacrosFromExternalSource = false;
    114 
    115   // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
    116   // This gets unpoisoned where it is allowed.
    117   (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
    118   SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
    119 
    120   // Initialize the pragma handlers.
    121   RegisterBuiltinPragmas();
    122 
    123   // Initialize builtin macros like __LINE__ and friends.
    124   RegisterBuiltinMacros();
    125 
    126   if(LangOpts.Borland) {
    127     Ident__exception_info        = getIdentifierInfo("_exception_info");
    128     Ident___exception_info       = getIdentifierInfo("__exception_info");
    129     Ident_GetExceptionInfo       = getIdentifierInfo("GetExceptionInformation");
    130     Ident__exception_code        = getIdentifierInfo("_exception_code");
    131     Ident___exception_code       = getIdentifierInfo("__exception_code");
    132     Ident_GetExceptionCode       = getIdentifierInfo("GetExceptionCode");
    133     Ident__abnormal_termination  = getIdentifierInfo("_abnormal_termination");
    134     Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
    135     Ident_AbnormalTermination    = getIdentifierInfo("AbnormalTermination");
    136   } else {
    137     Ident__exception_info = Ident__exception_code = nullptr;
    138     Ident__abnormal_termination = Ident___exception_info = nullptr;
    139     Ident___exception_code = Ident___abnormal_termination = nullptr;
    140     Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
    141     Ident_AbnormalTermination = nullptr;
    142   }
    143 }
    144 
    145 Preprocessor::~Preprocessor() {
    146   assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
    147 
    148   IncludeMacroStack.clear();
    149 
    150   // Destroy any macro definitions.
    151   while (MacroInfoChain *I = MIChainHead) {
    152     MIChainHead = I->Next;
    153     I->~MacroInfoChain();
    154   }
    155 
    156   // Free any cached macro expanders.
    157   // This populates MacroArgCache, so all TokenLexers need to be destroyed
    158   // before the code below that frees up the MacroArgCache list.
    159   std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
    160   CurTokenLexer.reset();
    161 
    162   while (DeserializedMacroInfoChain *I = DeserialMIChainHead) {
    163     DeserialMIChainHead = I->Next;
    164     I->~DeserializedMacroInfoChain();
    165   }
    166 
    167   // Free any cached MacroArgs.
    168   for (MacroArgs *ArgList = MacroArgCache; ArgList;)
    169     ArgList = ArgList->deallocate();
    170 
    171   // Delete the header search info, if we own it.
    172   if (OwnsHeaderSearch)
    173     delete &HeaderInfo;
    174 }
    175 
    176 void Preprocessor::Initialize(const TargetInfo &Target,
    177                               const TargetInfo *AuxTarget) {
    178   assert((!this->Target || this->Target == &Target) &&
    179          "Invalid override of target information");
    180   this->Target = &Target;
    181 
    182   assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
    183          "Invalid override of aux target information.");
    184   this->AuxTarget = AuxTarget;
    185 
    186   // Initialize information about built-ins.
    187   BuiltinInfo.InitializeTarget(Target, AuxTarget);
    188   HeaderInfo.setTarget(Target);
    189 }
    190 
    191 void Preprocessor::InitializeForModelFile() {
    192   NumEnteredSourceFiles = 0;
    193 
    194   // Reset pragmas
    195   PragmaHandlersBackup = std::move(PragmaHandlers);
    196   PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef());
    197   RegisterBuiltinPragmas();
    198 
    199   // Reset PredefinesFileID
    200   PredefinesFileID = FileID();
    201 }
    202 
    203 void Preprocessor::FinalizeForModelFile() {
    204   NumEnteredSourceFiles = 1;
    205 
    206   PragmaHandlers = std::move(PragmaHandlersBackup);
    207 }
    208 
    209 void Preprocessor::setPTHManager(PTHManager* pm) {
    210   PTH.reset(pm);
    211   FileMgr.addStatCache(PTH->createStatCache());
    212 }
    213 
    214 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
    215   llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
    216                << getSpelling(Tok) << "'";
    217 
    218   if (!DumpFlags) return;
    219 
    220   llvm::errs() << "\t";
    221   if (Tok.isAtStartOfLine())
    222     llvm::errs() << " [StartOfLine]";
    223   if (Tok.hasLeadingSpace())
    224     llvm::errs() << " [LeadingSpace]";
    225   if (Tok.isExpandDisabled())
    226     llvm::errs() << " [ExpandDisabled]";
    227   if (Tok.needsCleaning()) {
    228     const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
    229     llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
    230                  << "']";
    231   }
    232 
    233   llvm::errs() << "\tLoc=<";
    234   DumpLocation(Tok.getLocation());
    235   llvm::errs() << ">";
    236 }
    237 
    238 void Preprocessor::DumpLocation(SourceLocation Loc) const {
    239   Loc.dump(SourceMgr);
    240 }
    241 
    242 void Preprocessor::DumpMacro(const MacroInfo &MI) const {
    243   llvm::errs() << "MACRO: ";
    244   for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
    245     DumpToken(MI.getReplacementToken(i));
    246     llvm::errs() << "  ";
    247   }
    248   llvm::errs() << "\n";
    249 }
    250 
    251 void Preprocessor::PrintStats() {
    252   llvm::errs() << "\n*** Preprocessor Stats:\n";
    253   llvm::errs() << NumDirectives << " directives found:\n";
    254   llvm::errs() << "  " << NumDefined << " #define.\n";
    255   llvm::errs() << "  " << NumUndefined << " #undef.\n";
    256   llvm::errs() << "  #include/#include_next/#import:\n";
    257   llvm::errs() << "    " << NumEnteredSourceFiles << " source files entered.\n";
    258   llvm::errs() << "    " << MaxIncludeStackDepth << " max include stack depth\n";
    259   llvm::errs() << "  " << NumIf << " #if/#ifndef/#ifdef.\n";
    260   llvm::errs() << "  " << NumElse << " #else/#elif.\n";
    261   llvm::errs() << "  " << NumEndif << " #endif.\n";
    262   llvm::errs() << "  " << NumPragma << " #pragma.\n";
    263   llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
    264 
    265   llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
    266              << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
    267              << NumFastMacroExpanded << " on the fast path.\n";
    268   llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
    269              << " token paste (##) operations performed, "
    270              << NumFastTokenPaste << " on the fast path.\n";
    271 
    272   llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
    273 
    274   llvm::errs() << "\n  BumpPtr: " << BP.getTotalMemory();
    275   llvm::errs() << "\n  Macro Expanded Tokens: "
    276                << llvm::capacity_in_bytes(MacroExpandedTokens);
    277   llvm::errs() << "\n  Predefines Buffer: " << Predefines.capacity();
    278   // FIXME: List information for all submodules.
    279   llvm::errs() << "\n  Macros: "
    280                << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
    281   llvm::errs() << "\n  #pragma push_macro Info: "
    282                << llvm::capacity_in_bytes(PragmaPushMacroInfo);
    283   llvm::errs() << "\n  Poison Reasons: "
    284                << llvm::capacity_in_bytes(PoisonReasons);
    285   llvm::errs() << "\n  Comment Handlers: "
    286                << llvm::capacity_in_bytes(CommentHandlers) << "\n";
    287 }
    288 
    289 Preprocessor::macro_iterator
    290 Preprocessor::macro_begin(bool IncludeExternalMacros) const {
    291   if (IncludeExternalMacros && ExternalSource &&
    292       !ReadMacrosFromExternalSource) {
    293     ReadMacrosFromExternalSource = true;
    294     ExternalSource->ReadDefinedMacros();
    295   }
    296 
    297   // Make sure we cover all macros in visible modules.
    298   for (const ModuleMacro &Macro : ModuleMacros)
    299     CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));
    300 
    301   return CurSubmoduleState->Macros.begin();
    302 }
    303 
    304 size_t Preprocessor::getTotalMemory() const {
    305   return BP.getTotalMemory()
    306     + llvm::capacity_in_bytes(MacroExpandedTokens)
    307     + Predefines.capacity() /* Predefines buffer. */
    308     // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
    309     // and ModuleMacros.
    310     + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
    311     + llvm::capacity_in_bytes(PragmaPushMacroInfo)
    312     + llvm::capacity_in_bytes(PoisonReasons)
    313     + llvm::capacity_in_bytes(CommentHandlers);
    314 }
    315 
    316 Preprocessor::macro_iterator
    317 Preprocessor::macro_end(bool IncludeExternalMacros) const {
    318   if (IncludeExternalMacros && ExternalSource &&
    319       !ReadMacrosFromExternalSource) {
    320     ReadMacrosFromExternalSource = true;
    321     ExternalSource->ReadDefinedMacros();
    322   }
    323 
    324   return CurSubmoduleState->Macros.end();
    325 }
    326 
    327 /// \brief Compares macro tokens with a specified token value sequence.
    328 static bool MacroDefinitionEquals(const MacroInfo *MI,
    329                                   ArrayRef<TokenValue> Tokens) {
    330   return Tokens.size() == MI->getNumTokens() &&
    331       std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
    332 }
    333 
    334 StringRef Preprocessor::getLastMacroWithSpelling(
    335                                     SourceLocation Loc,
    336                                     ArrayRef<TokenValue> Tokens) const {
    337   SourceLocation BestLocation;
    338   StringRef BestSpelling;
    339   for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
    340        I != E; ++I) {
    341     const MacroDirective::DefInfo
    342       Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
    343     if (!Def || !Def.getMacroInfo())
    344       continue;
    345     if (!Def.getMacroInfo()->isObjectLike())
    346       continue;
    347     if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
    348       continue;
    349     SourceLocation Location = Def.getLocation();
    350     // Choose the macro defined latest.
    351     if (BestLocation.isInvalid() ||
    352         (Location.isValid() &&
    353          SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
    354       BestLocation = Location;
    355       BestSpelling = I->first->getName();
    356     }
    357   }
    358   return BestSpelling;
    359 }
    360 
    361 void Preprocessor::recomputeCurLexerKind() {
    362   if (CurLexer)
    363     CurLexerKind = CLK_Lexer;
    364   else if (CurPTHLexer)
    365     CurLexerKind = CLK_PTHLexer;
    366   else if (CurTokenLexer)
    367     CurLexerKind = CLK_TokenLexer;
    368   else
    369     CurLexerKind = CLK_CachingLexer;
    370 }
    371 
    372 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
    373                                           unsigned CompleteLine,
    374                                           unsigned CompleteColumn) {
    375   assert(File);
    376   assert(CompleteLine && CompleteColumn && "Starts from 1:1");
    377   assert(!CodeCompletionFile && "Already set");
    378 
    379   using llvm::MemoryBuffer;
    380 
    381   // Load the actual file's contents.
    382   bool Invalid = false;
    383   const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
    384   if (Invalid)
    385     return true;
    386 
    387   // Find the byte position of the truncation point.
    388   const char *Position = Buffer->getBufferStart();
    389   for (unsigned Line = 1; Line < CompleteLine; ++Line) {
    390     for (; *Position; ++Position) {
    391       if (*Position != '\r' && *Position != '\n')
    392         continue;
    393 
    394       // Eat \r\n or \n\r as a single line.
    395       if ((Position[1] == '\r' || Position[1] == '\n') &&
    396           Position[0] != Position[1])
    397         ++Position;
    398       ++Position;
    399       break;
    400     }
    401   }
    402 
    403   Position += CompleteColumn - 1;
    404 
    405   // If pointing inside the preamble, adjust the position at the beginning of
    406   // the file after the preamble.
    407   if (SkipMainFilePreamble.first &&
    408       SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
    409     if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
    410       Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
    411   }
    412 
    413   if (Position > Buffer->getBufferEnd())
    414     Position = Buffer->getBufferEnd();
    415 
    416   CodeCompletionFile = File;
    417   CodeCompletionOffset = Position - Buffer->getBufferStart();
    418 
    419   std::unique_ptr<MemoryBuffer> NewBuffer =
    420       MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,
    421                                           Buffer->getBufferIdentifier());
    422   char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());
    423   char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
    424   *NewPos = '\0';
    425   std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
    426   SourceMgr.overrideFileContents(File, std::move(NewBuffer));
    427 
    428   return false;
    429 }
    430 
    431 void Preprocessor::CodeCompleteNaturalLanguage() {
    432   if (CodeComplete)
    433     CodeComplete->CodeCompleteNaturalLanguage();
    434   setCodeCompletionReached();
    435 }
    436 
    437 /// getSpelling - This method is used to get the spelling of a token into a
    438 /// SmallVector. Note that the returned StringRef may not point to the
    439 /// supplied buffer if a copy can be avoided.
    440 StringRef Preprocessor::getSpelling(const Token &Tok,
    441                                           SmallVectorImpl<char> &Buffer,
    442                                           bool *Invalid) const {
    443   // NOTE: this has to be checked *before* testing for an IdentifierInfo.
    444   if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
    445     // Try the fast path.
    446     if (const IdentifierInfo *II = Tok.getIdentifierInfo())
    447       return II->getName();
    448   }
    449 
    450   // Resize the buffer if we need to copy into it.
    451   if (Tok.needsCleaning())
    452     Buffer.resize(Tok.getLength());
    453 
    454   const char *Ptr = Buffer.data();
    455   unsigned Len = getSpelling(Tok, Ptr, Invalid);
    456   return StringRef(Ptr, Len);
    457 }
    458 
    459 /// CreateString - Plop the specified string into a scratch buffer and return a
    460 /// location for it.  If specified, the source location provides a source
    461 /// location for the token.
    462 void Preprocessor::CreateString(StringRef Str, Token &Tok,
    463                                 SourceLocation ExpansionLocStart,
    464                                 SourceLocation ExpansionLocEnd) {
    465   Tok.setLength(Str.size());
    466 
    467   const char *DestPtr;
    468   SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
    469 
    470   if (ExpansionLocStart.isValid())
    471     Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
    472                                        ExpansionLocEnd, Str.size());
    473   Tok.setLocation(Loc);
    474 
    475   // If this is a raw identifier or a literal token, set the pointer data.
    476   if (Tok.is(tok::raw_identifier))
    477     Tok.setRawIdentifierData(DestPtr);
    478   else if (Tok.isLiteral())
    479     Tok.setLiteralData(DestPtr);
    480 }
    481 
    482 Module *Preprocessor::getCurrentModule() {
    483   if (!getLangOpts().CompilingModule)
    484     return nullptr;
    485 
    486   return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
    487 }
    488 
    489 //===----------------------------------------------------------------------===//
    490 // Preprocessor Initialization Methods
    491 //===----------------------------------------------------------------------===//
    492 
    493 
    494 /// EnterMainSourceFile - Enter the specified FileID as the main source file,
    495 /// which implicitly adds the builtin defines etc.
    496 void Preprocessor::EnterMainSourceFile() {
    497   // We do not allow the preprocessor to reenter the main file.  Doing so will
    498   // cause FileID's to accumulate information from both runs (e.g. #line
    499   // information) and predefined macros aren't guaranteed to be set properly.
    500   assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
    501   FileID MainFileID = SourceMgr.getMainFileID();
    502 
    503   // If MainFileID is loaded it means we loaded an AST file, no need to enter
    504   // a main file.
    505   if (!SourceMgr.isLoadedFileID(MainFileID)) {
    506     // Enter the main file source buffer.
    507     EnterSourceFile(MainFileID, nullptr, SourceLocation());
    508 
    509     // If we've been asked to skip bytes in the main file (e.g., as part of a
    510     // precompiled preamble), do so now.
    511     if (SkipMainFilePreamble.first > 0)
    512       CurLexer->SkipBytes(SkipMainFilePreamble.first,
    513                           SkipMainFilePreamble.second);
    514 
    515     // Tell the header info that the main file was entered.  If the file is later
    516     // #imported, it won't be re-entered.
    517     if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
    518       HeaderInfo.IncrementIncludeCount(FE);
    519   }
    520 
    521   // Preprocess Predefines to populate the initial preprocessor state.
    522   std::unique_ptr<llvm::MemoryBuffer> SB =
    523     llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
    524   assert(SB && "Cannot create predefined source buffer");
    525   FileID FID = SourceMgr.createFileID(std::move(SB));
    526   assert(FID.isValid() && "Could not create FileID for predefines?");
    527   setPredefinesFileID(FID);
    528 
    529   // Start parsing the predefines.
    530   EnterSourceFile(FID, nullptr, SourceLocation());
    531 }
    532 
    533 void Preprocessor::EndSourceFile() {
    534   // Notify the client that we reached the end of the source file.
    535   if (Callbacks)
    536     Callbacks->EndOfMainFile();
    537 }
    538 
    539 //===----------------------------------------------------------------------===//
    540 // Lexer Event Handling.
    541 //===----------------------------------------------------------------------===//
    542 
    543 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
    544 /// identifier information for the token and install it into the token,
    545 /// updating the token kind accordingly.
    546 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
    547   assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
    548 
    549   // Look up this token, see if it is a macro, or if it is a language keyword.
    550   IdentifierInfo *II;
    551   if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
    552     // No cleaning needed, just use the characters from the lexed buffer.
    553     II = getIdentifierInfo(Identifier.getRawIdentifier());
    554   } else {
    555     // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
    556     SmallString<64> IdentifierBuffer;
    557     StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
    558 
    559     if (Identifier.hasUCN()) {
    560       SmallString<64> UCNIdentifierBuffer;
    561       expandUCNs(UCNIdentifierBuffer, CleanedStr);
    562       II = getIdentifierInfo(UCNIdentifierBuffer);
    563     } else {
    564       II = getIdentifierInfo(CleanedStr);
    565     }
    566   }
    567 
    568   // Update the token info (identifier info and appropriate token kind).
    569   Identifier.setIdentifierInfo(II);
    570   Identifier.setKind(II->getTokenID());
    571 
    572   return II;
    573 }
    574 
    575 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
    576   PoisonReasons[II] = DiagID;
    577 }
    578 
    579 void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
    580   assert(Ident__exception_code && Ident__exception_info);
    581   assert(Ident___exception_code && Ident___exception_info);
    582   Ident__exception_code->setIsPoisoned(Poison);
    583   Ident___exception_code->setIsPoisoned(Poison);
    584   Ident_GetExceptionCode->setIsPoisoned(Poison);
    585   Ident__exception_info->setIsPoisoned(Poison);
    586   Ident___exception_info->setIsPoisoned(Poison);
    587   Ident_GetExceptionInfo->setIsPoisoned(Poison);
    588   Ident__abnormal_termination->setIsPoisoned(Poison);
    589   Ident___abnormal_termination->setIsPoisoned(Poison);
    590   Ident_AbnormalTermination->setIsPoisoned(Poison);
    591 }
    592 
    593 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
    594   assert(Identifier.getIdentifierInfo() &&
    595          "Can't handle identifiers without identifier info!");
    596   llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
    597     PoisonReasons.find(Identifier.getIdentifierInfo());
    598   if(it == PoisonReasons.end())
    599     Diag(Identifier, diag::err_pp_used_poisoned_id);
    600   else
    601     Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
    602 }
    603 
    604 /// \brief Returns a diagnostic message kind for reporting a future keyword as
    605 /// appropriate for the identifier and specified language.
    606 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
    607                                           const LangOptions &LangOpts) {
    608   assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");
    609 
    610   if (LangOpts.CPlusPlus)
    611     return llvm::StringSwitch<diag::kind>(II.getName())
    612 #define CXX11_KEYWORD(NAME, FLAGS)                                             \
    613         .Case(#NAME, diag::warn_cxx11_keyword)
    614 #include "clang/Basic/TokenKinds.def"
    615         ;
    616 
    617   llvm_unreachable(
    618       "Keyword not known to come from a newer Standard or proposed Standard");
    619 }
    620 
    621 /// HandleIdentifier - This callback is invoked when the lexer reads an
    622 /// identifier.  This callback looks up the identifier in the map and/or
    623 /// potentially macro expands it or turns it into a named token (like 'for').
    624 ///
    625 /// Note that callers of this method are guarded by checking the
    626 /// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
    627 /// IdentifierInfo methods that compute these properties will need to change to
    628 /// match.
    629 bool Preprocessor::HandleIdentifier(Token &Identifier) {
    630   assert(Identifier.getIdentifierInfo() &&
    631          "Can't handle identifiers without identifier info!");
    632 
    633   IdentifierInfo &II = *Identifier.getIdentifierInfo();
    634 
    635   // If the information about this identifier is out of date, update it from
    636   // the external source.
    637   // We have to treat __VA_ARGS__ in a special way, since it gets
    638   // serialized with isPoisoned = true, but our preprocessor may have
    639   // unpoisoned it if we're defining a C99 macro.
    640   if (II.isOutOfDate()) {
    641     bool CurrentIsPoisoned = false;
    642     if (&II == Ident__VA_ARGS__)
    643       CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned();
    644 
    645     ExternalSource->updateOutOfDateIdentifier(II);
    646     Identifier.setKind(II.getTokenID());
    647 
    648     if (&II == Ident__VA_ARGS__)
    649       II.setIsPoisoned(CurrentIsPoisoned);
    650   }
    651 
    652   // If this identifier was poisoned, and if it was not produced from a macro
    653   // expansion, emit an error.
    654   if (II.isPoisoned() && CurPPLexer) {
    655     HandlePoisonedIdentifier(Identifier);
    656   }
    657 
    658   // If this is a macro to be expanded, do it.
    659   if (MacroDefinition MD = getMacroDefinition(&II)) {
    660     auto *MI = MD.getMacroInfo();
    661     assert(MI && "macro definition with no macro info?");
    662     if (!DisableMacroExpansion) {
    663       if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
    664         // C99 6.10.3p10: If the preprocessing token immediately after the
    665         // macro name isn't a '(', this macro should not be expanded.
    666         if (!MI->isFunctionLike() || isNextPPTokenLParen())
    667           return HandleMacroExpandedIdentifier(Identifier, MD);
    668       } else {
    669         // C99 6.10.3.4p2 says that a disabled macro may never again be
    670         // expanded, even if it's in a context where it could be expanded in the
    671         // future.
    672         Identifier.setFlag(Token::DisableExpand);
    673         if (MI->isObjectLike() || isNextPPTokenLParen())
    674           Diag(Identifier, diag::pp_disabled_macro_expansion);
    675       }
    676     }
    677   }
    678 
    679   // If this identifier is a keyword in a newer Standard or proposed Standard,
    680   // produce a warning. Don't warn if we're not considering macro expansion,
    681   // since this identifier might be the name of a macro.
    682   // FIXME: This warning is disabled in cases where it shouldn't be, like
    683   //   "#define constexpr constexpr", "int constexpr;"
    684   if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
    685     Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
    686         << II.getName();
    687     // Don't diagnose this keyword again in this translation unit.
    688     II.setIsFutureCompatKeyword(false);
    689   }
    690 
    691   // C++ 2.11p2: If this is an alternative representation of a C++ operator,
    692   // then we act as if it is the actual operator and not the textual
    693   // representation of it.
    694   if (II.isCPlusPlusOperatorKeyword())
    695     Identifier.setIdentifierInfo(nullptr);
    696 
    697   // If this is an extension token, diagnose its use.
    698   // We avoid diagnosing tokens that originate from macro definitions.
    699   // FIXME: This warning is disabled in cases where it shouldn't be,
    700   // like "#define TY typeof", "TY(1) x".
    701   if (II.isExtensionToken() && !DisableMacroExpansion)
    702     Diag(Identifier, diag::ext_token_used);
    703 
    704   // If this is the 'import' contextual keyword following an '@', note
    705   // that the next token indicates a module name.
    706   //
    707   // Note that we do not treat 'import' as a contextual
    708   // keyword when we're in a caching lexer, because caching lexers only get
    709   // used in contexts where import declarations are disallowed.
    710   if (LastTokenWasAt && II.isModulesImport() && !InMacroArgs &&
    711       !DisableMacroExpansion &&
    712       (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
    713       CurLexerKind != CLK_CachingLexer) {
    714     ModuleImportLoc = Identifier.getLocation();
    715     ModuleImportPath.clear();
    716     ModuleImportExpectsIdentifier = true;
    717     CurLexerKind = CLK_LexAfterModuleImport;
    718   }
    719   return true;
    720 }
    721 
    722 void Preprocessor::Lex(Token &Result) {
    723   // We loop here until a lex function returns a token; this avoids recursion.
    724   bool ReturnedToken;
    725   do {
    726     switch (CurLexerKind) {
    727     case CLK_Lexer:
    728       ReturnedToken = CurLexer->Lex(Result);
    729       break;
    730     case CLK_PTHLexer:
    731       ReturnedToken = CurPTHLexer->Lex(Result);
    732       break;
    733     case CLK_TokenLexer:
    734       ReturnedToken = CurTokenLexer->Lex(Result);
    735       break;
    736     case CLK_CachingLexer:
    737       CachingLex(Result);
    738       ReturnedToken = true;
    739       break;
    740     case CLK_LexAfterModuleImport:
    741       LexAfterModuleImport(Result);
    742       ReturnedToken = true;
    743       break;
    744     }
    745   } while (!ReturnedToken);
    746 
    747   LastTokenWasAt = Result.is(tok::at);
    748 }
    749 
    750 
    751 /// \brief Lex a token following the 'import' contextual keyword.
    752 ///
    753 void Preprocessor::LexAfterModuleImport(Token &Result) {
    754   // Figure out what kind of lexer we actually have.
    755   recomputeCurLexerKind();
    756 
    757   // Lex the next token.
    758   Lex(Result);
    759 
    760   // The token sequence
    761   //
    762   //   import identifier (. identifier)*
    763   //
    764   // indicates a module import directive. We already saw the 'import'
    765   // contextual keyword, so now we're looking for the identifiers.
    766   if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
    767     // We expected to see an identifier here, and we did; continue handling
    768     // identifiers.
    769     ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
    770                                               Result.getLocation()));
    771     ModuleImportExpectsIdentifier = false;
    772     CurLexerKind = CLK_LexAfterModuleImport;
    773     return;
    774   }
    775 
    776   // If we're expecting a '.' or a ';', and we got a '.', then wait until we
    777   // see the next identifier.
    778   if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
    779     ModuleImportExpectsIdentifier = true;
    780     CurLexerKind = CLK_LexAfterModuleImport;
    781     return;
    782   }
    783 
    784   // If we have a non-empty module path, load the named module.
    785   if (!ModuleImportPath.empty()) {
    786     Module *Imported = nullptr;
    787     if (getLangOpts().Modules) {
    788       Imported = TheModuleLoader.loadModule(ModuleImportLoc,
    789                                             ModuleImportPath,
    790                                             Module::Hidden,
    791                                             /*IsIncludeDirective=*/false);
    792       if (Imported)
    793         makeModuleVisible(Imported, ModuleImportLoc);
    794     }
    795     if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
    796       Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
    797   }
    798 }
    799 
    800 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
    801   CurSubmoduleState->VisibleModules.setVisible(
    802       M, Loc, [](Module *) {},
    803       [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
    804         // FIXME: Include the path in the diagnostic.
    805         // FIXME: Include the import location for the conflicting module.
    806         Diag(ModuleImportLoc, diag::warn_module_conflict)
    807             << Path[0]->getFullModuleName()
    808             << Conflict->getFullModuleName()
    809             << Message;
    810       });
    811 
    812   // Add this module to the imports list of the currently-built submodule.
    813   if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
    814     BuildingSubmoduleStack.back().M->Imports.insert(M);
    815 }
    816 
    817 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
    818                                           const char *DiagnosticTag,
    819                                           bool AllowMacroExpansion) {
    820   // We need at least one string literal.
    821   if (Result.isNot(tok::string_literal)) {
    822     Diag(Result, diag::err_expected_string_literal)
    823       << /*Source='in...'*/0 << DiagnosticTag;
    824     return false;
    825   }
    826 
    827   // Lex string literal tokens, optionally with macro expansion.
    828   SmallVector<Token, 4> StrToks;
    829   do {
    830     StrToks.push_back(Result);
    831 
    832     if (Result.hasUDSuffix())
    833       Diag(Result, diag::err_invalid_string_udl);
    834 
    835     if (AllowMacroExpansion)
    836       Lex(Result);
    837     else
    838       LexUnexpandedToken(Result);
    839   } while (Result.is(tok::string_literal));
    840 
    841   // Concatenate and parse the strings.
    842   StringLiteralParser Literal(StrToks, *this);
    843   assert(Literal.isAscii() && "Didn't allow wide strings in");
    844 
    845   if (Literal.hadError)
    846     return false;
    847 
    848   if (Literal.Pascal) {
    849     Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
    850       << /*Source='in...'*/0 << DiagnosticTag;
    851     return false;
    852   }
    853 
    854   String = Literal.GetString();
    855   return true;
    856 }
    857 
    858 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
    859   assert(Tok.is(tok::numeric_constant));
    860   SmallString<8> IntegerBuffer;
    861   bool NumberInvalid = false;
    862   StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
    863   if (NumberInvalid)
    864     return false;
    865   NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
    866   if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
    867     return false;
    868   llvm::APInt APVal(64, 0);
    869   if (Literal.GetIntegerValue(APVal))
    870     return false;
    871   Lex(Tok);
    872   Value = APVal.getLimitedValue();
    873   return true;
    874 }
    875 
    876 void Preprocessor::addCommentHandler(CommentHandler *Handler) {
    877   assert(Handler && "NULL comment handler");
    878   assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
    879          CommentHandlers.end() && "Comment handler already registered");
    880   CommentHandlers.push_back(Handler);
    881 }
    882 
    883 void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
    884   std::vector<CommentHandler *>::iterator Pos
    885   = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
    886   assert(Pos != CommentHandlers.end() && "Comment handler not registered");
    887   CommentHandlers.erase(Pos);
    888 }
    889 
    890 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
    891   bool AnyPendingTokens = false;
    892   for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
    893        HEnd = CommentHandlers.end();
    894        H != HEnd; ++H) {
    895     if ((*H)->HandleComment(*this, Comment))
    896       AnyPendingTokens = true;
    897   }
    898   if (!AnyPendingTokens || getCommentRetentionState())
    899     return false;
    900   Lex(result);
    901   return true;
    902 }
    903 
    904 ModuleLoader::~ModuleLoader() { }
    905 
    906 CommentHandler::~CommentHandler() { }
    907 
    908 CodeCompletionHandler::~CodeCompletionHandler() { }
    909 
    910 void Preprocessor::createPreprocessingRecord() {
    911   if (Record)
    912     return;
    913 
    914   Record = new PreprocessingRecord(getSourceManager());
    915   addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
    916 }
    917