Home | History | Annotate | Download | only in Lex
      1 //===- Preprocessor.cpp - C Language Family Preprocessor Implementation ---===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 //  This file implements the Preprocessor interface.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 //
     14 // Options to support:
     15 //   -H       - Print the name of each header file used.
     16 //   -d[DNI] - Dump various things.
     17 //   -fworking-directory - #line's with preprocessor's working dir.
     18 //   -fpreprocessed
     19 //   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
     20 //   -W*
     21 //   -w
     22 //
     23 // Messages to emit:
     24 //   "Multiple include guards may be useful for:\n"
     25 //
     26 //===----------------------------------------------------------------------===//
     27 
     28 #include "clang/Lex/Preprocessor.h"
     29 #include "clang/Basic/FileManager.h"
     30 #include "clang/Basic/FileSystemStatCache.h"
     31 #include "clang/Basic/SourceManager.h"
     32 #include "clang/Basic/TargetInfo.h"
     33 #include "clang/Lex/CodeCompletionHandler.h"
     34 #include "clang/Lex/ExternalPreprocessorSource.h"
     35 #include "clang/Lex/HeaderSearch.h"
     36 #include "clang/Lex/LexDiagnostic.h"
     37 #include "clang/Lex/LiteralSupport.h"
     38 #include "clang/Lex/MacroArgs.h"
     39 #include "clang/Lex/MacroInfo.h"
     40 #include "clang/Lex/ModuleLoader.h"
     41 #include "clang/Lex/PTHManager.h"
     42 #include "clang/Lex/Pragma.h"
     43 #include "clang/Lex/PreprocessingRecord.h"
     44 #include "clang/Lex/PreprocessorOptions.h"
     45 #include "clang/Lex/ScratchBuffer.h"
     46 #include "llvm/ADT/APFloat.h"
     47 #include "llvm/ADT/STLExtras.h"
     48 #include "llvm/ADT/SmallString.h"
     49 #include "llvm/ADT/StringExtras.h"
     50 #include "llvm/Support/Capacity.h"
     51 #include "llvm/Support/ConvertUTF.h"
     52 #include "llvm/Support/MemoryBuffer.h"
     53 #include "llvm/Support/raw_ostream.h"
     54 using namespace clang;
     55 
     56 //===----------------------------------------------------------------------===//
     57 ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
     58 
     59 Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
     60                            DiagnosticsEngine &diags, LangOptions &opts,
     61                            SourceManager &SM, HeaderSearch &Headers,
     62                            ModuleLoader &TheModuleLoader,
     63                            IdentifierInfoLookup *IILookup, bool OwnsHeaders,
     64                            TranslationUnitKind TUKind)
     65     : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(nullptr),
     66       AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM),
     67       ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
     68       TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
     69       Identifiers(opts, IILookup),
     70       PragmaHandlers(new PragmaNamespace(StringRef())),
     71       IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr),
     72       CodeCompletionFile(nullptr), CodeCompletionOffset(0),
     73       LastTokenWasAt(false), ModuleImportExpectsIdentifier(false),
     74       CodeCompletionReached(0), MainFileDir(nullptr),
     75       SkipMainFilePreamble(0, true), CurPPLexer(nullptr), CurDirLookup(nullptr),
     76       CurLexerKind(CLK_Lexer), CurSubmodule(nullptr), Callbacks(nullptr),
     77       CurSubmoduleState(&NullSubmoduleState), MacroArgCache(nullptr),
     78       Record(nullptr), MIChainHead(nullptr), DeserialMIChainHead(nullptr) {
     79   OwnsHeaderSearch = OwnsHeaders;
     80 
     81   CounterValue = 0; // __COUNTER__ starts at 0.
     82 
     83   // Clear stats.
     84   NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
     85   NumIf = NumElse = NumEndif = 0;
     86   NumEnteredSourceFiles = 0;
     87   NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
     88   NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
     89   MaxIncludeStackDepth = 0;
     90   NumSkipped = 0;
     91 
     92   // Default to discarding comments.
     93   KeepComments = false;
     94   KeepMacroComments = false;
     95   SuppressIncludeNotFoundError = false;
     96 
     97   // Macro expansion is enabled.
     98   DisableMacroExpansion = false;
     99   MacroExpansionInDirectivesOverride = false;
    100   InMacroArgs = false;
    101   InMacroArgPreExpansion = false;
    102   NumCachedTokenLexers = 0;
    103   PragmasEnabled = true;
    104   ParsingIfOrElifDirective = false;
    105   PreprocessedOutput = false;
    106 
    107   CachedLexPos = 0;
    108 
    109   // We haven't read anything from the external source.
    110   ReadMacrosFromExternalSource = false;
    111 
    112   // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
    113   // This gets unpoisoned where it is allowed.
    114   (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
    115   SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
    116 
    117   // Initialize the pragma handlers.
    118   RegisterBuiltinPragmas();
    119 
    120   // Initialize builtin macros like __LINE__ and friends.
    121   RegisterBuiltinMacros();
    122 
    123   if(LangOpts.Borland) {
    124     Ident__exception_info        = getIdentifierInfo("_exception_info");
    125     Ident___exception_info       = getIdentifierInfo("__exception_info");
    126     Ident_GetExceptionInfo       = getIdentifierInfo("GetExceptionInformation");
    127     Ident__exception_code        = getIdentifierInfo("_exception_code");
    128     Ident___exception_code       = getIdentifierInfo("__exception_code");
    129     Ident_GetExceptionCode       = getIdentifierInfo("GetExceptionCode");
    130     Ident__abnormal_termination  = getIdentifierInfo("_abnormal_termination");
    131     Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
    132     Ident_AbnormalTermination    = getIdentifierInfo("AbnormalTermination");
    133   } else {
    134     Ident__exception_info = Ident__exception_code = nullptr;
    135     Ident__abnormal_termination = Ident___exception_info = nullptr;
    136     Ident___exception_code = Ident___abnormal_termination = nullptr;
    137     Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
    138     Ident_AbnormalTermination = nullptr;
    139   }
    140 }
    141 
    142 Preprocessor::~Preprocessor() {
    143   assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
    144 
    145   IncludeMacroStack.clear();
    146 
    147   // Destroy any macro definitions.
    148   while (MacroInfoChain *I = MIChainHead) {
    149     MIChainHead = I->Next;
    150     I->~MacroInfoChain();
    151   }
    152 
    153   // Free any cached macro expanders.
    154   // This populates MacroArgCache, so all TokenLexers need to be destroyed
    155   // before the code below that frees up the MacroArgCache list.
    156   std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
    157   CurTokenLexer.reset();
    158 
    159   while (DeserializedMacroInfoChain *I = DeserialMIChainHead) {
    160     DeserialMIChainHead = I->Next;
    161     I->~DeserializedMacroInfoChain();
    162   }
    163 
    164   // Free any cached MacroArgs.
    165   for (MacroArgs *ArgList = MacroArgCache; ArgList;)
    166     ArgList = ArgList->deallocate();
    167 
    168   // Delete the header search info, if we own it.
    169   if (OwnsHeaderSearch)
    170     delete &HeaderInfo;
    171 }
    172 
    173 void Preprocessor::Initialize(const TargetInfo &Target,
    174                               const TargetInfo *AuxTarget) {
    175   assert((!this->Target || this->Target == &Target) &&
    176          "Invalid override of target information");
    177   this->Target = &Target;
    178 
    179   assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
    180          "Invalid override of aux target information.");
    181   this->AuxTarget = AuxTarget;
    182 
    183   // Initialize information about built-ins.
    184   BuiltinInfo.InitializeTarget(Target, AuxTarget);
    185   HeaderInfo.setTarget(Target);
    186 }
    187 
    188 void Preprocessor::InitializeForModelFile() {
    189   NumEnteredSourceFiles = 0;
    190 
    191   // Reset pragmas
    192   PragmaHandlersBackup = std::move(PragmaHandlers);
    193   PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef());
    194   RegisterBuiltinPragmas();
    195 
    196   // Reset PredefinesFileID
    197   PredefinesFileID = FileID();
    198 }
    199 
    200 void Preprocessor::FinalizeForModelFile() {
    201   NumEnteredSourceFiles = 1;
    202 
    203   PragmaHandlers = std::move(PragmaHandlersBackup);
    204 }
    205 
    206 void Preprocessor::setPTHManager(PTHManager* pm) {
    207   PTH.reset(pm);
    208   FileMgr.addStatCache(PTH->createStatCache());
    209 }
    210 
    211 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
    212   llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
    213                << getSpelling(Tok) << "'";
    214 
    215   if (!DumpFlags) return;
    216 
    217   llvm::errs() << "\t";
    218   if (Tok.isAtStartOfLine())
    219     llvm::errs() << " [StartOfLine]";
    220   if (Tok.hasLeadingSpace())
    221     llvm::errs() << " [LeadingSpace]";
    222   if (Tok.isExpandDisabled())
    223     llvm::errs() << " [ExpandDisabled]";
    224   if (Tok.needsCleaning()) {
    225     const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
    226     llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
    227                  << "']";
    228   }
    229 
    230   llvm::errs() << "\tLoc=<";
    231   DumpLocation(Tok.getLocation());
    232   llvm::errs() << ">";
    233 }
    234 
    235 void Preprocessor::DumpLocation(SourceLocation Loc) const {
    236   Loc.dump(SourceMgr);
    237 }
    238 
    239 void Preprocessor::DumpMacro(const MacroInfo &MI) const {
    240   llvm::errs() << "MACRO: ";
    241   for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
    242     DumpToken(MI.getReplacementToken(i));
    243     llvm::errs() << "  ";
    244   }
    245   llvm::errs() << "\n";
    246 }
    247 
    248 void Preprocessor::PrintStats() {
    249   llvm::errs() << "\n*** Preprocessor Stats:\n";
    250   llvm::errs() << NumDirectives << " directives found:\n";
    251   llvm::errs() << "  " << NumDefined << " #define.\n";
    252   llvm::errs() << "  " << NumUndefined << " #undef.\n";
    253   llvm::errs() << "  #include/#include_next/#import:\n";
    254   llvm::errs() << "    " << NumEnteredSourceFiles << " source files entered.\n";
    255   llvm::errs() << "    " << MaxIncludeStackDepth << " max include stack depth\n";
    256   llvm::errs() << "  " << NumIf << " #if/#ifndef/#ifdef.\n";
    257   llvm::errs() << "  " << NumElse << " #else/#elif.\n";
    258   llvm::errs() << "  " << NumEndif << " #endif.\n";
    259   llvm::errs() << "  " << NumPragma << " #pragma.\n";
    260   llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
    261 
    262   llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
    263              << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
    264              << NumFastMacroExpanded << " on the fast path.\n";
    265   llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
    266              << " token paste (##) operations performed, "
    267              << NumFastTokenPaste << " on the fast path.\n";
    268 
    269   llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
    270 
    271   llvm::errs() << "\n  BumpPtr: " << BP.getTotalMemory();
    272   llvm::errs() << "\n  Macro Expanded Tokens: "
    273                << llvm::capacity_in_bytes(MacroExpandedTokens);
    274   llvm::errs() << "\n  Predefines Buffer: " << Predefines.capacity();
    275   // FIXME: List information for all submodules.
    276   llvm::errs() << "\n  Macros: "
    277                << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
    278   llvm::errs() << "\n  #pragma push_macro Info: "
    279                << llvm::capacity_in_bytes(PragmaPushMacroInfo);
    280   llvm::errs() << "\n  Poison Reasons: "
    281                << llvm::capacity_in_bytes(PoisonReasons);
    282   llvm::errs() << "\n  Comment Handlers: "
    283                << llvm::capacity_in_bytes(CommentHandlers) << "\n";
    284 }
    285 
    286 Preprocessor::macro_iterator
    287 Preprocessor::macro_begin(bool IncludeExternalMacros) const {
    288   if (IncludeExternalMacros && ExternalSource &&
    289       !ReadMacrosFromExternalSource) {
    290     ReadMacrosFromExternalSource = true;
    291     ExternalSource->ReadDefinedMacros();
    292   }
    293 
    294   // Make sure we cover all macros in visible modules.
    295   for (const ModuleMacro &Macro : ModuleMacros)
    296     CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));
    297 
    298   return CurSubmoduleState->Macros.begin();
    299 }
    300 
    301 size_t Preprocessor::getTotalMemory() const {
    302   return BP.getTotalMemory()
    303     + llvm::capacity_in_bytes(MacroExpandedTokens)
    304     + Predefines.capacity() /* Predefines buffer. */
    305     // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
    306     // and ModuleMacros.
    307     + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
    308     + llvm::capacity_in_bytes(PragmaPushMacroInfo)
    309     + llvm::capacity_in_bytes(PoisonReasons)
    310     + llvm::capacity_in_bytes(CommentHandlers);
    311 }
    312 
    313 Preprocessor::macro_iterator
    314 Preprocessor::macro_end(bool IncludeExternalMacros) const {
    315   if (IncludeExternalMacros && ExternalSource &&
    316       !ReadMacrosFromExternalSource) {
    317     ReadMacrosFromExternalSource = true;
    318     ExternalSource->ReadDefinedMacros();
    319   }
    320 
    321   return CurSubmoduleState->Macros.end();
    322 }
    323 
    324 /// \brief Compares macro tokens with a specified token value sequence.
    325 static bool MacroDefinitionEquals(const MacroInfo *MI,
    326                                   ArrayRef<TokenValue> Tokens) {
    327   return Tokens.size() == MI->getNumTokens() &&
    328       std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
    329 }
    330 
    331 StringRef Preprocessor::getLastMacroWithSpelling(
    332                                     SourceLocation Loc,
    333                                     ArrayRef<TokenValue> Tokens) const {
    334   SourceLocation BestLocation;
    335   StringRef BestSpelling;
    336   for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
    337        I != E; ++I) {
    338     const MacroDirective::DefInfo
    339       Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
    340     if (!Def || !Def.getMacroInfo())
    341       continue;
    342     if (!Def.getMacroInfo()->isObjectLike())
    343       continue;
    344     if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
    345       continue;
    346     SourceLocation Location = Def.getLocation();
    347     // Choose the macro defined latest.
    348     if (BestLocation.isInvalid() ||
    349         (Location.isValid() &&
    350          SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
    351       BestLocation = Location;
    352       BestSpelling = I->first->getName();
    353     }
    354   }
    355   return BestSpelling;
    356 }
    357 
    358 void Preprocessor::recomputeCurLexerKind() {
    359   if (CurLexer)
    360     CurLexerKind = CLK_Lexer;
    361   else if (CurPTHLexer)
    362     CurLexerKind = CLK_PTHLexer;
    363   else if (CurTokenLexer)
    364     CurLexerKind = CLK_TokenLexer;
    365   else
    366     CurLexerKind = CLK_CachingLexer;
    367 }
    368 
    369 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
    370                                           unsigned CompleteLine,
    371                                           unsigned CompleteColumn) {
    372   assert(File);
    373   assert(CompleteLine && CompleteColumn && "Starts from 1:1");
    374   assert(!CodeCompletionFile && "Already set");
    375 
    376   using llvm::MemoryBuffer;
    377 
    378   // Load the actual file's contents.
    379   bool Invalid = false;
    380   const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
    381   if (Invalid)
    382     return true;
    383 
    384   // Find the byte position of the truncation point.
    385   const char *Position = Buffer->getBufferStart();
    386   for (unsigned Line = 1; Line < CompleteLine; ++Line) {
    387     for (; *Position; ++Position) {
    388       if (*Position != '\r' && *Position != '\n')
    389         continue;
    390 
    391       // Eat \r\n or \n\r as a single line.
    392       if ((Position[1] == '\r' || Position[1] == '\n') &&
    393           Position[0] != Position[1])
    394         ++Position;
    395       ++Position;
    396       break;
    397     }
    398   }
    399 
    400   Position += CompleteColumn - 1;
    401 
    402   // If pointing inside the preamble, adjust the position at the beginning of
    403   // the file after the preamble.
    404   if (SkipMainFilePreamble.first &&
    405       SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
    406     if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
    407       Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
    408   }
    409 
    410   if (Position > Buffer->getBufferEnd())
    411     Position = Buffer->getBufferEnd();
    412 
    413   CodeCompletionFile = File;
    414   CodeCompletionOffset = Position - Buffer->getBufferStart();
    415 
    416   std::unique_ptr<MemoryBuffer> NewBuffer =
    417       MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,
    418                                           Buffer->getBufferIdentifier());
    419   char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());
    420   char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
    421   *NewPos = '\0';
    422   std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
    423   SourceMgr.overrideFileContents(File, std::move(NewBuffer));
    424 
    425   return false;
    426 }
    427 
    428 void Preprocessor::CodeCompleteNaturalLanguage() {
    429   if (CodeComplete)
    430     CodeComplete->CodeCompleteNaturalLanguage();
    431   setCodeCompletionReached();
    432 }
    433 
    434 /// getSpelling - This method is used to get the spelling of a token into a
    435 /// SmallVector. Note that the returned StringRef may not point to the
    436 /// supplied buffer if a copy can be avoided.
    437 StringRef Preprocessor::getSpelling(const Token &Tok,
    438                                           SmallVectorImpl<char> &Buffer,
    439                                           bool *Invalid) const {
    440   // NOTE: this has to be checked *before* testing for an IdentifierInfo.
    441   if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
    442     // Try the fast path.
    443     if (const IdentifierInfo *II = Tok.getIdentifierInfo())
    444       return II->getName();
    445   }
    446 
    447   // Resize the buffer if we need to copy into it.
    448   if (Tok.needsCleaning())
    449     Buffer.resize(Tok.getLength());
    450 
    451   const char *Ptr = Buffer.data();
    452   unsigned Len = getSpelling(Tok, Ptr, Invalid);
    453   return StringRef(Ptr, Len);
    454 }
    455 
    456 /// CreateString - Plop the specified string into a scratch buffer and return a
    457 /// location for it.  If specified, the source location provides a source
    458 /// location for the token.
    459 void Preprocessor::CreateString(StringRef Str, Token &Tok,
    460                                 SourceLocation ExpansionLocStart,
    461                                 SourceLocation ExpansionLocEnd) {
    462   Tok.setLength(Str.size());
    463 
    464   const char *DestPtr;
    465   SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
    466 
    467   if (ExpansionLocStart.isValid())
    468     Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
    469                                        ExpansionLocEnd, Str.size());
    470   Tok.setLocation(Loc);
    471 
    472   // If this is a raw identifier or a literal token, set the pointer data.
    473   if (Tok.is(tok::raw_identifier))
    474     Tok.setRawIdentifierData(DestPtr);
    475   else if (Tok.isLiteral())
    476     Tok.setLiteralData(DestPtr);
    477 }
    478 
    479 Module *Preprocessor::getCurrentModule() {
    480   if (getLangOpts().CurrentModule.empty())
    481     return nullptr;
    482 
    483   return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
    484 }
    485 
    486 //===----------------------------------------------------------------------===//
    487 // Preprocessor Initialization Methods
    488 //===----------------------------------------------------------------------===//
    489 
    490 
    491 /// EnterMainSourceFile - Enter the specified FileID as the main source file,
    492 /// which implicitly adds the builtin defines etc.
    493 void Preprocessor::EnterMainSourceFile() {
    494   // We do not allow the preprocessor to reenter the main file.  Doing so will
    495   // cause FileID's to accumulate information from both runs (e.g. #line
    496   // information) and predefined macros aren't guaranteed to be set properly.
    497   assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
    498   FileID MainFileID = SourceMgr.getMainFileID();
    499 
    500   // If MainFileID is loaded it means we loaded an AST file, no need to enter
    501   // a main file.
    502   if (!SourceMgr.isLoadedFileID(MainFileID)) {
    503     // Enter the main file source buffer.
    504     EnterSourceFile(MainFileID, nullptr, SourceLocation());
    505 
    506     // If we've been asked to skip bytes in the main file (e.g., as part of a
    507     // precompiled preamble), do so now.
    508     if (SkipMainFilePreamble.first > 0)
    509       CurLexer->SkipBytes(SkipMainFilePreamble.first,
    510                           SkipMainFilePreamble.second);
    511 
    512     // Tell the header info that the main file was entered.  If the file is later
    513     // #imported, it won't be re-entered.
    514     if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
    515       HeaderInfo.IncrementIncludeCount(FE);
    516   }
    517 
    518   // Preprocess Predefines to populate the initial preprocessor state.
    519   std::unique_ptr<llvm::MemoryBuffer> SB =
    520     llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
    521   assert(SB && "Cannot create predefined source buffer");
    522   FileID FID = SourceMgr.createFileID(std::move(SB));
    523   assert(FID.isValid() && "Could not create FileID for predefines?");
    524   setPredefinesFileID(FID);
    525 
    526   // Start parsing the predefines.
    527   EnterSourceFile(FID, nullptr, SourceLocation());
    528 }
    529 
    530 void Preprocessor::EndSourceFile() {
    531   // Notify the client that we reached the end of the source file.
    532   if (Callbacks)
    533     Callbacks->EndOfMainFile();
    534 }
    535 
    536 //===----------------------------------------------------------------------===//
    537 // Lexer Event Handling.
    538 //===----------------------------------------------------------------------===//
    539 
    540 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
    541 /// identifier information for the token and install it into the token,
    542 /// updating the token kind accordingly.
    543 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
    544   assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
    545 
    546   // Look up this token, see if it is a macro, or if it is a language keyword.
    547   IdentifierInfo *II;
    548   if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
    549     // No cleaning needed, just use the characters from the lexed buffer.
    550     II = getIdentifierInfo(Identifier.getRawIdentifier());
    551   } else {
    552     // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
    553     SmallString<64> IdentifierBuffer;
    554     StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
    555 
    556     if (Identifier.hasUCN()) {
    557       SmallString<64> UCNIdentifierBuffer;
    558       expandUCNs(UCNIdentifierBuffer, CleanedStr);
    559       II = getIdentifierInfo(UCNIdentifierBuffer);
    560     } else {
    561       II = getIdentifierInfo(CleanedStr);
    562     }
    563   }
    564 
    565   // Update the token info (identifier info and appropriate token kind).
    566   Identifier.setIdentifierInfo(II);
    567   Identifier.setKind(II->getTokenID());
    568 
    569   return II;
    570 }
    571 
    572 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
    573   PoisonReasons[II] = DiagID;
    574 }
    575 
    576 void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
    577   assert(Ident__exception_code && Ident__exception_info);
    578   assert(Ident___exception_code && Ident___exception_info);
    579   Ident__exception_code->setIsPoisoned(Poison);
    580   Ident___exception_code->setIsPoisoned(Poison);
    581   Ident_GetExceptionCode->setIsPoisoned(Poison);
    582   Ident__exception_info->setIsPoisoned(Poison);
    583   Ident___exception_info->setIsPoisoned(Poison);
    584   Ident_GetExceptionInfo->setIsPoisoned(Poison);
    585   Ident__abnormal_termination->setIsPoisoned(Poison);
    586   Ident___abnormal_termination->setIsPoisoned(Poison);
    587   Ident_AbnormalTermination->setIsPoisoned(Poison);
    588 }
    589 
    590 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
    591   assert(Identifier.getIdentifierInfo() &&
    592          "Can't handle identifiers without identifier info!");
    593   llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
    594     PoisonReasons.find(Identifier.getIdentifierInfo());
    595   if(it == PoisonReasons.end())
    596     Diag(Identifier, diag::err_pp_used_poisoned_id);
    597   else
    598     Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
    599 }
    600 
    601 /// \brief Returns a diagnostic message kind for reporting a future keyword as
    602 /// appropriate for the identifier and specified language.
    603 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
    604                                           const LangOptions &LangOpts) {
    605   assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");
    606 
    607   if (LangOpts.CPlusPlus)
    608     return llvm::StringSwitch<diag::kind>(II.getName())
    609 #define CXX11_KEYWORD(NAME, FLAGS)                                             \
    610         .Case(#NAME, diag::warn_cxx11_keyword)
    611 #include "clang/Basic/TokenKinds.def"
    612         ;
    613 
    614   llvm_unreachable(
    615       "Keyword not known to come from a newer Standard or proposed Standard");
    616 }
    617 
    618 /// HandleIdentifier - This callback is invoked when the lexer reads an
    619 /// identifier.  This callback looks up the identifier in the map and/or
    620 /// potentially macro expands it or turns it into a named token (like 'for').
    621 ///
    622 /// Note that callers of this method are guarded by checking the
    623 /// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
    624 /// IdentifierInfo methods that compute these properties will need to change to
    625 /// match.
    626 bool Preprocessor::HandleIdentifier(Token &Identifier) {
    627   assert(Identifier.getIdentifierInfo() &&
    628          "Can't handle identifiers without identifier info!");
    629 
    630   IdentifierInfo &II = *Identifier.getIdentifierInfo();
    631 
    632   // If the information about this identifier is out of date, update it from
    633   // the external source.
    634   // We have to treat __VA_ARGS__ in a special way, since it gets
    635   // serialized with isPoisoned = true, but our preprocessor may have
    636   // unpoisoned it if we're defining a C99 macro.
    637   if (II.isOutOfDate()) {
    638     bool CurrentIsPoisoned = false;
    639     if (&II == Ident__VA_ARGS__)
    640       CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned();
    641 
    642     ExternalSource->updateOutOfDateIdentifier(II);
    643     Identifier.setKind(II.getTokenID());
    644 
    645     if (&II == Ident__VA_ARGS__)
    646       II.setIsPoisoned(CurrentIsPoisoned);
    647   }
    648 
    649   // If this identifier was poisoned, and if it was not produced from a macro
    650   // expansion, emit an error.
    651   if (II.isPoisoned() && CurPPLexer) {
    652     HandlePoisonedIdentifier(Identifier);
    653   }
    654 
    655   // If this is a macro to be expanded, do it.
    656   if (MacroDefinition MD = getMacroDefinition(&II)) {
    657     auto *MI = MD.getMacroInfo();
    658     assert(MI && "macro definition with no macro info?");
    659     if (!DisableMacroExpansion) {
    660       if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
    661         // C99 6.10.3p10: If the preprocessing token immediately after the
    662         // macro name isn't a '(', this macro should not be expanded.
    663         if (!MI->isFunctionLike() || isNextPPTokenLParen())
    664           return HandleMacroExpandedIdentifier(Identifier, MD);
    665       } else {
    666         // C99 6.10.3.4p2 says that a disabled macro may never again be
    667         // expanded, even if it's in a context where it could be expanded in the
    668         // future.
    669         Identifier.setFlag(Token::DisableExpand);
    670         if (MI->isObjectLike() || isNextPPTokenLParen())
    671           Diag(Identifier, diag::pp_disabled_macro_expansion);
    672       }
    673     }
    674   }
    675 
    676   // If this identifier is a keyword in a newer Standard or proposed Standard,
    677   // produce a warning. Don't warn if we're not considering macro expansion,
    678   // since this identifier might be the name of a macro.
    679   // FIXME: This warning is disabled in cases where it shouldn't be, like
    680   //   "#define constexpr constexpr", "int constexpr;"
    681   if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
    682     Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
    683         << II.getName();
    684     // Don't diagnose this keyword again in this translation unit.
    685     II.setIsFutureCompatKeyword(false);
    686   }
    687 
    688   // C++ 2.11p2: If this is an alternative representation of a C++ operator,
    689   // then we act as if it is the actual operator and not the textual
    690   // representation of it.
    691   if (II.isCPlusPlusOperatorKeyword())
    692     Identifier.setIdentifierInfo(nullptr);
    693 
    694   // If this is an extension token, diagnose its use.
    695   // We avoid diagnosing tokens that originate from macro definitions.
    696   // FIXME: This warning is disabled in cases where it shouldn't be,
    697   // like "#define TY typeof", "TY(1) x".
    698   if (II.isExtensionToken() && !DisableMacroExpansion)
    699     Diag(Identifier, diag::ext_token_used);
    700 
    701   // If this is the 'import' contextual keyword following an '@', note
    702   // that the next token indicates a module name.
    703   //
    704   // Note that we do not treat 'import' as a contextual
    705   // keyword when we're in a caching lexer, because caching lexers only get
    706   // used in contexts where import declarations are disallowed.
    707   if (LastTokenWasAt && II.isModulesImport() && !InMacroArgs &&
    708       !DisableMacroExpansion &&
    709       (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
    710       CurLexerKind != CLK_CachingLexer) {
    711     ModuleImportLoc = Identifier.getLocation();
    712     ModuleImportPath.clear();
    713     ModuleImportExpectsIdentifier = true;
    714     CurLexerKind = CLK_LexAfterModuleImport;
    715   }
    716   return true;
    717 }
    718 
    719 void Preprocessor::Lex(Token &Result) {
    720   // We loop here until a lex function returns a token; this avoids recursion.
    721   bool ReturnedToken;
    722   do {
    723     switch (CurLexerKind) {
    724     case CLK_Lexer:
    725       ReturnedToken = CurLexer->Lex(Result);
    726       break;
    727     case CLK_PTHLexer:
    728       ReturnedToken = CurPTHLexer->Lex(Result);
    729       break;
    730     case CLK_TokenLexer:
    731       ReturnedToken = CurTokenLexer->Lex(Result);
    732       break;
    733     case CLK_CachingLexer:
    734       CachingLex(Result);
    735       ReturnedToken = true;
    736       break;
    737     case CLK_LexAfterModuleImport:
    738       LexAfterModuleImport(Result);
    739       ReturnedToken = true;
    740       break;
    741     }
    742   } while (!ReturnedToken);
    743 
    744   LastTokenWasAt = Result.is(tok::at);
    745 }
    746 
    747 
    748 /// \brief Lex a token following the 'import' contextual keyword.
    749 ///
    750 void Preprocessor::LexAfterModuleImport(Token &Result) {
    751   // Figure out what kind of lexer we actually have.
    752   recomputeCurLexerKind();
    753 
    754   // Lex the next token.
    755   Lex(Result);
    756 
    757   // The token sequence
    758   //
    759   //   import identifier (. identifier)*
    760   //
    761   // indicates a module import directive. We already saw the 'import'
    762   // contextual keyword, so now we're looking for the identifiers.
    763   if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
    764     // We expected to see an identifier here, and we did; continue handling
    765     // identifiers.
    766     ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
    767                                               Result.getLocation()));
    768     ModuleImportExpectsIdentifier = false;
    769     CurLexerKind = CLK_LexAfterModuleImport;
    770     return;
    771   }
    772 
    773   // If we're expecting a '.' or a ';', and we got a '.', then wait until we
    774   // see the next identifier.
    775   if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
    776     ModuleImportExpectsIdentifier = true;
    777     CurLexerKind = CLK_LexAfterModuleImport;
    778     return;
    779   }
    780 
    781   // If we have a non-empty module path, load the named module.
    782   if (!ModuleImportPath.empty()) {
    783     Module *Imported = nullptr;
    784     if (getLangOpts().Modules) {
    785       Imported = TheModuleLoader.loadModule(ModuleImportLoc,
    786                                             ModuleImportPath,
    787                                             Module::Hidden,
    788                                             /*IsIncludeDirective=*/false);
    789       if (Imported)
    790         makeModuleVisible(Imported, ModuleImportLoc);
    791     }
    792     if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
    793       Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
    794   }
    795 }
    796 
    797 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
    798   CurSubmoduleState->VisibleModules.setVisible(
    799       M, Loc, [](Module *) {},
    800       [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
    801         // FIXME: Include the path in the diagnostic.
    802         // FIXME: Include the import location for the conflicting module.
    803         Diag(ModuleImportLoc, diag::warn_module_conflict)
    804             << Path[0]->getFullModuleName()
    805             << Conflict->getFullModuleName()
    806             << Message;
    807       });
    808 
    809   // Add this module to the imports list of the currently-built submodule.
    810   if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
    811     BuildingSubmoduleStack.back().M->Imports.insert(M);
    812 }
    813 
    814 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
    815                                           const char *DiagnosticTag,
    816                                           bool AllowMacroExpansion) {
    817   // We need at least one string literal.
    818   if (Result.isNot(tok::string_literal)) {
    819     Diag(Result, diag::err_expected_string_literal)
    820       << /*Source='in...'*/0 << DiagnosticTag;
    821     return false;
    822   }
    823 
    824   // Lex string literal tokens, optionally with macro expansion.
    825   SmallVector<Token, 4> StrToks;
    826   do {
    827     StrToks.push_back(Result);
    828 
    829     if (Result.hasUDSuffix())
    830       Diag(Result, diag::err_invalid_string_udl);
    831 
    832     if (AllowMacroExpansion)
    833       Lex(Result);
    834     else
    835       LexUnexpandedToken(Result);
    836   } while (Result.is(tok::string_literal));
    837 
    838   // Concatenate and parse the strings.
    839   StringLiteralParser Literal(StrToks, *this);
    840   assert(Literal.isAscii() && "Didn't allow wide strings in");
    841 
    842   if (Literal.hadError)
    843     return false;
    844 
    845   if (Literal.Pascal) {
    846     Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
    847       << /*Source='in...'*/0 << DiagnosticTag;
    848     return false;
    849   }
    850 
    851   String = Literal.GetString();
    852   return true;
    853 }
    854 
    855 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
    856   assert(Tok.is(tok::numeric_constant));
    857   SmallString<8> IntegerBuffer;
    858   bool NumberInvalid = false;
    859   StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
    860   if (NumberInvalid)
    861     return false;
    862   NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
    863   if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
    864     return false;
    865   llvm::APInt APVal(64, 0);
    866   if (Literal.GetIntegerValue(APVal))
    867     return false;
    868   Lex(Tok);
    869   Value = APVal.getLimitedValue();
    870   return true;
    871 }
    872 
    873 void Preprocessor::addCommentHandler(CommentHandler *Handler) {
    874   assert(Handler && "NULL comment handler");
    875   assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
    876          CommentHandlers.end() && "Comment handler already registered");
    877   CommentHandlers.push_back(Handler);
    878 }
    879 
    880 void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
    881   std::vector<CommentHandler *>::iterator Pos
    882   = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
    883   assert(Pos != CommentHandlers.end() && "Comment handler not registered");
    884   CommentHandlers.erase(Pos);
    885 }
    886 
    887 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
    888   bool AnyPendingTokens = false;
    889   for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
    890        HEnd = CommentHandlers.end();
    891        H != HEnd; ++H) {
    892     if ((*H)->HandleComment(*this, Comment))
    893       AnyPendingTokens = true;
    894   }
    895   if (!AnyPendingTokens || getCommentRetentionState())
    896     return false;
    897   Lex(result);
    898   return true;
    899 }
    900 
    901 ModuleLoader::~ModuleLoader() { }
    902 
    903 CommentHandler::~CommentHandler() { }
    904 
    905 CodeCompletionHandler::~CodeCompletionHandler() { }
    906 
    907 void Preprocessor::createPreprocessingRecord() {
    908   if (Record)
    909     return;
    910 
    911   Record = new PreprocessingRecord(getSourceManager());
    912   addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
    913 }
    914