1 //===--- Preprocessor.h - C Language Family Preprocessor --------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief Defines the clang::Preprocessor interface. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H 16 #define LLVM_CLANG_LEX_PREPROCESSOR_H 17 18 #include "clang/Basic/Builtins.h" 19 #include "clang/Basic/Diagnostic.h" 20 #include "clang/Basic/IdentifierTable.h" 21 #include "clang/Basic/SourceLocation.h" 22 #include "clang/Lex/Lexer.h" 23 #include "clang/Lex/MacroInfo.h" 24 #include "clang/Lex/ModuleMap.h" 25 #include "clang/Lex/PPCallbacks.h" 26 #include "clang/Lex/PTHLexer.h" 27 #include "clang/Lex/TokenLexer.h" 28 #include "llvm/ADT/ArrayRef.h" 29 #include "llvm/ADT/DenseMap.h" 30 #include "llvm/ADT/IntrusiveRefCntPtr.h" 31 #include "llvm/ADT/SmallPtrSet.h" 32 #include "llvm/ADT/SmallVector.h" 33 #include "llvm/ADT/TinyPtrVector.h" 34 #include "llvm/Support/Allocator.h" 35 #include "llvm/Support/Registry.h" 36 #include <memory> 37 #include <vector> 38 39 namespace llvm { 40 template<unsigned InternalLen> class SmallString; 41 } 42 43 namespace clang { 44 45 class SourceManager; 46 class ExternalPreprocessorSource; 47 class FileManager; 48 class FileEntry; 49 class HeaderSearch; 50 class MemoryBufferCache; 51 class PragmaNamespace; 52 class PragmaHandler; 53 class CommentHandler; 54 class ScratchBuffer; 55 class TargetInfo; 56 class PPCallbacks; 57 class CodeCompletionHandler; 58 class DirectoryLookup; 59 class PreprocessingRecord; 60 class ModuleLoader; 61 class PTHManager; 62 class PreprocessorOptions; 63 64 /// \brief Stores token information for comparing actual tokens with 65 /// predefined values. 
Only handles simple tokens and identifiers. 66 class TokenValue { 67 tok::TokenKind Kind; 68 IdentifierInfo *II; 69 70 public: 71 TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) { 72 assert(Kind != tok::raw_identifier && "Raw identifiers are not supported."); 73 assert(Kind != tok::identifier && 74 "Identifiers should be created by TokenValue(IdentifierInfo *)"); 75 assert(!tok::isLiteral(Kind) && "Literals are not supported."); 76 assert(!tok::isAnnotation(Kind) && "Annotations are not supported."); 77 } 78 TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {} 79 bool operator==(const Token &Tok) const { 80 return Tok.getKind() == Kind && 81 (!II || II == Tok.getIdentifierInfo()); 82 } 83 }; 84 85 /// \brief Context in which macro name is used. 86 enum MacroUse { 87 MU_Other = 0, // other than #define or #undef 88 MU_Define = 1, // macro name specified in #define 89 MU_Undef = 2 // macro name specified in #undef 90 }; 91 92 /// \brief Engages in a tight little dance with the lexer to efficiently 93 /// preprocess tokens. 94 /// 95 /// Lexers know only about tokens within a single source file, and don't 96 /// know anything about preprocessor-level issues like the \#include stack, 97 /// token expansion, etc. 98 class Preprocessor { 99 std::shared_ptr<PreprocessorOptions> PPOpts; 100 DiagnosticsEngine *Diags; 101 LangOptions &LangOpts; 102 const TargetInfo *Target; 103 const TargetInfo *AuxTarget; 104 FileManager &FileMgr; 105 SourceManager &SourceMgr; 106 MemoryBufferCache &PCMCache; 107 std::unique_ptr<ScratchBuffer> ScratchBuf; 108 HeaderSearch &HeaderInfo; 109 ModuleLoader &TheModuleLoader; 110 111 /// \brief External source of macros. 112 ExternalPreprocessorSource *ExternalSource; 113 114 115 /// An optional PTHManager object used for getting tokens from 116 /// a token cache rather than lexing the original source file. 
std::unique_ptr<PTHManager> PTH;

/// A BumpPtrAllocator object used to quickly allocate and release
/// objects internal to the Preprocessor.
llvm::BumpPtrAllocator BP;

/// Identifiers for builtin macros and other builtins. The trailing comment
/// on each line gives the spelling the identifier stands for.
IdentifierInfo *Ident__LINE__, *Ident__FILE__;   // __LINE__, __FILE__
IdentifierInfo *Ident__DATE__, *Ident__TIME__;   // __DATE__, __TIME__
IdentifierInfo *Ident__INCLUDE_LEVEL__;          // __INCLUDE_LEVEL__
IdentifierInfo *Ident__BASE_FILE__;              // __BASE_FILE__
IdentifierInfo *Ident__TIMESTAMP__;              // __TIMESTAMP__
IdentifierInfo *Ident__COUNTER__;                // __COUNTER__
IdentifierInfo *Ident_Pragma, *Ident__pragma;    // _Pragma, __pragma
IdentifierInfo *Ident__identifier;               // __identifier
IdentifierInfo *Ident__VA_ARGS__;                // __VA_ARGS__
IdentifierInfo *Ident__has_feature;              // __has_feature
IdentifierInfo *Ident__has_extension;            // __has_extension
IdentifierInfo *Ident__has_builtin;              // __has_builtin
IdentifierInfo *Ident__has_attribute;            // __has_attribute
IdentifierInfo *Ident__has_include;              // __has_include
IdentifierInfo *Ident__has_include_next;         // __has_include_next
IdentifierInfo *Ident__has_warning;              // __has_warning
IdentifierInfo *Ident__is_identifier;            // __is_identifier
IdentifierInfo *Ident__building_module;          // __building_module
IdentifierInfo *Ident__MODULE__;                 // __MODULE__
IdentifierInfo *Ident__has_cpp_attribute;        // __has_cpp_attribute
IdentifierInfo *Ident__has_declspec;             // __has_declspec_attribute

// NOTE(review): presumably the locations used for __DATE__ / __TIME__;
// confirm against the implementation file.
SourceLocation DATELoc, TIMELoc;
unsigned CounterValue;  // Next __COUNTER__ value.

enum {
  /// \brief Maximum depth of \#includes.
  MaxAllowedIncludeStackDepth = 200
};

// State that is set before the preprocessor begins.
bool KeepComments : 1;
bool KeepMacroComments : 1;
bool SuppressIncludeNotFoundError : 1;

// State that changes while the preprocessor runs:
bool InMacroArgs : 1;            // True if parsing fn macro invocation args.

/// Whether the preprocessor owns the header search object.
bool OwnsHeaderSearch : 1;

/// True if macro expansion is disabled.
bool DisableMacroExpansion : 1;

/// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
/// when parsing preprocessor directives.
bool MacroExpansionInDirectivesOverride : 1;

class ResetMacroExpansionHelper;

/// \brief Whether we have already loaded macros from the external source.
///
/// Declared mutable so that it can be updated from const member functions.
mutable bool ReadMacrosFromExternalSource : 1;

/// \brief True if pragmas are enabled.
bool PragmasEnabled : 1;

/// \brief True if the current build action is a preprocessing action.
bool PreprocessedOutput : 1;

/// \brief True if we are currently preprocessing a #if or #elif directive
///
/// Unlike the flags above, this is a plain bool rather than a bit-field.
bool ParsingIfOrElifDirective;

/// \brief True if we are pre-expanding macro arguments.
///
/// Also a plain bool rather than a bit-field.
bool InMacroArgPreExpansion;

/// \brief Mapping/lookup information for all identifiers in
/// the program, including program keywords.
mutable IdentifierTable Identifiers;

/// \brief This table contains all the selectors in the program.
///
/// Unlike IdentifierTable above, this table *isn't* populated by the
/// preprocessor. It is declared/expanded here because its role/lifetime is
/// conceptually similar to the IdentifierTable. In addition, the current
/// control flow (in clang::ParseAST()) makes it convenient to put here.
///
/// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
/// the lifetime of the preprocessor.
SelectorTable Selectors;

/// \brief Information about builtins.
Builtin::Context BuiltinInfo;

/// \brief Tracks all of the pragmas that the client registered
/// with this preprocessor.
std::unique_ptr<PragmaNamespace> PragmaHandlers;

/// \brief Pragma handlers of the original source are stored here during the
/// parsing of a model file.
std::unique_ptr<PragmaNamespace> PragmaHandlersBackup;

/// \brief Tracks all of the comment handlers that the client registered
/// with this preprocessor.
std::vector<CommentHandler *> CommentHandlers;

/// \brief True if we want to ignore EOF token and continue later on (thus
/// avoid tearing the Lexer and etc. down).
bool IncrementalProcessing;

/// The kind of translation unit we are processing.
TranslationUnitKind TUKind;

/// \brief The code-completion handler.
CodeCompletionHandler *CodeComplete;

/// \brief The file that we're performing code-completion for, if any.
const FileEntry *CodeCompletionFile;

/// \brief The offset in file for the code-completion point.
unsigned CodeCompletionOffset;

/// \brief The location for the code-completion point. This gets instantiated
/// when the CodeCompletionFile gets \#include'ed for preprocessing.
SourceLocation CodeCompletionLoc;

/// \brief The start location for the file of the code-completion point.
///
/// This gets instantiated when the CodeCompletionFile gets \#include'ed
/// for preprocessing.
SourceLocation CodeCompletionFileLoc;

/// \brief The source location of the \c import contextual keyword we just
/// lexed, if any.
SourceLocation ModuleImportLoc;

/// \brief The module import path that we're currently processing.
///
/// Each element pairs a path component's identifier with its location.
SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> ModuleImportPath;

/// \brief Whether the last token we lexed was an '@'.
bool LastTokenWasAt;

/// \brief Whether the module import expects an identifier next.
/// Otherwise, it expects a '.' or ';'.
bool ModuleImportExpectsIdentifier;

/// \brief The source location of the currently-active
/// \#pragma clang arc_cf_code_audited begin.
SourceLocation PragmaARCCFCodeAuditedLoc;

/// \brief The source location of the currently-active
/// \#pragma clang assume_nonnull begin.
SourceLocation PragmaAssumeNonNullLoc;

/// \brief True if we hit the code-completion point.
bool CodeCompletionReached;

/// \brief The code completion token containing the information
/// on the stem that is to be code completed.
IdentifierInfo *CodeCompletionII;

/// \brief The directory that the main file should be considered to occupy,
/// if it does not correspond to a real file (as happens when building a
/// module).
const DirectoryEntry *MainFileDir;

/// \brief The number of bytes that we will initially skip when entering the
/// main file, along with a flag that indicates whether skipping this number
/// of bytes will place the lexer at the start of a line.
///
/// This is used when loading a precompiled preamble.
284 std::pair<int, bool> SkipMainFilePreamble; 285 286 class PreambleConditionalStackStore { 287 enum State { 288 Off = 0, 289 Recording = 1, 290 Replaying = 2, 291 }; 292 293 public: 294 PreambleConditionalStackStore() : ConditionalStackState(Off) {} 295 296 void startRecording() { ConditionalStackState = Recording; } 297 void startReplaying() { ConditionalStackState = Replaying; } 298 bool isRecording() const { return ConditionalStackState == Recording; } 299 bool isReplaying() const { return ConditionalStackState == Replaying; } 300 301 ArrayRef<PPConditionalInfo> getStack() const { 302 return ConditionalStack; 303 } 304 305 void doneReplaying() { 306 ConditionalStack.clear(); 307 ConditionalStackState = Off; 308 } 309 310 void setStack(ArrayRef<PPConditionalInfo> s) { 311 if (!isRecording() && !isReplaying()) 312 return; 313 ConditionalStack.clear(); 314 ConditionalStack.append(s.begin(), s.end()); 315 } 316 317 bool hasRecordedPreamble() const { return !ConditionalStack.empty(); } 318 319 private: 320 SmallVector<PPConditionalInfo, 4> ConditionalStack; 321 State ConditionalStackState; 322 } PreambleConditionalStack; 323 324 /// \brief The current top of the stack that we're lexing from if 325 /// not expanding a macro and we are lexing directly from source code. 326 /// 327 /// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null. 328 std::unique_ptr<Lexer> CurLexer; 329 330 /// \brief The current top of stack that we're lexing from if 331 /// not expanding from a macro and we are lexing from a PTH cache. 332 /// 333 /// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null. 334 std::unique_ptr<PTHLexer> CurPTHLexer; 335 336 /// \brief The current top of the stack what we're lexing from 337 /// if not expanding a macro. 338 /// 339 /// This is an alias for either CurLexer or CurPTHLexer. 340 PreprocessorLexer *CurPPLexer; 341 342 /// \brief Used to find the current FileEntry, if CurLexer is non-null 343 /// and if applicable. 
///
/// This allows us to implement \#include_next and find directory-specific
/// properties.
const DirectoryLookup *CurDirLookup;

/// \brief The current macro we are expanding, if we are expanding a macro.
///
/// One of CurLexer and CurTokenLexer must be null.
std::unique_ptr<TokenLexer> CurTokenLexer;

/// \brief The kind of lexer we're currently working with.
enum CurLexerKind {
  CLK_Lexer,
  CLK_PTHLexer,
  CLK_TokenLexer,
  CLK_CachingLexer,
  CLK_LexAfterModuleImport
} CurLexerKind;

/// \brief If the current lexer is for a submodule that is being built, this
/// is that submodule.
Module *CurLexerSubmodule;

/// \brief Keeps track of the stack of files currently
/// \#included, and macros currently being expanded from, not counting
/// CurLexer/CurTokenLexer.
///
/// Each entry carries one value for every piece of "current lexer" state
/// declared above (kind, submodule, the three lexers, the PreprocessorLexer
/// alias and the directory lookup).
struct IncludeStackInfo {
  enum CurLexerKind           CurLexerKind;
  Module                     *TheSubmodule;
  std::unique_ptr<Lexer>      TheLexer;
  std::unique_ptr<PTHLexer>   ThePTHLexer;
  PreprocessorLexer          *ThePPLexer;
  std::unique_ptr<TokenLexer> TheTokenLexer;
  const DirectoryLookup      *TheDirLookup;

  // The following constructors are completely useless copies of the default
  // versions, only needed to pacify MSVC.
  //
  // The unique_ptr arguments are taken by rvalue reference and moved into
  // the members, so ownership of the lexers transfers to the new entry.
  IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule,
                   std::unique_ptr<Lexer> &&TheLexer,
                   std::unique_ptr<PTHLexer> &&ThePTHLexer,
                   PreprocessorLexer *ThePPLexer,
                   std::unique_ptr<TokenLexer> &&TheTokenLexer,
                   const DirectoryLookup *TheDirLookup)
      : CurLexerKind(std::move(CurLexerKind)),
        TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)),
        ThePTHLexer(std::move(ThePTHLexer)),
        ThePPLexer(std::move(ThePPLexer)),
        TheTokenLexer(std::move(TheTokenLexer)),
        TheDirLookup(std::move(TheDirLookup)) {}
};
std::vector<IncludeStackInfo> IncludeMacroStack;

/// \brief Actions invoked when some preprocessor activity is
/// encountered (e.g. a file is \#included, etc).
std::unique_ptr<PPCallbacks> Callbacks;

/// Bundles a token, a macro definition and a source range.
// NOTE(review): presumably the arguments of a MacroExpands callback whose
// delivery was postponed; confirm against the implementation file.
struct MacroExpandsInfo {
  Token Tok;
  MacroDefinition MD;
  SourceRange Range;
  MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range)
    : Tok(Tok), MD(MD), Range(Range) { }
};
SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;

/// Information about a name that has been used to define a module macro.
struct ModuleMacroInfo {
  /// Starts with generation 0, not ambiguous, and no module macros recorded.
  ModuleMacroInfo(MacroDirective *MD)
      : MD(MD), ActiveModuleMacrosGeneration(0), IsAmbiguous(false) {}

  /// The most recent macro directive for this identifier.
  MacroDirective *MD;
  /// The active module macros for this identifier.
  llvm::TinyPtrVector<ModuleMacro*> ActiveModuleMacros;
  /// The generation number at which we last updated ActiveModuleMacros.
  /// \see Preprocessor::VisibleModules.
  unsigned ActiveModuleMacrosGeneration;
  /// Whether this macro name is ambiguous.
  bool IsAmbiguous;
  /// The module macros that are overridden by this macro.
  llvm::TinyPtrVector<ModuleMacro*> OverriddenMacros;
};

/// The state of a macro for an identifier.
428 class MacroState { 429 mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State; 430 431 ModuleMacroInfo *getModuleInfo(Preprocessor &PP, 432 const IdentifierInfo *II) const { 433 if (II->isOutOfDate()) 434 PP.updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II)); 435 // FIXME: Find a spare bit on IdentifierInfo and store a 436 // HasModuleMacros flag. 437 if (!II->hasMacroDefinition() || 438 (!PP.getLangOpts().Modules && 439 !PP.getLangOpts().ModulesLocalVisibility) || 440 !PP.CurSubmoduleState->VisibleModules.getGeneration()) 441 return nullptr; 442 443 auto *Info = State.dyn_cast<ModuleMacroInfo*>(); 444 if (!Info) { 445 Info = new (PP.getPreprocessorAllocator()) 446 ModuleMacroInfo(State.get<MacroDirective *>()); 447 State = Info; 448 } 449 450 if (PP.CurSubmoduleState->VisibleModules.getGeneration() != 451 Info->ActiveModuleMacrosGeneration) 452 PP.updateModuleMacroInfo(II, *Info); 453 return Info; 454 } 455 456 public: 457 MacroState() : MacroState(nullptr) {} 458 MacroState(MacroDirective *MD) : State(MD) {} 459 MacroState(MacroState &&O) noexcept : State(O.State) { 460 O.State = (MacroDirective *)nullptr; 461 } 462 MacroState &operator=(MacroState &&O) noexcept { 463 auto S = O.State; 464 O.State = (MacroDirective *)nullptr; 465 State = S; 466 return *this; 467 } 468 ~MacroState() { 469 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 470 Info->~ModuleMacroInfo(); 471 } 472 473 MacroDirective *getLatest() const { 474 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 475 return Info->MD; 476 return State.get<MacroDirective*>(); 477 } 478 void setLatest(MacroDirective *MD) { 479 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 480 Info->MD = MD; 481 else 482 State = MD; 483 } 484 485 bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const { 486 auto *Info = getModuleInfo(PP, II); 487 return Info ? 
Info->IsAmbiguous : false; 488 } 489 ArrayRef<ModuleMacro *> 490 getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const { 491 if (auto *Info = getModuleInfo(PP, II)) 492 return Info->ActiveModuleMacros; 493 return None; 494 } 495 496 MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc, 497 SourceManager &SourceMgr) const { 498 // FIXME: Incorporate module macros into the result of this. 499 if (auto *Latest = getLatest()) 500 return Latest->findDirectiveAtLoc(Loc, SourceMgr); 501 return MacroDirective::DefInfo(); 502 } 503 504 void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) { 505 if (auto *Info = getModuleInfo(PP, II)) { 506 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(), 507 Info->ActiveModuleMacros.begin(), 508 Info->ActiveModuleMacros.end()); 509 Info->ActiveModuleMacros.clear(); 510 Info->IsAmbiguous = false; 511 } 512 } 513 ArrayRef<ModuleMacro*> getOverriddenMacros() const { 514 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 515 return Info->OverriddenMacros; 516 return None; 517 } 518 void setOverriddenMacros(Preprocessor &PP, 519 ArrayRef<ModuleMacro *> Overrides) { 520 auto *Info = State.dyn_cast<ModuleMacroInfo*>(); 521 if (!Info) { 522 if (Overrides.empty()) 523 return; 524 Info = new (PP.getPreprocessorAllocator()) 525 ModuleMacroInfo(State.get<MacroDirective *>()); 526 State = Info; 527 } 528 Info->OverriddenMacros.clear(); 529 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(), 530 Overrides.begin(), Overrides.end()); 531 Info->ActiveModuleMacrosGeneration = 0; 532 } 533 }; 534 535 /// For each IdentifierInfo that was associated with a macro, we 536 /// keep a mapping to the history of all macro definitions and #undefs in 537 /// the reverse order (the latest one is in the head of the list). 538 /// 539 /// This mapping lives within the \p CurSubmoduleState. 
typedef llvm::DenseMap<const IdentifierInfo *, MacroState> MacroMap;

friend class ASTReader;

struct SubmoduleState;

/// \brief Information about a submodule that we're currently building.
struct BuildingSubmoduleInfo {
  /// Stores each argument in the member of the same name.
  BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma,
                        SubmoduleState *OuterSubmoduleState,
                        unsigned OuterPendingModuleMacroNames)
      : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma),
        OuterSubmoduleState(OuterSubmoduleState),
        OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {}

  /// The module that we are building.
  Module *M;
  /// The location at which the module was included.
  SourceLocation ImportLoc;
  /// Whether we entered this submodule via a pragma.
  bool IsPragma;
  /// The previous SubmoduleState.
  SubmoduleState *OuterSubmoduleState;
  /// The number of pending module macro names when we started building this.
  unsigned OuterPendingModuleMacroNames;
};
SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack;

/// \brief Information about a submodule's preprocessor state.
struct SubmoduleState {
  /// The macros for the submodule.
  MacroMap Macros;
  /// The set of modules that are visible within the submodule.
  VisibleModuleSet VisibleModules;
  // FIXME: CounterValue?
  // FIXME: PragmaPushMacroInfo?
};
/// The preprocessor state recorded for each submodule, keyed by module.
std::map<Module*, SubmoduleState> Submodules;

/// The preprocessor state for preprocessing outside of any submodule.
SubmoduleState NullSubmoduleState;

/// The current submodule state. Will be \p NullSubmoduleState if we're not
/// in a submodule.
SubmoduleState *CurSubmoduleState;

/// The set of known macros exported from modules.
llvm::FoldingSet<ModuleMacro> ModuleMacros;

/// The names of potential module macros that we've not yet processed.
llvm::SmallVector<const IdentifierInfo*, 32> PendingModuleMacroNames;

/// The list of module macros, for each identifier, that are not overridden by
/// any other module macro.
llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro*>>
    LeafModuleMacros;

/// \brief Macros that we want to warn about because they are not used at the
/// end of the translation unit.
///
/// We store just their SourceLocations instead of
/// something like MacroInfo*. The benefit of this is that when we are
/// deserializing from PCH, we don't need to deserialize identifier & macros
/// just so that we can report that they are unused, we just warn using
/// the SourceLocations of this set (that will be filled by the ASTReader).
/// We are using SmallPtrSet instead of a vector for faster removal.
typedef llvm::SmallPtrSet<SourceLocation, 32> WarnUnusedMacroLocsTy;
WarnUnusedMacroLocsTy WarnUnusedMacroLocs;

/// \brief A "freelist" of MacroArg objects that can be
/// reused for quick allocation.
MacroArgs *MacroArgCache;
friend class MacroArgs;

/// For each IdentifierInfo used in a \#pragma push_macro directive,
/// we keep a MacroInfo stack used to restore the previous macro value.
llvm::DenseMap<IdentifierInfo*, std::vector<MacroInfo*> > PragmaPushMacroInfo;

// Various statistics we track for performance analysis.
unsigned NumDirectives, NumDefined, NumUndefined, NumPragma;
unsigned NumIf, NumElse, NumEndif;
unsigned NumEnteredSourceFiles, MaxIncludeStackDepth;
unsigned NumMacroExpanded, NumFnMacroExpanded, NumBuiltinMacroExpanded;
unsigned NumFastMacroExpanded, NumTokenPaste, NumFastTokenPaste;
unsigned NumSkipped;

/// \brief The predefined macros that preprocessor should use from the
/// command line etc.
std::string Predefines;

/// \brief The file ID for the preprocessor predefines.
FileID PredefinesFileID;

/// \{
/// \brief Cache of macro expanders to reduce malloc traffic.
enum { TokenLexerCacheSize = 8 };
unsigned NumCachedTokenLexers;
std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize];
/// \}

/// \brief Keeps macro expanded tokens for TokenLexers.
///
/// Works like a stack; a TokenLexer adds the macro expanded tokens that it is
/// going to lex to the cache, and when it finishes the tokens are removed
/// from the end of the cache.
SmallVector<Token, 16> MacroExpandedTokens;
// NOTE(review): presumably pairs each expanding TokenLexer with an index
// into MacroExpandedTokens; confirm against the implementation file.
std::vector<std::pair<TokenLexer *, size_t> > MacroExpandingLexersStack;

/// \brief A record of the macro definitions and expansions that
/// occurred during preprocessing.
///
/// This is an optional side structure that can be enabled with
/// \c createPreprocessingRecord() prior to preprocessing.
PreprocessingRecord *Record;

/// Cached tokens state.
typedef SmallVector<Token, 1> CachedTokensTy;

/// \brief Cached tokens are stored here when we do backtracking or
/// lookahead. They are "lexed" by the CachingLex() method.
CachedTokensTy CachedTokens;

/// \brief The position of the cached token that CachingLex() should
/// "lex" next.
///
/// If it points beyond the CachedTokens vector, it means that a normal
/// Lex() should be invoked.
CachedTokensTy::size_type CachedLexPos;

/// \brief Stack of backtrack positions, allowing nested backtracks.
///
/// The EnableBacktrackAtThisPos() method pushes a position to
/// indicate where CachedLexPos should be set when the BackTrack() method is
/// invoked (at which point the last position is popped).
std::vector<CachedTokensTy::size_type> BacktrackPositions;

/// A node of a singly linked list of MacroInfo objects.
struct MacroInfoChain {
  MacroInfo MI;
  MacroInfoChain *Next;
};

/// MacroInfos are managed as a chain for easy disposal. This is the head
/// of that list.
MacroInfoChain *MIChainHead;

/// Declared here and defined elsewhere. It is called from this header
/// whenever an identifier reports isOutOfDate().
void updateOutOfDateIdentifier(IdentifierInfo &II) const;

public:
/// Construct a preprocessor over the given options, diagnostics, language
/// options, source manager, PCM cache, header search and module loader.
///
/// \p IILookup, \p OwnsHeaderSearch and \p TUKind are optional and default
/// to nullptr, false and TU_Complete respectively.
Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
             DiagnosticsEngine &diags, LangOptions &opts, SourceManager &SM,
             MemoryBufferCache &PCMCache,
             HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
             IdentifierInfoLookup *IILookup = nullptr,
             bool OwnsHeaderSearch = false,
             TranslationUnitKind TUKind = TU_Complete);

~Preprocessor();

/// \brief Initialize the preprocessor using information about the target.
///
/// \param Target is owned by the caller and must remain valid for the
/// lifetime of the preprocessor.
/// \param AuxTarget is owned by the caller and must remain valid for
/// the lifetime of the preprocessor.
void Initialize(const TargetInfo &Target,
                const TargetInfo *AuxTarget = nullptr);

/// \brief Initialize the preprocessor to parse a model file
///
/// To parse model files the preprocessor of the original source is reused to
/// preserve the identifier table. However, to avoid some duplicate
/// information in the preprocessor, some cleanup is needed before it is used
/// to parse model files. This method does that cleanup.
void InitializeForModelFile();

/// \brief Cleanup after model file parsing
void FinalizeForModelFile();

/// \brief Retrieve the preprocessor options used to initialize this
/// preprocessor.
PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; }

/// The diagnostics engine currently in use; it can be replaced with
/// setDiagnostics().
DiagnosticsEngine &getDiagnostics() const { return *Diags; }
void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }

// Plain accessors for the objects this preprocessor was built with.
const LangOptions &getLangOpts() const { return LangOpts; }
// Dereferences Target unconditionally, whereas the auxiliary target is
// returned as a pointer.
const TargetInfo &getTargetInfo() const { return *Target; }
const TargetInfo *getAuxTargetInfo() const { return AuxTarget; }
FileManager &getFileManager() const { return FileMgr; }
SourceManager &getSourceManager() const { return SourceMgr; }
MemoryBufferCache &getPCMCache() const { return PCMCache; }
HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }

// Accessors for state owned by the preprocessor itself.
IdentifierTable &getIdentifierTable() { return Identifiers; }
const IdentifierTable &getIdentifierTable() const { return Identifiers; }
SelectorTable &getSelectorTable() { return Selectors; }
Builtin::Context &getBuiltinInfo() { return BuiltinInfo; }
llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }

// NOTE(review): PTH is a unique_ptr, so this presumably takes ownership of
// pm; confirm against the out-of-line definition.
void setPTHManager(PTHManager* pm);

/// The PTH manager, or null if none is set. The preprocessor keeps
/// ownership.
PTHManager *getPTHManager() { return PTH.get(); }

/// Set the external macro source. Only the pointer is stored.
void setExternalSource(ExternalPreprocessorSource *Source) {
  ExternalSource = Source;
}

ExternalPreprocessorSource *getExternalSource() const {
  return ExternalSource;
}

/// \brief Retrieve the module loader associated with this preprocessor.
ModuleLoader &getModuleLoader() const { return TheModuleLoader; }

/// Reports the module loader's HadFatalFailure flag.
bool hadModuleLoaderFatalFailure() const {
  return TheModuleLoader.HadFatalFailure;
}

/// \brief True if we are currently preprocessing a #if or #elif directive
bool isParsingIfOrElifDirective() const {
  return ParsingIfOrElifDirective;
}

/// \brief Control whether the preprocessor retains comments in output.
764 void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) { 765 this->KeepComments = KeepComments | KeepMacroComments; 766 this->KeepMacroComments = KeepMacroComments; 767 } 768 769 bool getCommentRetentionState() const { return KeepComments; } 770 771 void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; } 772 bool getPragmasEnabled() const { return PragmasEnabled; } 773 774 void SetSuppressIncludeNotFoundError(bool Suppress) { 775 SuppressIncludeNotFoundError = Suppress; 776 } 777 778 bool GetSuppressIncludeNotFoundError() { 779 return SuppressIncludeNotFoundError; 780 } 781 782 /// Sets whether the preprocessor is responsible for producing output or if 783 /// it is producing tokens to be consumed by Parse and Sema. 784 void setPreprocessedOutput(bool IsPreprocessedOutput) { 785 PreprocessedOutput = IsPreprocessedOutput; 786 } 787 788 /// Returns true if the preprocessor is responsible for generating output, 789 /// false if it is producing tokens to be consumed by Parse and Sema. 790 bool isPreprocessedOutput() const { return PreprocessedOutput; } 791 792 /// \brief Return true if we are lexing directly from the specified lexer. 793 bool isCurrentLexer(const PreprocessorLexer *L) const { 794 return CurPPLexer == L; 795 } 796 797 /// \brief Return the current lexer being lexed from. 798 /// 799 /// Note that this ignores any potentially active macro expansions and _Pragma 800 /// expansions going on at the time. 801 PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; } 802 803 /// \brief Return the current file lexer being lexed from. 804 /// 805 /// Note that this ignores any potentially active macro expansions and _Pragma 806 /// expansions going on at the time. 807 PreprocessorLexer *getCurrentFileLexer() const; 808 809 /// \brief Return the submodule owning the file being lexed. This may not be 810 /// the current module if we have changed modules since entering the file. 
811 Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; } 812 813 /// \brief Returns the FileID for the preprocessor predefines. 814 FileID getPredefinesFileID() const { return PredefinesFileID; } 815 816 /// \{ 817 /// \brief Accessors for preprocessor callbacks. 818 /// 819 /// Note that this class takes ownership of any PPCallbacks object given to 820 /// it. 821 PPCallbacks *getPPCallbacks() const { return Callbacks.get(); } 822 void addPPCallbacks(std::unique_ptr<PPCallbacks> C) { 823 if (Callbacks) 824 C = llvm::make_unique<PPChainedCallbacks>(std::move(C), 825 std::move(Callbacks)); 826 Callbacks = std::move(C); 827 } 828 /// \} 829 830 bool isMacroDefined(StringRef Id) { 831 return isMacroDefined(&Identifiers.get(Id)); 832 } 833 bool isMacroDefined(const IdentifierInfo *II) { 834 return II->hasMacroDefinition() && 835 (!getLangOpts().Modules || (bool)getMacroDefinition(II)); 836 } 837 838 /// \brief Determine whether II is defined as a macro within the module M, 839 /// if that is a module that we've already preprocessed. Does not check for 840 /// macros imported into M. 
841 bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) { 842 if (!II->hasMacroDefinition()) 843 return false; 844 auto I = Submodules.find(M); 845 if (I == Submodules.end()) 846 return false; 847 auto J = I->second.Macros.find(II); 848 if (J == I->second.Macros.end()) 849 return false; 850 auto *MD = J->second.getLatest(); 851 return MD && MD->isDefined(); 852 } 853 854 MacroDefinition getMacroDefinition(const IdentifierInfo *II) { 855 if (!II->hasMacroDefinition()) 856 return MacroDefinition(); 857 858 MacroState &S = CurSubmoduleState->Macros[II]; 859 auto *MD = S.getLatest(); 860 while (MD && isa<VisibilityMacroDirective>(MD)) 861 MD = MD->getPrevious(); 862 return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD), 863 S.getActiveModuleMacros(*this, II), 864 S.isAmbiguous(*this, II)); 865 } 866 867 MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II, 868 SourceLocation Loc) { 869 if (!II->hadMacroDefinition()) 870 return MacroDefinition(); 871 872 MacroState &S = CurSubmoduleState->Macros[II]; 873 MacroDirective::DefInfo DI; 874 if (auto *MD = S.getLatest()) 875 DI = MD->findDirectiveAtLoc(Loc, getSourceManager()); 876 // FIXME: Compute the set of active module macros at the specified location. 877 return MacroDefinition(DI.getDirective(), 878 S.getActiveModuleMacros(*this, II), 879 S.isAmbiguous(*this, II)); 880 } 881 882 /// \brief Given an identifier, return its latest non-imported MacroDirective 883 /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd. 
884 MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const { 885 if (!II->hasMacroDefinition()) 886 return nullptr; 887 888 auto *MD = getLocalMacroDirectiveHistory(II); 889 if (!MD || MD->getDefinition().isUndefined()) 890 return nullptr; 891 892 return MD; 893 } 894 895 const MacroInfo *getMacroInfo(const IdentifierInfo *II) const { 896 return const_cast<Preprocessor*>(this)->getMacroInfo(II); 897 } 898 899 MacroInfo *getMacroInfo(const IdentifierInfo *II) { 900 if (!II->hasMacroDefinition()) 901 return nullptr; 902 if (auto MD = getMacroDefinition(II)) 903 return MD.getMacroInfo(); 904 return nullptr; 905 } 906 907 /// \brief Given an identifier, return the latest non-imported macro 908 /// directive for that identifier. 909 /// 910 /// One can iterate over all previous macro directives from the most recent 911 /// one. 912 MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const; 913 914 /// \brief Add a directive to the macro directive history for this identifier. 915 void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD); 916 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI, 917 SourceLocation Loc) { 918 DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc); 919 appendMacroDirective(II, MD); 920 return MD; 921 } 922 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, 923 MacroInfo *MI) { 924 return appendDefMacroDirective(II, MI, MI->getDefinitionLoc()); 925 } 926 /// \brief Set a MacroDirective that was loaded from a PCH file. 927 void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED, 928 MacroDirective *MD); 929 930 /// \brief Register an exported macro for a module and identifier. 
931 ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II, MacroInfo *Macro, 932 ArrayRef<ModuleMacro *> Overrides, bool &IsNew); 933 ModuleMacro *getModuleMacro(Module *Mod, IdentifierInfo *II); 934 935 /// \brief Get the list of leaf (non-overridden) module macros for a name. 936 ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const { 937 if (II->isOutOfDate()) 938 updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II)); 939 auto I = LeafModuleMacros.find(II); 940 if (I != LeafModuleMacros.end()) 941 return I->second; 942 return None; 943 } 944 945 /// \{ 946 /// Iterators for the macro history table. Currently defined macros have 947 /// IdentifierInfo::hasMacroDefinition() set and an empty 948 /// MacroInfo::getUndefLoc() at the head of the list. 949 typedef MacroMap::const_iterator macro_iterator; 950 macro_iterator macro_begin(bool IncludeExternalMacros = true) const; 951 macro_iterator macro_end(bool IncludeExternalMacros = true) const; 952 llvm::iterator_range<macro_iterator> 953 macros(bool IncludeExternalMacros = true) const { 954 return llvm::make_range(macro_begin(IncludeExternalMacros), 955 macro_end(IncludeExternalMacros)); 956 } 957 /// \} 958 959 /// \brief Return the name of the macro defined before \p Loc that has 960 /// spelling \p Tokens. If there are multiple macros with same spelling, 961 /// return the last one defined. 962 StringRef getLastMacroWithSpelling(SourceLocation Loc, 963 ArrayRef<TokenValue> Tokens) const; 964 965 const std::string &getPredefines() const { return Predefines; } 966 /// \brief Set the predefines for this Preprocessor. 967 /// 968 /// These predefines are automatically injected when parsing the main file. 969 void setPredefines(const char *P) { Predefines = P; } 970 void setPredefines(StringRef P) { Predefines = P; } 971 972 /// Return information about the specified preprocessor 973 /// identifier token. 
974 IdentifierInfo *getIdentifierInfo(StringRef Name) const { 975 return &Identifiers.get(Name); 976 } 977 978 /// \brief Add the specified pragma handler to this preprocessor. 979 /// 980 /// If \p Namespace is non-null, then it is a token required to exist on the 981 /// pragma line before the pragma string starts, e.g. "STDC" or "GCC". 982 void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler); 983 void AddPragmaHandler(PragmaHandler *Handler) { 984 AddPragmaHandler(StringRef(), Handler); 985 } 986 987 /// \brief Remove the specific pragma handler from this preprocessor. 988 /// 989 /// If \p Namespace is non-null, then it should be the namespace that 990 /// \p Handler was added to. It is an error to remove a handler that 991 /// has not been registered. 992 void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler); 993 void RemovePragmaHandler(PragmaHandler *Handler) { 994 RemovePragmaHandler(StringRef(), Handler); 995 } 996 997 /// Install empty handlers for all pragmas (making them ignored). 998 void IgnorePragmas(); 999 1000 /// \brief Add the specified comment handler to the preprocessor. 1001 void addCommentHandler(CommentHandler *Handler); 1002 1003 /// \brief Remove the specified comment handler. 1004 /// 1005 /// It is an error to remove a handler that has not been registered. 1006 void removeCommentHandler(CommentHandler *Handler); 1007 1008 /// \brief Set the code completion handler to the given object. 1009 void setCodeCompletionHandler(CodeCompletionHandler &Handler) { 1010 CodeComplete = &Handler; 1011 } 1012 1013 /// \brief Retrieve the current code-completion handler. 1014 CodeCompletionHandler *getCodeCompletionHandler() const { 1015 return CodeComplete; 1016 } 1017 1018 /// \brief Clear out the code completion handler. 1019 void clearCodeCompletionHandler() { 1020 CodeComplete = nullptr; 1021 } 1022 1023 /// \brief Hook used by the lexer to invoke the "natural language" code 1024 /// completion point. 
1025 void CodeCompleteNaturalLanguage(); 1026 1027 /// \brief Set the code completion token for filtering purposes. 1028 void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter) { 1029 CodeCompletionII = Filter; 1030 } 1031 1032 /// \brief Get the code completion token for filtering purposes. 1033 StringRef getCodeCompletionFilter() { 1034 if (CodeCompletionII) 1035 return CodeCompletionII->getName(); 1036 return {}; 1037 } 1038 1039 /// \brief Retrieve the preprocessing record, or NULL if there is no 1040 /// preprocessing record. 1041 PreprocessingRecord *getPreprocessingRecord() const { return Record; } 1042 1043 /// \brief Create a new preprocessing record, which will keep track of 1044 /// all macro expansions, macro definitions, etc. 1045 void createPreprocessingRecord(); 1046 1047 /// \brief Enter the specified FileID as the main source file, 1048 /// which implicitly adds the builtin defines etc. 1049 void EnterMainSourceFile(); 1050 1051 /// \brief Inform the preprocessor callbacks that processing is complete. 1052 void EndSourceFile(); 1053 1054 /// \brief Add a source file to the top of the include stack and 1055 /// start lexing tokens from it instead of the current buffer. 1056 /// 1057 /// Emits a diagnostic, doesn't enter the file, and returns true on error. 1058 bool EnterSourceFile(FileID CurFileID, const DirectoryLookup *Dir, 1059 SourceLocation Loc); 1060 1061 /// \brief Add a Macro to the top of the include stack and start lexing 1062 /// tokens from it instead of the current buffer. 1063 /// 1064 /// \param Args specifies the tokens input to a function-like macro. 1065 /// \param ILEnd specifies the location of the ')' for a function-like macro 1066 /// or the identifier for an object-like macro. 
1067 void EnterMacro(Token &Identifier, SourceLocation ILEnd, MacroInfo *Macro, 1068 MacroArgs *Args); 1069 1070 /// \brief Add a "macro" context to the top of the include stack, 1071 /// which will cause the lexer to start returning the specified tokens. 1072 /// 1073 /// If \p DisableMacroExpansion is true, tokens lexed from the token stream 1074 /// will not be subject to further macro expansion. Otherwise, these tokens 1075 /// will be re-macro-expanded when/if expansion is enabled. 1076 /// 1077 /// If \p OwnsTokens is false, this method assumes that the specified stream 1078 /// of tokens has a permanent owner somewhere, so they do not need to be 1079 /// copied. If it is true, it assumes the array of tokens is allocated with 1080 /// \c new[] and the Preprocessor will delete[] it. 1081 private: 1082 void EnterTokenStream(const Token *Toks, unsigned NumToks, 1083 bool DisableMacroExpansion, bool OwnsTokens); 1084 1085 public: 1086 void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks, 1087 bool DisableMacroExpansion) { 1088 EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true); 1089 } 1090 void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion) { 1091 EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false); 1092 } 1093 1094 /// \brief Pop the current lexer/macro exp off the top of the lexer stack. 1095 /// 1096 /// This should only be used in situations where the current state of the 1097 /// top-of-stack lexer is known. 1098 void RemoveTopOfLexerStack(); 1099 1100 /// From the point that this method is called, and until 1101 /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor 1102 /// keeps track of the lexed tokens so that a subsequent Backtrack() call will 1103 /// make the Preprocessor re-lex the same tokens. 
1104 /// 1105 /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can 1106 /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will 1107 /// be combined with the EnableBacktrackAtThisPos calls in reverse order. 1108 /// 1109 /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack 1110 /// at some point after EnableBacktrackAtThisPos. If you don't, caching of 1111 /// tokens will continue indefinitely. 1112 /// 1113 void EnableBacktrackAtThisPos(); 1114 1115 /// \brief Disable the last EnableBacktrackAtThisPos call. 1116 void CommitBacktrackedTokens(); 1117 1118 struct CachedTokensRange { 1119 CachedTokensTy::size_type Begin, End; 1120 }; 1121 1122 private: 1123 /// \brief A range of cached tokens that should be erased after lexing 1124 /// when backtracking requires the erasure of such cached tokens. 1125 Optional<CachedTokensRange> CachedTokenRangeToErase; 1126 1127 public: 1128 /// \brief Returns the range of cached tokens that were lexed since 1129 /// EnableBacktrackAtThisPos() was previously called. 1130 CachedTokensRange LastCachedTokenRange(); 1131 1132 /// \brief Erase the range of cached tokens that were lexed since 1133 /// EnableBacktrackAtThisPos() was previously called. 1134 void EraseCachedTokens(CachedTokensRange TokenRange); 1135 1136 /// \brief Make Preprocessor re-lex the tokens that were lexed since 1137 /// EnableBacktrackAtThisPos() was previously called. 1138 void Backtrack(); 1139 1140 /// \brief True if EnableBacktrackAtThisPos() was called and 1141 /// caching of tokens is on. 1142 bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); } 1143 1144 /// \brief Lex the next token for this preprocessor. 
1145 void Lex(Token &Result); 1146 1147 void LexAfterModuleImport(Token &Result); 1148 1149 void makeModuleVisible(Module *M, SourceLocation Loc); 1150 1151 SourceLocation getModuleImportLoc(Module *M) const { 1152 return CurSubmoduleState->VisibleModules.getImportLoc(M); 1153 } 1154 1155 /// \brief Lex a string literal, which may be the concatenation of multiple 1156 /// string literals and may even come from macro expansion. 1157 /// \returns true on success, false if a error diagnostic has been generated. 1158 bool LexStringLiteral(Token &Result, std::string &String, 1159 const char *DiagnosticTag, bool AllowMacroExpansion) { 1160 if (AllowMacroExpansion) 1161 Lex(Result); 1162 else 1163 LexUnexpandedToken(Result); 1164 return FinishLexStringLiteral(Result, String, DiagnosticTag, 1165 AllowMacroExpansion); 1166 } 1167 1168 /// \brief Complete the lexing of a string literal where the first token has 1169 /// already been lexed (see LexStringLiteral). 1170 bool FinishLexStringLiteral(Token &Result, std::string &String, 1171 const char *DiagnosticTag, 1172 bool AllowMacroExpansion); 1173 1174 /// \brief Lex a token. If it's a comment, keep lexing until we get 1175 /// something not a comment. 1176 /// 1177 /// This is useful in -E -C mode where comments would foul up preprocessor 1178 /// directive handling. 1179 void LexNonComment(Token &Result) { 1180 do 1181 Lex(Result); 1182 while (Result.getKind() == tok::comment); 1183 } 1184 1185 /// \brief Just like Lex, but disables macro expansion of identifier tokens. 1186 void LexUnexpandedToken(Token &Result) { 1187 // Disable macro expansion. 1188 bool OldVal = DisableMacroExpansion; 1189 DisableMacroExpansion = true; 1190 // Lex the token. 1191 Lex(Result); 1192 1193 // Reenable it. 1194 DisableMacroExpansion = OldVal; 1195 } 1196 1197 /// \brief Like LexNonComment, but this disables macro expansion of 1198 /// identifier tokens. 
1199 void LexUnexpandedNonComment(Token &Result) { 1200 do 1201 LexUnexpandedToken(Result); 1202 while (Result.getKind() == tok::comment); 1203 } 1204 1205 /// \brief Parses a simple integer literal to get its numeric value. Floating 1206 /// point literals and user defined literals are rejected. Used primarily to 1207 /// handle pragmas that accept integer arguments. 1208 bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value); 1209 1210 /// Disables macro expansion everywhere except for preprocessor directives. 1211 void SetMacroExpansionOnlyInDirectives() { 1212 DisableMacroExpansion = true; 1213 MacroExpansionInDirectivesOverride = true; 1214 } 1215 1216 /// \brief Peeks ahead N tokens and returns that token without consuming any 1217 /// tokens. 1218 /// 1219 /// LookAhead(0) returns the next token that would be returned by Lex(), 1220 /// LookAhead(1) returns the token after it, etc. This returns normal 1221 /// tokens after phase 5. As such, it is equivalent to using 1222 /// 'Lex', not 'LexUnexpandedToken'. 1223 const Token &LookAhead(unsigned N) { 1224 if (CachedLexPos + N < CachedTokens.size()) 1225 return CachedTokens[CachedLexPos+N]; 1226 else 1227 return PeekAhead(N+1); 1228 } 1229 1230 /// \brief When backtracking is enabled and tokens are cached, 1231 /// this allows to revert a specific number of tokens. 1232 /// 1233 /// Note that the number of tokens being reverted should be up to the last 1234 /// backtrack position, not more. 1235 void RevertCachedTokens(unsigned N) { 1236 assert(isBacktrackEnabled() && 1237 "Should only be called when tokens are cached for backtracking"); 1238 assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back()) 1239 && "Should revert tokens up to the last backtrack position, not more"); 1240 assert(signed(CachedLexPos) - signed(N) >= 0 && 1241 "Corrupted backtrack positions ?"); 1242 CachedLexPos -= N; 1243 } 1244 1245 /// \brief Enters a token in the token stream to be lexed next. 
1246 /// 1247 /// If BackTrack() is called afterwards, the token will remain at the 1248 /// insertion point. 1249 void EnterToken(const Token &Tok) { 1250 EnterCachingLexMode(); 1251 CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok); 1252 } 1253 1254 /// We notify the Preprocessor that if it is caching tokens (because 1255 /// backtrack is enabled) it should replace the most recent cached tokens 1256 /// with the given annotation token. This function has no effect if 1257 /// backtracking is not enabled. 1258 /// 1259 /// Note that the use of this function is just for optimization, so that the 1260 /// cached tokens doesn't get re-parsed and re-resolved after a backtrack is 1261 /// invoked. 1262 void AnnotateCachedTokens(const Token &Tok) { 1263 assert(Tok.isAnnotation() && "Expected annotation token"); 1264 if (CachedLexPos != 0 && isBacktrackEnabled()) 1265 AnnotatePreviousCachedTokens(Tok); 1266 } 1267 1268 /// Get the location of the last cached token, suitable for setting the end 1269 /// location of an annotation token. 1270 SourceLocation getLastCachedTokenLocation() const { 1271 assert(CachedLexPos != 0); 1272 return CachedTokens[CachedLexPos-1].getLastLoc(); 1273 } 1274 1275 /// \brief Whether \p Tok is the most recent token (`CachedLexPos - 1`) in 1276 /// CachedTokens. 1277 bool IsPreviousCachedToken(const Token &Tok) const; 1278 1279 /// \brief Replace token in `CachedLexPos - 1` in CachedTokens by the tokens 1280 /// in \p NewToks. 1281 /// 1282 /// Useful when a token needs to be split in smaller ones and CachedTokens 1283 /// most recent token must to be updated to reflect that. 1284 void ReplacePreviousCachedToken(ArrayRef<Token> NewToks); 1285 1286 /// \brief Replace the last token with an annotation token. 1287 /// 1288 /// Like AnnotateCachedTokens(), this routine replaces an 1289 /// already-parsed (and resolved) token with an annotation 1290 /// token. 
However, this routine only replaces the last token with 1291 /// the annotation token; it does not affect any other cached 1292 /// tokens. This function has no effect if backtracking is not 1293 /// enabled. 1294 void ReplaceLastTokenWithAnnotation(const Token &Tok) { 1295 assert(Tok.isAnnotation() && "Expected annotation token"); 1296 if (CachedLexPos != 0 && isBacktrackEnabled()) 1297 CachedTokens[CachedLexPos-1] = Tok; 1298 } 1299 1300 /// Enter an annotation token into the token stream. 1301 void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind, 1302 void *AnnotationVal); 1303 1304 /// Update the current token to represent the provided 1305 /// identifier, in order to cache an action performed by typo correction. 1306 void TypoCorrectToken(const Token &Tok) { 1307 assert(Tok.getIdentifierInfo() && "Expected identifier token"); 1308 if (CachedLexPos != 0 && isBacktrackEnabled()) 1309 CachedTokens[CachedLexPos-1] = Tok; 1310 } 1311 1312 /// \brief Recompute the current lexer kind based on the CurLexer/CurPTHLexer/ 1313 /// CurTokenLexer pointers. 1314 void recomputeCurLexerKind(); 1315 1316 /// \brief Returns true if incremental processing is enabled 1317 bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; } 1318 1319 /// \brief Enables the incremental processing 1320 void enableIncrementalProcessing(bool value = true) { 1321 IncrementalProcessing = value; 1322 } 1323 1324 /// \brief Specify the point at which code-completion will be performed. 1325 /// 1326 /// \param File the file in which code completion should occur. If 1327 /// this file is included multiple times, code-completion will 1328 /// perform completion the first time it is included. If NULL, this 1329 /// function clears out the code-completion point. 1330 /// 1331 /// \param Line the line at which code completion should occur 1332 /// (1-based). 1333 /// 1334 /// \param Column the column at which code completion should occur 1335 /// (1-based). 
1336 /// 1337 /// \returns true if an error occurred, false otherwise. 1338 bool SetCodeCompletionPoint(const FileEntry *File, 1339 unsigned Line, unsigned Column); 1340 1341 /// \brief Determine if we are performing code completion. 1342 bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; } 1343 1344 /// \brief Returns the location of the code-completion point. 1345 /// 1346 /// Returns an invalid location if code-completion is not enabled or the file 1347 /// containing the code-completion point has not been lexed yet. 1348 SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; } 1349 1350 /// \brief Returns the start location of the file of code-completion point. 1351 /// 1352 /// Returns an invalid location if code-completion is not enabled or the file 1353 /// containing the code-completion point has not been lexed yet. 1354 SourceLocation getCodeCompletionFileLoc() const { 1355 return CodeCompletionFileLoc; 1356 } 1357 1358 /// \brief Returns true if code-completion is enabled and we have hit the 1359 /// code-completion point. 1360 bool isCodeCompletionReached() const { return CodeCompletionReached; } 1361 1362 /// \brief Note that we hit the code-completion point. 1363 void setCodeCompletionReached() { 1364 assert(isCodeCompletionEnabled() && "Code-completion not enabled!"); 1365 CodeCompletionReached = true; 1366 // Silence any diagnostics that occur after we hit the code-completion. 1367 getDiagnostics().setSuppressAllDiagnostics(true); 1368 } 1369 1370 /// \brief The location of the currently-active \#pragma clang 1371 /// arc_cf_code_audited begin. 1372 /// 1373 /// Returns an invalid location if there is no such pragma active. 1374 SourceLocation getPragmaARCCFCodeAuditedLoc() const { 1375 return PragmaARCCFCodeAuditedLoc; 1376 } 1377 1378 /// \brief Set the location of the currently-active \#pragma clang 1379 /// arc_cf_code_audited begin. An invalid location ends the pragma. 
1380 void setPragmaARCCFCodeAuditedLoc(SourceLocation Loc) { 1381 PragmaARCCFCodeAuditedLoc = Loc; 1382 } 1383 1384 /// \brief The location of the currently-active \#pragma clang 1385 /// assume_nonnull begin. 1386 /// 1387 /// Returns an invalid location if there is no such pragma active. 1388 SourceLocation getPragmaAssumeNonNullLoc() const { 1389 return PragmaAssumeNonNullLoc; 1390 } 1391 1392 /// \brief Set the location of the currently-active \#pragma clang 1393 /// assume_nonnull begin. An invalid location ends the pragma. 1394 void setPragmaAssumeNonNullLoc(SourceLocation Loc) { 1395 PragmaAssumeNonNullLoc = Loc; 1396 } 1397 1398 /// \brief Set the directory in which the main file should be considered 1399 /// to have been found, if it is not a real file. 1400 void setMainFileDir(const DirectoryEntry *Dir) { 1401 MainFileDir = Dir; 1402 } 1403 1404 /// \brief Instruct the preprocessor to skip part of the main source file. 1405 /// 1406 /// \param Bytes The number of bytes in the preamble to skip. 1407 /// 1408 /// \param StartOfLine Whether skipping these bytes puts the lexer at the 1409 /// start of a line. 1410 void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) { 1411 SkipMainFilePreamble.first = Bytes; 1412 SkipMainFilePreamble.second = StartOfLine; 1413 } 1414 1415 /// Forwarding function for diagnostics. This emits a diagnostic at 1416 /// the specified Token's location, translating the token's start 1417 /// position in the current buffer into a SourcePosition object for rendering. 1418 DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const { 1419 return Diags->Report(Loc, DiagID); 1420 } 1421 1422 DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const { 1423 return Diags->Report(Tok.getLocation(), DiagID); 1424 } 1425 1426 /// Return the 'spelling' of the token at the given 1427 /// location; does not go up to the spelling location or down to the 1428 /// expansion location. 
1429 /// 1430 /// \param buffer A buffer which will be used only if the token requires 1431 /// "cleaning", e.g. if it contains trigraphs or escaped newlines 1432 /// \param invalid If non-null, will be set \c true if an error occurs. 1433 StringRef getSpelling(SourceLocation loc, 1434 SmallVectorImpl<char> &buffer, 1435 bool *invalid = nullptr) const { 1436 return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid); 1437 } 1438 1439 /// \brief Return the 'spelling' of the Tok token. 1440 /// 1441 /// The spelling of a token is the characters used to represent the token in 1442 /// the source file after trigraph expansion and escaped-newline folding. In 1443 /// particular, this wants to get the true, uncanonicalized, spelling of 1444 /// things like digraphs, UCNs, etc. 1445 /// 1446 /// \param Invalid If non-null, will be set \c true if an error occurs. 1447 std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const { 1448 return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid); 1449 } 1450 1451 /// \brief Get the spelling of a token into a preallocated buffer, instead 1452 /// of as an std::string. 1453 /// 1454 /// The caller is required to allocate enough space for the token, which is 1455 /// guaranteed to be at least Tok.getLength() bytes long. The length of the 1456 /// actual result is returned. 1457 /// 1458 /// Note that this method may do two possible things: it may either fill in 1459 /// the buffer specified with characters, or it may *change the input pointer* 1460 /// to point to a constant buffer with the data already in it (avoiding a 1461 /// copy). The caller is not allowed to modify the returned buffer pointer 1462 /// if an internal buffer is returned. 1463 unsigned getSpelling(const Token &Tok, const char *&Buffer, 1464 bool *Invalid = nullptr) const { 1465 return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid); 1466 } 1467 1468 /// \brief Get the spelling of a token into a SmallVector. 
1469 /// 1470 /// Note that the returned StringRef may not point to the 1471 /// supplied buffer if a copy can be avoided. 1472 StringRef getSpelling(const Token &Tok, 1473 SmallVectorImpl<char> &Buffer, 1474 bool *Invalid = nullptr) const; 1475 1476 /// \brief Relex the token at the specified location. 1477 /// \returns true if there was a failure, false on success. 1478 bool getRawToken(SourceLocation Loc, Token &Result, 1479 bool IgnoreWhiteSpace = false) { 1480 return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace); 1481 } 1482 1483 /// \brief Given a Token \p Tok that is a numeric constant with length 1, 1484 /// return the character. 1485 char 1486 getSpellingOfSingleCharacterNumericConstant(const Token &Tok, 1487 bool *Invalid = nullptr) const { 1488 assert(Tok.is(tok::numeric_constant) && 1489 Tok.getLength() == 1 && "Called on unsupported token"); 1490 assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1"); 1491 1492 // If the token is carrying a literal data pointer, just use it. 1493 if (const char *D = Tok.getLiteralData()) 1494 return *D; 1495 1496 // Otherwise, fall back on getCharacterData, which is slower, but always 1497 // works. 1498 return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid); 1499 } 1500 1501 /// \brief Retrieve the name of the immediate macro expansion. 1502 /// 1503 /// This routine starts from a source location, and finds the name of the 1504 /// macro responsible for its immediate expansion. It looks through any 1505 /// intervening macro argument expansions to compute this. It returns a 1506 /// StringRef that refers to the SourceManager-owned buffer of the source 1507 /// where that macro name is spelled. Thus, the result shouldn't out-live 1508 /// the SourceManager. 
1509 StringRef getImmediateMacroName(SourceLocation Loc) { 1510 return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts()); 1511 } 1512 1513 /// \brief Plop the specified string into a scratch buffer and set the 1514 /// specified token's location and length to it. 1515 /// 1516 /// If specified, the source location provides a location of the expansion 1517 /// point of the token. 1518 void CreateString(StringRef Str, Token &Tok, 1519 SourceLocation ExpansionLocStart = SourceLocation(), 1520 SourceLocation ExpansionLocEnd = SourceLocation()); 1521 1522 /// \brief Computes the source location just past the end of the 1523 /// token at this source location. 1524 /// 1525 /// This routine can be used to produce a source location that 1526 /// points just past the end of the token referenced by \p Loc, and 1527 /// is generally used when a diagnostic needs to point just after a 1528 /// token where it expected something different that it received. If 1529 /// the returned source location would not be meaningful (e.g., if 1530 /// it points into a macro), this routine returns an invalid 1531 /// source location. 1532 /// 1533 /// \param Offset an offset from the end of the token, where the source 1534 /// location should refer to. The default offset (0) produces a source 1535 /// location pointing just past the end of the token; an offset of 1 produces 1536 /// a source location pointing to the last character in the token, etc. 1537 SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) { 1538 return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts); 1539 } 1540 1541 /// \brief Returns true if the given MacroID location points at the first 1542 /// token of the macro expansion. 1543 /// 1544 /// \param MacroBegin If non-null and function returns true, it is set to 1545 /// begin location of the macro. 
1546 bool isAtStartOfMacroExpansion(SourceLocation loc, 1547 SourceLocation *MacroBegin = nullptr) const { 1548 return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts, 1549 MacroBegin); 1550 } 1551 1552 /// \brief Returns true if the given MacroID location points at the last 1553 /// token of the macro expansion. 1554 /// 1555 /// \param MacroEnd If non-null and function returns true, it is set to 1556 /// end location of the macro. 1557 bool isAtEndOfMacroExpansion(SourceLocation loc, 1558 SourceLocation *MacroEnd = nullptr) const { 1559 return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd); 1560 } 1561 1562 /// \brief Print the token to stderr, used for debugging. 1563 void DumpToken(const Token &Tok, bool DumpFlags = false) const; 1564 void DumpLocation(SourceLocation Loc) const; 1565 void DumpMacro(const MacroInfo &MI) const; 1566 void dumpMacroInfo(const IdentifierInfo *II); 1567 1568 /// \brief Given a location that specifies the start of a 1569 /// token, return a new location that specifies a character within the token. 1570 SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, 1571 unsigned Char) const { 1572 return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts); 1573 } 1574 1575 /// \brief Increment the counters for the number of token paste operations 1576 /// performed. 1577 /// 1578 /// If fast was specified, this is a 'fast paste' case we handled. 1579 void IncrementPasteCounter(bool isFast) { 1580 if (isFast) 1581 ++NumFastTokenPaste; 1582 else 1583 ++NumTokenPaste; 1584 } 1585 1586 void PrintStats(); 1587 1588 size_t getTotalMemory() const; 1589 1590 /// When the macro expander pastes together a comment (/##/) in Microsoft 1591 /// mode, this method handles updating the current state, returning the 1592 /// token on the next source line. 
1593 void HandleMicrosoftCommentPaste(Token &Tok); 1594 1595 //===--------------------------------------------------------------------===// 1596 // Preprocessor callback methods. These are invoked by a lexer as various 1597 // directives and events are found. 1598 1599 /// Given a tok::raw_identifier token, look up the 1600 /// identifier information for the token and install it into the token, 1601 /// updating the token kind accordingly. 1602 IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const; 1603 1604 private: 1605 llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons; 1606 1607 public: 1608 1609 /// \brief Specifies the reason for poisoning an identifier. 1610 /// 1611 /// If that identifier is accessed while poisoned, then this reason will be 1612 /// used instead of the default "poisoned" diagnostic. 1613 void SetPoisonReason(IdentifierInfo *II, unsigned DiagID); 1614 1615 /// \brief Display reason for poisoned identifier. 1616 void HandlePoisonedIdentifier(Token & Tok); 1617 1618 void MaybeHandlePoisonedIdentifier(Token & Identifier) { 1619 if(IdentifierInfo * II = Identifier.getIdentifierInfo()) { 1620 if(II->isPoisoned()) { 1621 HandlePoisonedIdentifier(Identifier); 1622 } 1623 } 1624 } 1625 1626 private: 1627 /// Identifiers used for SEH handling in Borland. 
These are only 1628 /// allowed in particular circumstances 1629 // __except block 1630 IdentifierInfo *Ident__exception_code, 1631 *Ident___exception_code, 1632 *Ident_GetExceptionCode; 1633 // __except filter expression 1634 IdentifierInfo *Ident__exception_info, 1635 *Ident___exception_info, 1636 *Ident_GetExceptionInfo; 1637 // __finally 1638 IdentifierInfo *Ident__abnormal_termination, 1639 *Ident___abnormal_termination, 1640 *Ident_AbnormalTermination; 1641 1642 const char *getCurLexerEndPos(); 1643 void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod); 1644 1645 public: 1646 void PoisonSEHIdentifiers(bool Poison = true); // Borland 1647 1648 /// \brief Callback invoked when the lexer reads an identifier and has 1649 /// filled in the tokens IdentifierInfo member. 1650 /// 1651 /// This callback potentially macro expands it or turns it into a named 1652 /// token (like 'for'). 1653 /// 1654 /// \returns true if we actually computed a token, false if we need to 1655 /// lex again. 1656 bool HandleIdentifier(Token &Identifier); 1657 1658 1659 /// \brief Callback invoked when the lexer hits the end of the current file. 1660 /// 1661 /// This either returns the EOF token and returns true, or 1662 /// pops a level off the include stack and returns false, at which point the 1663 /// client should call lex again. 1664 bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false); 1665 1666 /// \brief Callback invoked when the current TokenLexer hits the end of its 1667 /// token stream. 1668 bool HandleEndOfTokenLexer(Token &Result); 1669 1670 /// \brief Callback invoked when the lexer sees a # token at the start of a 1671 /// line. 1672 /// 1673 /// This consumes the directive, modifies the lexer/preprocessor state, and 1674 /// advances the lexer(s) so that the next token read is the correct one. 1675 void HandleDirective(Token &Result); 1676 1677 /// \brief Ensure that the next token is a tok::eod token. 
1678 /// 1679 /// If not, emit a diagnostic and consume up until the eod. 1680 /// If \p EnableMacros is true, then we consider macros that expand to zero 1681 /// tokens as being ok. 1682 void CheckEndOfDirective(const char *Directive, bool EnableMacros = false); 1683 1684 /// \brief Read and discard all tokens remaining on the current line until 1685 /// the tok::eod token is found. 1686 void DiscardUntilEndOfDirective(); 1687 1688 /// \brief Returns true if the preprocessor has seen a use of 1689 /// __DATE__ or __TIME__ in the file so far. 1690 bool SawDateOrTime() const { 1691 return DATELoc != SourceLocation() || TIMELoc != SourceLocation(); 1692 } 1693 unsigned getCounterValue() const { return CounterValue; } 1694 void setCounterValue(unsigned V) { CounterValue = V; } 1695 1696 /// \brief Retrieves the module that we're currently building, if any. 1697 Module *getCurrentModule(); 1698 1699 /// \brief Allocate a new MacroInfo object with the provided SourceLocation. 1700 MacroInfo *AllocateMacroInfo(SourceLocation L); 1701 1702 /// \brief Turn the specified lexer token into a fully checked and spelled 1703 /// filename, e.g. as an operand of \#include. 1704 /// 1705 /// The caller is expected to provide a buffer that is large enough to hold 1706 /// the spelling of the filename, but is also expected to handle the case 1707 /// when this method decides to use a different buffer. 1708 /// 1709 /// \returns true if the input filename was in <>'s or false if it was 1710 /// in ""'s. 1711 bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Filename); 1712 1713 /// \brief Given a "foo" or \<foo> reference, look up the indicated file. 1714 /// 1715 /// Returns null on failure. \p isAngled indicates whether the file 1716 /// reference is for system \#include's or not (i.e. using <> instead of ""). 
1717 const FileEntry *LookupFile(SourceLocation FilenameLoc, StringRef Filename, 1718 bool isAngled, const DirectoryLookup *FromDir, 1719 const FileEntry *FromFile, 1720 const DirectoryLookup *&CurDir, 1721 SmallVectorImpl<char> *SearchPath, 1722 SmallVectorImpl<char> *RelativePath, 1723 ModuleMap::KnownHeader *SuggestedModule, 1724 bool *IsMapped, bool SkipCache = false); 1725 1726 /// \brief Get the DirectoryLookup structure used to find the current 1727 /// FileEntry, if CurLexer is non-null and if applicable. 1728 /// 1729 /// This allows us to implement \#include_next and find directory-specific 1730 /// properties. 1731 const DirectoryLookup *GetCurDirLookup() { return CurDirLookup; } 1732 1733 /// \brief Return true if we're in the top-level file, not in a \#include. 1734 bool isInPrimaryFile() const; 1735 1736 /// \brief Return true if we're in the main file (specifically, if we are 0 1737 /// (zero) levels deep \#include. This is used by the lexer to determine if 1738 /// it needs to generate errors about unterminated \#if directives. 1739 bool isInMainFile() const; 1740 1741 /// \brief Handle cases where the \#include name is expanded 1742 /// from a macro as multiple tokens, which need to be glued together. 1743 /// 1744 /// This occurs for code like: 1745 /// \code 1746 /// \#define FOO <x/y.h> 1747 /// \#include FOO 1748 /// \endcode 1749 /// because in this case, "<x/y.h>" is returned as 7 tokens, not one. 1750 /// 1751 /// This code concatenates and consumes tokens up to the '>' token. It 1752 /// returns false if the > was found, otherwise it returns true if it finds 1753 /// and consumes the EOD marker. 1754 bool ConcatenateIncludeName(SmallString<128> &FilenameBuffer, 1755 SourceLocation &End); 1756 1757 /// \brief Lex an on-off-switch (C99 6.10.6p2) and verify that it is 1758 /// followed by EOD. Return true if the token is not a valid on-off-switch. 
1759 bool LexOnOffSwitch(tok::OnOffSwitch &OOS); 1760 1761 bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef, 1762 bool *ShadowFlag = nullptr); 1763 1764 void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma); 1765 Module *LeaveSubmodule(bool ForPragma); 1766 1767 private: 1768 void PushIncludeMacroStack() { 1769 assert(CurLexerKind != CLK_CachingLexer && "cannot push a caching lexer"); 1770 IncludeMacroStack.emplace_back(CurLexerKind, CurLexerSubmodule, 1771 std::move(CurLexer), std::move(CurPTHLexer), 1772 CurPPLexer, std::move(CurTokenLexer), 1773 CurDirLookup); 1774 CurPPLexer = nullptr; 1775 } 1776 1777 void PopIncludeMacroStack() { 1778 CurLexer = std::move(IncludeMacroStack.back().TheLexer); 1779 CurPTHLexer = std::move(IncludeMacroStack.back().ThePTHLexer); 1780 CurPPLexer = IncludeMacroStack.back().ThePPLexer; 1781 CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer); 1782 CurDirLookup = IncludeMacroStack.back().TheDirLookup; 1783 CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule; 1784 CurLexerKind = IncludeMacroStack.back().CurLexerKind; 1785 IncludeMacroStack.pop_back(); 1786 } 1787 1788 void PropagateLineStartLeadingSpaceInfo(Token &Result); 1789 1790 /// Determine whether we need to create module macros for #defines in the 1791 /// current context. 1792 bool needModuleMacros() const; 1793 1794 /// Update the set of active module macros and ambiguity flag for a module 1795 /// macro name. 1796 void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info); 1797 1798 DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI, 1799 SourceLocation Loc); 1800 UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc); 1801 VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc, 1802 bool isPublic); 1803 1804 /// \brief Lex and validate a macro name, which occurs after a 1805 /// \#define or \#undef. 
1806 /// 1807 /// \param MacroNameTok Token that represents the name defined or undefined. 1808 /// \param IsDefineUndef Kind if preprocessor directive. 1809 /// \param ShadowFlag Points to flag that is set if macro name shadows 1810 /// a keyword. 1811 /// 1812 /// This emits a diagnostic, sets the token kind to eod, 1813 /// and discards the rest of the macro line if the macro name is invalid. 1814 void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other, 1815 bool *ShadowFlag = nullptr); 1816 1817 /// The ( starting an argument list of a macro definition has just been read. 1818 /// Lex the rest of the arguments and the closing ), updating \p MI with 1819 /// what we learn and saving in \p LastTok the last token read. 1820 /// Return true if an error occurs parsing the arg list. 1821 bool ReadMacroDefinitionArgList(MacroInfo *MI, Token& LastTok); 1822 1823 /// We just read a \#if or related directive and decided that the 1824 /// subsequent tokens are in the \#if'd out portion of the 1825 /// file. Lex the rest of the file, until we see an \#endif. If \p 1826 /// FoundNonSkipPortion is true, then we have already emitted code for part of 1827 /// this \#if directive, so \#else/\#elif blocks should never be entered. If 1828 /// \p FoundElse is false, then \#else directives are ok, if not, then we have 1829 /// already seen one so a \#else directive is a duplicate. When this returns, 1830 /// the caller can lex the first valid token. 1831 void SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, 1832 bool FoundNonSkipPortion, bool FoundElse, 1833 SourceLocation ElseLoc = SourceLocation()); 1834 1835 /// \brief A fast PTH version of SkipExcludedConditionalBlock. 1836 void PTHSkipExcludedConditionalBlock(); 1837 1838 /// \brief Evaluate an integer constant expression that may occur after a 1839 /// \#if or \#elif directive and return it as a bool. 1840 /// 1841 /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro. 
1842 bool EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro); 1843 1844 /// \brief Install the standard preprocessor pragmas: 1845 /// \#pragma GCC poison/system_header/dependency and \#pragma once. 1846 void RegisterBuiltinPragmas(); 1847 1848 /// \brief Register builtin macros such as __LINE__ with the identifier table. 1849 void RegisterBuiltinMacros(); 1850 1851 /// If an identifier token is read that is to be expanded as a macro, handle 1852 /// it and return the next token as 'Tok'. If we lexed a token, return true; 1853 /// otherwise the caller should lex again. 1854 bool HandleMacroExpandedIdentifier(Token &Tok, const MacroDefinition &MD); 1855 1856 /// \brief Cache macro expanded tokens for TokenLexers. 1857 // 1858 /// Works like a stack; a TokenLexer adds the macro expanded tokens that is 1859 /// going to lex in the cache and when it finishes the tokens are removed 1860 /// from the end of the cache. 1861 Token *cacheMacroExpandedTokens(TokenLexer *tokLexer, 1862 ArrayRef<Token> tokens); 1863 void removeCachedMacroExpandedTokensOfLastLexer(); 1864 friend void TokenLexer::ExpandFunctionArguments(); 1865 1866 /// Determine whether the next preprocessor token to be 1867 /// lexed is a '('. If so, consume the token and return true, if not, this 1868 /// method should have no observable side-effect on the lexed tokens. 1869 bool isNextPPTokenLParen(); 1870 1871 /// After reading "MACRO(", this method is invoked to read all of the formal 1872 /// arguments specified for the macro invocation. Returns null on error. 1873 MacroArgs *ReadFunctionLikeMacroArgs(Token &MacroName, MacroInfo *MI, 1874 SourceLocation &ExpansionEnd); 1875 1876 /// \brief If an identifier token is read that is to be expanded 1877 /// as a builtin macro, handle it and return the next token as 'Tok'. 1878 void ExpandBuiltinMacro(Token &Tok); 1879 1880 /// \brief Read a \c _Pragma directive, slice it up, process it, then 1881 /// return the first token after the directive. 
1882 /// This assumes that the \c _Pragma token has just been read into \p Tok. 1883 void Handle_Pragma(Token &Tok); 1884 1885 /// \brief Like Handle_Pragma except the pragma text is not enclosed within 1886 /// a string literal. 1887 void HandleMicrosoft__pragma(Token &Tok); 1888 1889 /// \brief Add a lexer to the top of the include stack and 1890 /// start lexing tokens from it instead of the current buffer. 1891 void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir); 1892 1893 /// \brief Add a lexer to the top of the include stack and 1894 /// start getting tokens from it using the PTH cache. 1895 void EnterSourceFileWithPTH(PTHLexer *PL, const DirectoryLookup *Dir); 1896 1897 /// \brief Set the FileID for the preprocessor predefines. 1898 void setPredefinesFileID(FileID FID) { 1899 assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!"); 1900 PredefinesFileID = FID; 1901 } 1902 1903 /// \brief Returns true if we are lexing from a file and not a 1904 /// pragma or a macro. 1905 static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) { 1906 return L ? !L->isPragmaLexer() : P != nullptr; 1907 } 1908 1909 static bool IsFileLexer(const IncludeStackInfo& I) { 1910 return IsFileLexer(I.TheLexer.get(), I.ThePPLexer); 1911 } 1912 1913 bool IsFileLexer() const { 1914 return IsFileLexer(CurLexer.get(), CurPPLexer); 1915 } 1916 1917 //===--------------------------------------------------------------------===// 1918 // Caching stuff. 1919 void CachingLex(Token &Result); 1920 bool InCachingLexMode() const { 1921 // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means 1922 // that we are past EOF, not that we are in CachingLex mode. 
1923 return !CurPPLexer && !CurTokenLexer && !CurPTHLexer && 1924 !IncludeMacroStack.empty(); 1925 } 1926 void EnterCachingLexMode(); 1927 void ExitCachingLexMode() { 1928 if (InCachingLexMode()) 1929 RemoveTopOfLexerStack(); 1930 } 1931 const Token &PeekAhead(unsigned N); 1932 void AnnotatePreviousCachedTokens(const Token &Tok); 1933 1934 //===--------------------------------------------------------------------===// 1935 /// Handle*Directive - implement the various preprocessor directives. These 1936 /// should side-effect the current preprocessor object so that the next call 1937 /// to Lex() will return the appropriate token next. 1938 void HandleLineDirective(); 1939 void HandleDigitDirective(Token &Tok); 1940 void HandleUserDiagnosticDirective(Token &Tok, bool isWarning); 1941 void HandleIdentSCCSDirective(Token &Tok); 1942 void HandleMacroPublicDirective(Token &Tok); 1943 void HandleMacroPrivateDirective(); 1944 1945 // File inclusion. 1946 void HandleIncludeDirective(SourceLocation HashLoc, 1947 Token &Tok, 1948 const DirectoryLookup *LookupFrom = nullptr, 1949 const FileEntry *LookupFromFile = nullptr, 1950 bool isImport = false); 1951 void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok); 1952 void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok); 1953 void HandleImportDirective(SourceLocation HashLoc, Token &Tok); 1954 void HandleMicrosoftImportDirective(Token &Tok); 1955 1956 public: 1957 /// Check that the given module is available, producing a diagnostic if not. 1958 /// \return \c true if the check failed (because the module is not available). 1959 /// \c false if the module appears to be usable. 1960 static bool checkModuleIsAvailable(const LangOptions &LangOpts, 1961 const TargetInfo &TargetInfo, 1962 DiagnosticsEngine &Diags, Module *M); 1963 1964 // Module inclusion testing. 1965 /// \brief Find the module that owns the source or header file that 1966 /// \p Loc points to. 
If the location is in a file that was included 1967 /// into a module, or is outside any module, returns nullptr. 1968 Module *getModuleForLocation(SourceLocation Loc); 1969 1970 /// \brief We want to produce a diagnostic at location IncLoc concerning a 1971 /// missing module import. 1972 /// 1973 /// \param IncLoc The location at which the missing import was detected. 1974 /// \param M The desired module. 1975 /// \param MLoc A location within the desired module at which some desired 1976 /// effect occurred (eg, where a desired entity was declared). 1977 /// 1978 /// \return A file that can be #included to import a module containing MLoc. 1979 /// Null if no such file could be determined or if a #include is not 1980 /// appropriate. 1981 const FileEntry *getModuleHeaderToIncludeForDiagnostics(SourceLocation IncLoc, 1982 Module *M, 1983 SourceLocation MLoc); 1984 1985 bool isRecordingPreamble() const { 1986 return PreambleConditionalStack.isRecording(); 1987 } 1988 1989 bool hasRecordedPreamble() const { 1990 return PreambleConditionalStack.hasRecordedPreamble(); 1991 } 1992 1993 ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const { 1994 return PreambleConditionalStack.getStack(); 1995 } 1996 1997 void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) { 1998 PreambleConditionalStack.setStack(s); 1999 } 2000 2001 void setReplayablePreambleConditionalStack(ArrayRef<PPConditionalInfo> s) { 2002 PreambleConditionalStack.startReplaying(); 2003 PreambleConditionalStack.setStack(s); 2004 } 2005 2006 private: 2007 // Macro handling. 2008 void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterTopLevelIfndef); 2009 void HandleUndefDirective(); 2010 2011 // Conditional Inclusion. 
2012 void HandleIfdefDirective(Token &Tok, bool isIfndef, 2013 bool ReadAnyTokensBeforeDirective); 2014 void HandleIfDirective(Token &Tok, bool ReadAnyTokensBeforeDirective); 2015 void HandleEndifDirective(Token &Tok); 2016 void HandleElseDirective(Token &Tok); 2017 void HandleElifDirective(Token &Tok); 2018 2019 // Pragmas. 2020 void HandlePragmaDirective(SourceLocation IntroducerLoc, 2021 PragmaIntroducerKind Introducer); 2022 public: 2023 void HandlePragmaOnce(Token &OnceTok); 2024 void HandlePragmaMark(); 2025 void HandlePragmaPoison(); 2026 void HandlePragmaSystemHeader(Token &SysHeaderTok); 2027 void HandlePragmaDependency(Token &DependencyTok); 2028 void HandlePragmaPushMacro(Token &Tok); 2029 void HandlePragmaPopMacro(Token &Tok); 2030 void HandlePragmaIncludeAlias(Token &Tok); 2031 void HandlePragmaModuleBuild(Token &Tok); 2032 IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok); 2033 2034 // Return true and store the first token only if any CommentHandler 2035 // has inserted some tokens and getCommentRetentionState() is false. 2036 bool HandleComment(Token &Token, SourceRange Comment); 2037 2038 /// \brief A macro is used, update information about macros that need unused 2039 /// warnings. 2040 void markMacroAsUsed(MacroInfo *MI); 2041 }; 2042 2043 /// \brief Abstract base class that describes a handler that will receive 2044 /// source ranges for each of the comments encountered in the source file. 2045 class CommentHandler { 2046 public: 2047 virtual ~CommentHandler(); 2048 2049 // The handler shall return true if it has pushed any tokens 2050 // to be read using e.g. EnterToken or EnterTokenStream. 2051 virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0; 2052 }; 2053 2054 /// \brief Registry of pragma handlers added by plugins 2055 typedef llvm::Registry<PragmaHandler> PragmaHandlerRegistry; 2056 2057 } // end namespace clang 2058 2059 #endif 2060