Home | History | Annotate | Download | only in Lex
      1 //===--- Preprocessor.h - C Language Family Preprocessor --------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 //  This file defines the Preprocessor interface.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
     15 #define LLVM_CLANG_LEX_PREPROCESSOR_H
     16 
     17 #include "clang/Basic/Builtins.h"
     18 #include "clang/Basic/Diagnostic.h"
     19 #include "clang/Basic/IdentifierTable.h"
     20 #include "clang/Basic/SourceLocation.h"
     21 #include "clang/Lex/Lexer.h"
     22 #include "clang/Lex/MacroInfo.h"
     23 #include "clang/Lex/PPCallbacks.h"
     24 #include "clang/Lex/PPMutationListener.h"
     25 #include "clang/Lex/PTHLexer.h"
     26 #include "clang/Lex/PTHManager.h"
     27 #include "clang/Lex/TokenLexer.h"
     28 #include "llvm/ADT/ArrayRef.h"
     29 #include "llvm/ADT/DenseMap.h"
     30 #include "llvm/ADT/IntrusiveRefCntPtr.h"
     31 #include "llvm/ADT/OwningPtr.h"
     32 #include "llvm/ADT/SmallPtrSet.h"
     33 #include "llvm/ADT/SmallVector.h"
     34 #include "llvm/Support/Allocator.h"
     35 #include <vector>
     36 
     37 namespace llvm {
     38   template<unsigned InternalLen> class SmallString;
     39 }
     40 
     41 namespace clang {
     42 
     43 class SourceManager;
     44 class ExternalPreprocessorSource;
     45 class FileManager;
     46 class FileEntry;
     47 class HeaderSearch;
     48 class PragmaNamespace;
     49 class PragmaHandler;
     50 class CommentHandler;
     51 class ScratchBuffer;
     52 class TargetInfo;
     53 class PPCallbacks;
     54 class CodeCompletionHandler;
     55 class DirectoryLookup;
     56 class PreprocessingRecord;
     57 class ModuleLoader;
     58 class PreprocessorOptions;
     59 
     60 /// \brief Stores token information for comparing actual tokens with
     61 /// predefined values.  Only handles simple tokens and identifiers.
     62 class TokenValue {
     63   tok::TokenKind Kind;
     64   IdentifierInfo *II;
     65 
     66 public:
     67   TokenValue(tok::TokenKind Kind) : Kind(Kind), II(0) {
     68     assert(Kind != tok::raw_identifier && "Raw identifiers are not supported.");
     69     assert(Kind != tok::identifier &&
     70            "Identifiers should be created by TokenValue(IdentifierInfo *)");
     71     assert(!tok::isLiteral(Kind) && "Literals are not supported.");
     72     assert(!tok::isAnnotation(Kind) && "Annotations are not supported.");
     73   }
     74   TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {}
     75   bool operator==(const Token &Tok) const {
     76     return Tok.getKind() == Kind &&
     77         (!II || II == Tok.getIdentifierInfo());
     78   }
     79 };
     80 
     81 /// Preprocessor - This object engages in a tight little dance with the lexer to
     82 /// efficiently preprocess tokens.  Lexers know only about tokens within a
     83 /// single source file, and don't know anything about preprocessor-level issues
     84 /// like the \#include stack, token expansion, etc.
     85 ///
     86 class Preprocessor : public RefCountedBase<Preprocessor> {
     87   IntrusiveRefCntPtr<PreprocessorOptions> PPOpts;
     88   DiagnosticsEngine        *Diags;
     89   LangOptions       &LangOpts;
     90   const TargetInfo  *Target;
     91   FileManager       &FileMgr;
     92   SourceManager     &SourceMgr;
     93   ScratchBuffer     *ScratchBuf;
     94   HeaderSearch      &HeaderInfo;
     95   ModuleLoader      &TheModuleLoader;
     96 
     97   /// \brief External source of macros.
     98   ExternalPreprocessorSource *ExternalSource;
     99 
    100 
    101   /// PTH - An optional PTHManager object used for getting tokens from
    102   ///  a token cache rather than lexing the original source file.
    103   OwningPtr<PTHManager> PTH;
    104 
    105   /// BP - A BumpPtrAllocator object used to quickly allocate and release
    106   ///  objects internal to the Preprocessor.
    107   llvm::BumpPtrAllocator BP;
    108 
    109   /// Identifiers for builtin macros and other builtins.
    110   IdentifierInfo *Ident__LINE__, *Ident__FILE__;   // __LINE__, __FILE__
    111   IdentifierInfo *Ident__DATE__, *Ident__TIME__;   // __DATE__, __TIME__
    112   IdentifierInfo *Ident__INCLUDE_LEVEL__;          // __INCLUDE_LEVEL__
    113   IdentifierInfo *Ident__BASE_FILE__;              // __BASE_FILE__
    114   IdentifierInfo *Ident__TIMESTAMP__;              // __TIMESTAMP__
    115   IdentifierInfo *Ident__COUNTER__;                // __COUNTER__
    116   IdentifierInfo *Ident_Pragma, *Ident__pragma;    // _Pragma, __pragma
    117   IdentifierInfo *Ident__VA_ARGS__;                // __VA_ARGS__
    118   IdentifierInfo *Ident__has_feature;              // __has_feature
    119   IdentifierInfo *Ident__has_extension;            // __has_extension
    120   IdentifierInfo *Ident__has_builtin;              // __has_builtin
    121   IdentifierInfo *Ident__has_attribute;            // __has_attribute
    122   IdentifierInfo *Ident__has_include;              // __has_include
    123   IdentifierInfo *Ident__has_include_next;         // __has_include_next
    124   IdentifierInfo *Ident__has_warning;              // __has_warning
    125   IdentifierInfo *Ident__building_module;          // __building_module
    126   IdentifierInfo *Ident__MODULE__;                 // __MODULE__
    127 
    128   SourceLocation DATELoc, TIMELoc;
    129   unsigned CounterValue;  // Next __COUNTER__ value.
    130 
    131   enum {
    132     /// MaxAllowedIncludeStackDepth - Maximum depth of \#includes.
    133     MaxAllowedIncludeStackDepth = 200
    134   };
    135 
    136   // State that is set before the preprocessor begins.
    137   bool KeepComments : 1;
    138   bool KeepMacroComments : 1;
    139   bool SuppressIncludeNotFoundError : 1;
    140 
    141   // State that changes while the preprocessor runs:
    142   bool InMacroArgs : 1;            // True if parsing fn macro invocation args.
    143 
    144   /// Whether the preprocessor owns the header search object.
    145   bool OwnsHeaderSearch : 1;
    146 
    147   /// DisableMacroExpansion - True if macro expansion is disabled.
    148   bool DisableMacroExpansion : 1;
    149 
    150   /// MacroExpansionInDirectivesOverride - Temporarily disables
    151   /// DisableMacroExpansion (i.e. enables expansion) when parsing preprocessor
    152   /// directives.
    153   bool MacroExpansionInDirectivesOverride : 1;
    154 
    155   class ResetMacroExpansionHelper;
    156 
    157   /// \brief Whether we have already loaded macros from the external source.
    158   mutable bool ReadMacrosFromExternalSource : 1;
    159 
    160   /// \brief True if pragmas are enabled.
    161   bool PragmasEnabled : 1;
    162 
    163   /// \brief True if the current build action is a preprocessing action.
    164   bool PreprocessedOutput : 1;
    165 
    166   /// \brief True if we are currently preprocessing a #if or #elif directive
    167   bool ParsingIfOrElifDirective;
    168 
    169   /// \brief True if we are pre-expanding macro arguments.
    170   bool InMacroArgPreExpansion;
    171 
    172   /// Identifiers - This is mapping/lookup information for all identifiers in
    173   /// the program, including program keywords.
    174   mutable IdentifierTable Identifiers;
    175 
    176   /// Selectors - This table contains all the selectors in the program. Unlike
    177   /// IdentifierTable above, this table *isn't* populated by the preprocessor.
    178   /// It is declared/expanded here because its role/lifetime is
    179   /// conceptually similar to the IdentifierTable. In addition, the current
    180   /// control flow (in clang::ParseAST()) makes it convenient to put here.
    181   /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
    182   /// the lifetime of the preprocessor.
    183   SelectorTable Selectors;
    184 
    185   /// BuiltinInfo - Information about builtins.
    186   Builtin::Context BuiltinInfo;
    187 
    188   /// PragmaHandlers - This tracks all of the pragmas that the client registered
    189   /// with this preprocessor.
    190   PragmaNamespace *PragmaHandlers;
    191 
    192   /// \brief Tracks all of the comment handlers that the client registered
    193   /// with this preprocessor.
    194   std::vector<CommentHandler *> CommentHandlers;
    195 
    196   /// \brief True if we want to ignore EOF token and continue later on (thus
    197   /// avoid tearing the Lexer and etc. down).
    198   bool IncrementalProcessing;
    199 
    200   /// \brief The code-completion handler.
    201   CodeCompletionHandler *CodeComplete;
    202 
    203   /// \brief The file that we're performing code-completion for, if any.
    204   const FileEntry *CodeCompletionFile;
    205 
    206   /// \brief The offset in file for the code-completion point.
    207   unsigned CodeCompletionOffset;
    208 
    209   /// \brief The location for the code-completion point. This gets instantiated
    210   /// when the CodeCompletionFile gets \#include'ed for preprocessing.
    211   SourceLocation CodeCompletionLoc;
    212 
    213   /// \brief The start location for the file of the code-completion point.
    214   ///
    215   /// This gets instantiated when the CodeCompletionFile gets \#include'ed
    216   /// for preprocessing.
    217   SourceLocation CodeCompletionFileLoc;
    218 
    219   /// \brief The source location of the 'import' contextual keyword we just
    220   /// lexed, if any.
    221   SourceLocation ModuleImportLoc;
    222 
    223   /// \brief The module import path that we're currently processing.
    224   SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> ModuleImportPath;
    225 
    226   /// \brief Whether the module import expects an identifier next. Otherwise,
    227   /// it expects a '.' or ';'.
    228   bool ModuleImportExpectsIdentifier;
    229 
    230   /// \brief The source location of the currently-active
    231   /// #pragma clang arc_cf_code_audited begin.
    232   SourceLocation PragmaARCCFCodeAuditedLoc;
    233 
    234   /// \brief True if we hit the code-completion point.
    235   bool CodeCompletionReached;
    236 
    237   /// \brief The number of bytes that we will initially skip when entering the
    238   /// main file, which is used when loading a precompiled preamble, along
    239   /// with a flag that indicates whether skipping this number of bytes will
    240   /// place the lexer at the start of a line.
    241   std::pair<unsigned, bool> SkipMainFilePreamble;
    242 
    243   /// CurLexer - This is the current top of the stack that we're lexing from if
    244   /// not expanding a macro and we are lexing directly from source code.
    245   ///  Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
    246   OwningPtr<Lexer> CurLexer;
    247 
    248   /// CurPTHLexer - This is the current top of stack that we're lexing from if
    249   ///  not expanding from a macro and we are lexing from a PTH cache.
    250   ///  Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
    251   OwningPtr<PTHLexer> CurPTHLexer;
    252 
    253   /// CurPPLexer - This is the current top of the stack that we're lexing from
    254   ///  if not expanding a macro.  This is an alias for either CurLexer or
    255   ///  CurPTHLexer.
    256   PreprocessorLexer *CurPPLexer;
    257 
    258   /// CurDirLookup - The DirectoryLookup structure used to find the current
    259   /// FileEntry, if CurLexer is non-null and if applicable.  This allows us to
    260   /// implement \#include_next and find directory-specific properties.
    261   const DirectoryLookup *CurDirLookup;
    262 
    263   /// CurTokenLexer - This is the current macro we are expanding, if we are
    264   /// expanding a macro.  One of CurLexer and CurTokenLexer must be null.
    265   OwningPtr<TokenLexer> CurTokenLexer;
    266 
  /// \brief The kind of lexer we're currently working with.
  ///
  /// Note that this single declaration both defines the enumeration type and
  /// declares the data member of the same name that holds the current value.
  enum CurLexerKind {
    CLK_Lexer,                // presumably: lexing through CurLexer -- confirm
    CLK_PTHLexer,             // presumably: lexing through CurPTHLexer -- confirm
    CLK_TokenLexer,           // presumably: lexing through CurTokenLexer -- confirm
    CLK_CachingLexer,         // presumably: replaying CachedTokens -- confirm
    CLK_LexAfterModuleImport  // presumably: state after a module import -- confirm
  } CurLexerKind;
    275 
    276   /// IncludeMacroStack - This keeps track of the stack of files currently
    277   /// \#included, and macros currently being expanded from, not counting
    278   /// CurLexer/CurTokenLexer.
    279   struct IncludeStackInfo {
    280     enum CurLexerKind     CurLexerKind;
    281     Lexer                 *TheLexer;
    282     PTHLexer              *ThePTHLexer;
    283     PreprocessorLexer     *ThePPLexer;
    284     TokenLexer            *TheTokenLexer;
    285     const DirectoryLookup *TheDirLookup;
    286 
    287     IncludeStackInfo(enum CurLexerKind K, Lexer *L, PTHLexer* P,
    288                      PreprocessorLexer* PPL,
    289                      TokenLexer* TL, const DirectoryLookup *D)
    290       : CurLexerKind(K), TheLexer(L), ThePTHLexer(P), ThePPLexer(PPL),
    291         TheTokenLexer(TL), TheDirLookup(D) {}
    292   };
    293   std::vector<IncludeStackInfo> IncludeMacroStack;
    294 
    295   /// Callbacks - These are actions invoked when some preprocessor activity is
    296   /// encountered (e.g. a file is \#included, etc).
    297   PPCallbacks *Callbacks;
    298 
    299   /// \brief Listener whose actions are invoked when an entity in the
    300   /// preprocessor (e.g., a macro) that was loaded from an AST file is
    301   /// later mutated.
    302   PPMutationListener *Listener;
    303 
    304   struct MacroExpandsInfo {
    305     Token Tok;
    306     MacroDirective *MD;
    307     SourceRange Range;
    308     MacroExpandsInfo(Token Tok, MacroDirective *MD, SourceRange Range)
    309       : Tok(Tok), MD(MD), Range(Range) { }
    310   };
    311   SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
    312 
    313   /// Macros - For each IdentifierInfo that was associated with a macro, we
    314   /// keep a mapping to the history of all macro definitions and #undefs in
    315   /// the reverse order (the latest one is in the head of the list).
    316   llvm::DenseMap<const IdentifierInfo*, MacroDirective*> Macros;
    317   friend class ASTReader;
    318 
    319   /// \brief Macros that we want to warn about because they are not used at the
    320   /// end of the translation unit; we store just their SourceLocations instead
    321   /// of something like MacroInfo*. The benefit of this is that when we are
    322   /// deserializing from PCH, we don't need to deserialize identifier & macros
    323   /// just so that we can report that they are unused, we just warn using
    324   /// the SourceLocations of this set (that will be filled by the ASTReader).
    325   /// We are using SmallPtrSet instead of a vector for faster removal.
    326   typedef llvm::SmallPtrSet<SourceLocation, 32> WarnUnusedMacroLocsTy;
    327   WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
    328 
    329   /// MacroArgCache - This is a "freelist" of MacroArgs objects that can be
    330   /// reused for quick allocation.
    331   MacroArgs *MacroArgCache;
    332   friend class MacroArgs;
    333 
    334   /// PragmaPushMacroInfo - For each IdentifierInfo used in a #pragma
    335   /// push_macro directive, we keep a MacroInfo stack used to restore
    336   /// previous macro value.
    337   llvm::DenseMap<IdentifierInfo*, std::vector<MacroInfo*> > PragmaPushMacroInfo;
    338 
    339   // Various statistics we track for performance analysis.
    340   unsigned NumDirectives, NumIncluded, NumDefined, NumUndefined, NumPragma;
    341   unsigned NumIf, NumElse, NumEndif;
    342   unsigned NumEnteredSourceFiles, MaxIncludeStackDepth;
    343   unsigned NumMacroExpanded, NumFnMacroExpanded, NumBuiltinMacroExpanded;
    344   unsigned NumFastMacroExpanded, NumTokenPaste, NumFastTokenPaste;
    345   unsigned NumSkipped;
    346 
    347   /// Predefines - This string is the predefined macros that preprocessor
    348   /// should use from the command line etc.
    349   std::string Predefines;
    350 
    351   /// \brief The file ID for the preprocessor predefines.
    352   FileID PredefinesFileID;
    353 
    354   /// TokenLexerCache - Cache macro expanders to reduce malloc traffic.
    355   enum { TokenLexerCacheSize = 8 };
    356   unsigned NumCachedTokenLexers;
    357   TokenLexer *TokenLexerCache[TokenLexerCacheSize];
    358 
    359   /// \brief Keeps macro expanded tokens for TokenLexers.
    360   ///
    361   /// Works like a stack; a TokenLexer adds the macro expanded tokens that it
    362   /// is going to lex in the cache and when it finishes the tokens are removed
    363   /// from the end of the cache.
    364   SmallVector<Token, 16> MacroExpandedTokens;
    365   std::vector<std::pair<TokenLexer *, size_t> > MacroExpandingLexersStack;
    366 
    367   /// \brief A record of the macro definitions and expansions that
    368   /// occurred during preprocessing.
    369   ///
    370   /// This is an optional side structure that can be enabled with
    371   /// \c createPreprocessingRecord() prior to preprocessing.
    372   PreprocessingRecord *Record;
    373 
    374 private:  // Cached tokens state.
    375   typedef SmallVector<Token, 1> CachedTokensTy;
    376 
    377   /// CachedTokens - Cached tokens are stored here when we do backtracking or
    378   /// lookahead. They are "lexed" by the CachingLex() method.
    379   CachedTokensTy CachedTokens;
    380 
    381   /// CachedLexPos - The position of the cached token that CachingLex() should
    382   /// "lex" next. If it points beyond the CachedTokens vector, it means that
    383   /// a normal Lex() should be invoked.
    384   CachedTokensTy::size_type CachedLexPos;
    385 
    386   /// BacktrackPositions - Stack of backtrack positions, allowing nested
    387   /// backtracks. The EnableBacktrackAtThisPos() method pushes a position to
    388   /// indicate where CachedLexPos should be set when the BackTrack() method is
    389   /// invoked (at which point the last position is popped).
    390   std::vector<CachedTokensTy::size_type> BacktrackPositions;
    391 
  /// \brief Node of the chain in which MacroInfo objects are kept.
  ///
  /// The MacroInfo is embedded by value; Next and Prev link to the
  /// neighbouring nodes.
  struct MacroInfoChain {
    MacroInfo MI;
    MacroInfoChain *Next;  // Following node in the chain.
    MacroInfoChain *Prev;  // Preceding node in the chain.
  };
    397 
    398   /// MacroInfos are managed as a chain for easy disposal.  This is the head
    399   /// of that list.
    400   MacroInfoChain *MIChainHead;
    401 
    402   /// MICache - A "freelist" of MacroInfo objects that can be reused for quick
    403   /// allocation.
    404   MacroInfoChain *MICache;
    405 
    406 public:
    407   Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
    408                DiagnosticsEngine &diags, LangOptions &opts,
    409                const TargetInfo *target,
    410                SourceManager &SM, HeaderSearch &Headers,
    411                ModuleLoader &TheModuleLoader,
    412                IdentifierInfoLookup *IILookup = 0,
    413                bool OwnsHeaderSearch = false,
    414                bool DelayInitialization = false,
    415                bool IncrProcessing = false);
    416 
    417   ~Preprocessor();
    418 
    419   /// \brief Initialize the preprocessor, if the constructor did not already
    420   /// perform the initialization.
    421   ///
    422   /// \param Target Information about the target.
    423   void Initialize(const TargetInfo &Target);
    424 
    425   /// \brief Retrieve the preprocessor options used to initialize this
    426   /// preprocessor.
    427   PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; }
    428 
    429   DiagnosticsEngine &getDiagnostics() const { return *Diags; }
    430   void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }
    431 
    432   const LangOptions &getLangOpts() const { return LangOpts; }
    433   const TargetInfo &getTargetInfo() const { return *Target; }
    434   FileManager &getFileManager() const { return FileMgr; }
    435   SourceManager &getSourceManager() const { return SourceMgr; }
    436   HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
    437 
    438   IdentifierTable &getIdentifierTable() { return Identifiers; }
    439   SelectorTable &getSelectorTable() { return Selectors; }
    440   Builtin::Context &getBuiltinInfo() { return BuiltinInfo; }
    441   llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
    442 
    443   void setPTHManager(PTHManager* pm);
    444 
    445   PTHManager *getPTHManager() { return PTH.get(); }
    446 
    447   void setExternalSource(ExternalPreprocessorSource *Source) {
    448     ExternalSource = Source;
    449   }
    450 
    451   ExternalPreprocessorSource *getExternalSource() const {
    452     return ExternalSource;
    453   }
    454 
    455   /// \brief Retrieve the module loader associated with this preprocessor.
    456   ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
    457 
    458   /// \brief True if we are currently preprocessing a #if or #elif directive
    459   bool isParsingIfOrElifDirective() const {
    460     return ParsingIfOrElifDirective;
    461   }
    462 
    463   /// SetCommentRetentionState - Control whether or not the preprocessor retains
    464   /// comments in output.
    465   void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
    466     this->KeepComments = KeepComments | KeepMacroComments;
    467     this->KeepMacroComments = KeepMacroComments;
    468   }
    469 
    470   bool getCommentRetentionState() const { return KeepComments; }
    471 
    472   void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; }
    473   bool getPragmasEnabled() const { return PragmasEnabled; }
    474 
    475   void SetSuppressIncludeNotFoundError(bool Suppress) {
    476     SuppressIncludeNotFoundError = Suppress;
    477   }
    478 
    479   bool GetSuppressIncludeNotFoundError() {
    480     return SuppressIncludeNotFoundError;
    481   }
    482 
    483   /// Sets whether the preprocessor is responsible for producing output or if
    484   /// it is producing tokens to be consumed by Parse and Sema.
    485   void setPreprocessedOutput(bool IsPreprocessedOutput) {
    486     PreprocessedOutput = IsPreprocessedOutput;
    487   }
    488 
    489   /// Returns true if the preprocessor is responsible for generating output,
    490   /// false if it is producing tokens to be consumed by Parse and Sema.
    491   bool isPreprocessedOutput() const { return PreprocessedOutput; }
    492 
    493   /// isCurrentLexer - Return true if we are lexing directly from the specified
    494   /// lexer.
    495   bool isCurrentLexer(const PreprocessorLexer *L) const {
    496     return CurPPLexer == L;
    497   }
    498 
    499   /// getCurrentLexer - Return the current lexer being lexed from.  Note
    500   /// that this ignores any potentially active macro expansions and _Pragma
    501   /// expansions going on at the time.
    502   PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
    503 
    504   /// getCurrentFileLexer - Return the current file lexer being lexed from.
    505   /// Note that this ignores any potentially active macro expansions and _Pragma
    506   /// expansions going on at the time.
    507   PreprocessorLexer *getCurrentFileLexer() const;
    508 
    509   /// \brief Returns the file ID for the preprocessor predefines.
    510   FileID getPredefinesFileID() const { return PredefinesFileID; }
    511 
    512   /// getPPCallbacks/addPPCallbacks - Accessors for preprocessor callbacks.
    513   /// Note that this class takes ownership of any PPCallbacks object given to
    514   /// it.
    515   PPCallbacks *getPPCallbacks() const { return Callbacks; }
    516   void addPPCallbacks(PPCallbacks *C) {
    517     if (Callbacks)
    518       C = new PPChainedCallbacks(C, Callbacks);
    519     Callbacks = C;
    520   }
    521 
    522   /// \brief Attach an preprocessor mutation listener to the preprocessor.
    523   ///
    524   /// The preprocessor mutation listener provides the ability to track
    525   /// modifications to the preprocessor entities committed after they were
    526   /// initially created.
    527   void setPPMutationListener(PPMutationListener *Listener) {
    528     this->Listener = Listener;
    529   }
    530 
    531   /// \brief Retrieve a pointer to the preprocessor mutation listener
    532   /// associated with this preprocessor, if any.
    533   PPMutationListener *getPPMutationListener() const { return Listener; }
    534 
    535   /// \brief Given an identifier, return the MacroInfo it is \#defined to
    536   /// or null if it isn't \#define'd.
    537   MacroDirective *getMacroDirective(IdentifierInfo *II) const {
    538     if (!II->hasMacroDefinition())
    539       return 0;
    540 
    541     MacroDirective *MD = getMacroDirectiveHistory(II);
    542     assert(MD->getUndefLoc().isInvalid() && "Macro is undefined!");
    543     return MD;
    544   }
    545 
    546   const MacroInfo *getMacroInfo(IdentifierInfo *II) const {
    547     return const_cast<Preprocessor*>(this)->getMacroInfo(II);
    548   }
    549 
    550   MacroInfo *getMacroInfo(IdentifierInfo *II) {
    551     if (MacroDirective *MD = getMacroDirective(II))
    552       return MD->getInfo();
    553     return 0;
    554   }
    555 
    556   /// \brief Given an identifier, return the (probably #undef'd)
    557   /// MacroDirective representing the most recent macro definition. One can
    558   /// iterate over all previous macro definitions from it. This method should
    559   /// only be called for identifiers that hadMacroDefinition().
    560   MacroDirective *getMacroDirectiveHistory(const IdentifierInfo *II) const;
    561 
    562   /// \brief Specify a macro for this identifier.
    563   MacroDirective *setMacroDirective(IdentifierInfo *II, MacroInfo *MI,
    564                                     SourceLocation Loc, bool isImported);
    565   MacroDirective *setMacroDirective(IdentifierInfo *II, MacroInfo *MI) {
    566     return setMacroDirective(II, MI, MI->getDefinitionLoc(), false);
    567   }
    568   /// \brief Add a MacroInfo that was loaded from an AST file.
    569   void addLoadedMacroInfo(IdentifierInfo *II, MacroDirective *MD,
    570                           MacroDirective *Hint = 0);
    571   /// \brief Make the given MacroInfo, that was loaded from an AST file and
    572   /// previously hidden, visible.
    573   void makeLoadedMacroInfoVisible(IdentifierInfo *II, MacroDirective *MD);
    574   /// \brief Undefine a macro for this identifier.
    575   void clearMacroInfo(IdentifierInfo *II);
    576 
    577   /// macro_iterator/macro_begin/macro_end - This allows you to walk the macro
    578   /// history table. Currently defined macros have
    579   /// IdentifierInfo::hasMacroDefinition() set and an empty
    580   /// MacroDirective::getUndefLoc() at the head of the list.
    581   typedef llvm::DenseMap<const IdentifierInfo *,
    582                          MacroDirective*>::const_iterator macro_iterator;
    583   macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
    584   macro_iterator macro_end(bool IncludeExternalMacros = true) const;
    585 
    586   /// \brief Return the name of the macro defined before \p Loc that has
    587   /// spelling \p Tokens.  If there are multiple macros with same spelling,
    588   /// return the last one defined.
    589   StringRef getLastMacroWithSpelling(SourceLocation Loc,
    590                                      ArrayRef<TokenValue> Tokens) const;
    591 
    592   const std::string &getPredefines() const { return Predefines; }
    593   /// setPredefines - Set the predefines for this Preprocessor.  These
    594   /// predefines are automatically injected when parsing the main file.
    595   void setPredefines(const char *P) { Predefines = P; }
    596   void setPredefines(const std::string &P) { Predefines = P; }
    597 
    598   /// Return information about the specified preprocessor
    599   /// identifier token.
    600   IdentifierInfo *getIdentifierInfo(StringRef Name) const {
    601     return &Identifiers.get(Name);
    602   }
    603 
    604   /// AddPragmaHandler - Add the specified pragma handler to the preprocessor.
    605   /// If 'Namespace' is non-null, then it is a token required to exist on the
    606   /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
    607   void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
    608   void AddPragmaHandler(PragmaHandler *Handler) {
    609     AddPragmaHandler(StringRef(), Handler);
    610   }
    611 
    612   /// RemovePragmaHandler - Remove the specific pragma handler from
    613   /// the preprocessor. If \p Namespace is non-null, then it should
    614   /// be the namespace that \p Handler was added to. It is an error
    615   /// to remove a handler that has not been registered.
    616   void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
    617   void RemovePragmaHandler(PragmaHandler *Handler) {
    618     RemovePragmaHandler(StringRef(), Handler);
    619   }
    620 
    621   /// \brief Add the specified comment handler to the preprocessor.
    622   void addCommentHandler(CommentHandler *Handler);
    623 
    624   /// \brief Remove the specified comment handler.
    625   ///
    626   /// It is an error to remove a handler that has not been registered.
    627   void removeCommentHandler(CommentHandler *Handler);
    628 
    629   /// \brief Set the code completion handler to the given object.
    630   void setCodeCompletionHandler(CodeCompletionHandler &Handler) {
    631     CodeComplete = &Handler;
    632   }
    633 
    634   /// \brief Retrieve the current code-completion handler.
    635   CodeCompletionHandler *getCodeCompletionHandler() const {
    636     return CodeComplete;
    637   }
    638 
    639   /// \brief Clear out the code completion handler.
    640   void clearCodeCompletionHandler() {
    641     CodeComplete = 0;
    642   }
    643 
    644   /// \brief Hook used by the lexer to invoke the "natural language" code
    645   /// completion point.
    646   void CodeCompleteNaturalLanguage();
    647 
    648   /// \brief Retrieve the preprocessing record, or NULL if there is no
    649   /// preprocessing record.
    650   PreprocessingRecord *getPreprocessingRecord() const { return Record; }
    651 
    652   /// \brief Create a new preprocessing record, which will keep track of
    653   /// all macro expansions, macro definitions, etc.
    654   void createPreprocessingRecord();
    655 
    656   /// EnterMainSourceFile - Enter the specified FileID as the main source file,
    657   /// which implicitly adds the builtin defines etc.
    658   void EnterMainSourceFile();
    659 
    660   /// EndSourceFile - Inform the preprocessor callbacks that processing is
    661   /// complete.
    662   void EndSourceFile();
    663 
    664   /// \brief Add a source file to the top of the include stack and start
    665   /// lexing tokens from it instead of the current buffer.  On error, a
    666   /// diagnostic is emitted and the file is not entered.
    667   void EnterSourceFile(FileID CurFileID, const DirectoryLookup *Dir,
    668                        SourceLocation Loc);
    669 
    670   /// \brief Add a macro to the top of the include stack and start lexing
    671   /// tokens from it instead of the current buffer.  \p Args specifies the
    672   /// tokens input to a function-like macro.
    673   ///
    674   /// \p ILEnd specifies the location of the ')' for a function-like macro or
    675   /// the identifier for an object-like macro.
    676   void EnterMacro(Token &Identifier, SourceLocation ILEnd, MacroInfo *Macro,
    677                   MacroArgs *Args);
    678 
    679   /// \brief Add a "macro" context to the top of the include stack, which
    680   /// will cause the lexer to start returning the specified tokens.
    681   ///
    682   /// If \p DisableMacroExpansion is true, tokens lexed from the token stream
    683   /// will not be subject to further macro expansion.  Otherwise, these tokens
    684   /// will be re-macro-expanded when/if expansion is enabled.
    685   ///
    686   /// If \p OwnsTokens is false, this method assumes that the specified stream
    687   /// of tokens has a permanent owner somewhere, so they do not need to be
    688   /// copied.  If it is true, it assumes the array of tokens is allocated with
    689   /// new[] and must be freed.
    690   ///
    691   void EnterTokenStream(const Token *Toks, unsigned NumToks,
    692                         bool DisableMacroExpansion, bool OwnsTokens);
    693 
    694   /// \brief Pop the current lexer/macro expansion off the top of the lexer
    695   /// stack.  This should only be used in situations where the current state
    696   /// of the top-of-stack lexer is known.
    697   void RemoveTopOfLexerStack();
    698 
    699   /// \brief Start keeping track of lexed tokens so they can be re-lexed.
    700   ///
    701   /// From the point that this method is called, and until
    702   /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
    703   /// keeps track of the lexed tokens so that a subsequent Backtrack() call
    704   /// will make the Preprocessor re-lex the same tokens.
    705   ///
    706   /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
    707   /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
    708   /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
    709   ///
    710   /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
    711   /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
          /// tokens will continue indefinitely.
    712   void EnableBacktrackAtThisPos();
    713 
    714   /// \brief Disable the last EnableBacktrackAtThisPos() call.
    715   void CommitBacktrackedTokens();
    716 
    717   /// \brief Make the Preprocessor re-lex the tokens that were lexed since
    718   /// EnableBacktrackAtThisPos() was previously called.
    719   void Backtrack();
    720 
    721   /// \brief True if EnableBacktrackAtThisPos() was called and caching of
    722   /// tokens is on, i.e. at least one backtrack position is recorded.
    723   bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
    724 
    725   /// \brief Lex a token from the preprocessor by pulling a token from the
    726   /// current lexer or macro object.
          ///
          /// CurLexerKind selects which of the current lexers, or which helper
          /// routine, fills in \p Result.
    727   void Lex(Token &Result) {
    728     switch (CurLexerKind) {
    729     case CLK_Lexer: CurLexer->Lex(Result); break;
    730     case CLK_PTHLexer: CurPTHLexer->Lex(Result); break;
    731     case CLK_TokenLexer: CurTokenLexer->Lex(Result); break;
    732     case CLK_CachingLexer: CachingLex(Result); break;
    733     case CLK_LexAfterModuleImport: LexAfterModuleImport(Result); break;
    734     }
    735   }
    736 
          /// \brief Lex a token on behalf of Lex() when the current lexer kind is
          /// CLK_LexAfterModuleImport.
    737   void LexAfterModuleImport(Token &Result);
    738 
    739   /// \brief Lex a string literal, which may be formed by concatenating
    740   /// several string literals and may even come from macro expansion.
    741   /// \returns true on success, false if an error diagnostic has been generated.
    742   bool LexStringLiteral(Token &Result, std::string &String,
    743                         const char *DiagnosticTag, bool AllowMacroExpansion) {
          // Lex the first token here; FinishLexStringLiteral() completes the job.
    744     if (!AllowMacroExpansion)
    745       LexUnexpandedToken(Result);
    746     else
    747       Lex(Result);
    748     return FinishLexStringLiteral(Result, String, DiagnosticTag,
    749                                   AllowMacroExpansion);
    750   }
    751 
    752   /// \brief Complete the lexing of a string literal where the first token has
    753   /// already been lexed (see LexStringLiteral).
          ///
          /// \returns the value LexStringLiteral() passes on to its caller: true on
          /// success, false if an error diagnostic has been generated.
    754   bool FinishLexStringLiteral(Token &Result, std::string &String,
    755                               const char *DiagnosticTag,
    756                               bool AllowMacroExpansion);
    757 
    758   /// \brief Lex a token, skipping over any comment tokens, so that \p Result
    759   /// is the first token that is not a comment.  This is useful in -E -C mode
    760   /// where comments would foul up preprocessor directive handling.
    761   void LexNonComment(Token &Result) {
    762     do {
    763       Lex(Result);
    764     } while (Result.is(tok::comment));
    765   }
    766 
    767   /// \brief Just like Lex(), but with macro expansion of identifier tokens
    768   /// disabled while the token is lexed.
    769   void LexUnexpandedToken(Token &Result) {
    770     // Remember the current setting so it can be restored afterwards.
    771     const bool SavedDisableMacroExpansion = DisableMacroExpansion;
    772     DisableMacroExpansion = true;
    773 
    774     Lex(Result);
    775 
    776     // Restore the previous setting (which may itself have been "disabled").
    777     DisableMacroExpansion = SavedDisableMacroExpansion;
    778   }
    779 
    780   /// \brief Like LexNonComment(), but with macro expansion of identifier
    781   /// tokens disabled.
    782   void LexUnexpandedNonComment(Token &Result) {
    783     do {
    784       LexUnexpandedToken(Result);
    785     } while (Result.is(tok::comment));
    786   }
    787 
    788   /// \brief Disable macro expansion everywhere except for preprocessor
          /// directives.
          ///
          /// Sets both DisableMacroExpansion and MacroExpansionInDirectivesOverride.
    789   void SetMacroExpansionOnlyInDirectives() {
    790     DisableMacroExpansion = true;
    791     MacroExpansionInDirectivesOverride = true;
    792   }
    793 
    794   /// \brief Peek ahead \p N tokens and return that token without consuming
    795   /// any tokens.  LookAhead(0) returns the next token that would be returned
    796   /// by Lex(), LookAhead(1) returns the token after it, etc.  This returns
    797   /// normal tokens after phase 5.  As such, it is equivalent to using 'Lex',
    798   /// not 'LexUnexpandedToken'.
    799   const Token &LookAhead(unsigned N) {
    800     // Beyond the tokens cached so far: defer to PeekAhead().
    801     if (CachedLexPos + N >= CachedTokens.size())
    802       return PeekAhead(N+1);
    803     return CachedTokens[CachedLexPos+N];
    804   }
    805 
    806   /// \brief Revert the \p N most recently consumed cached tokens by moving
    807   /// the cached lex position back.
    808   /// This is only valid while backtracking is enabled and tokens are cached,
    809   /// and \p N must not reach back beyond the last backtrack position.
    810   void RevertCachedTokens(unsigned N) {
    811     assert(isBacktrackEnabled() &&
    812            "Should only be called when tokens are cached for backtracking");
    813     assert(static_cast<int>(CachedLexPos) - static_cast<int>(N) >=
    814                static_cast<int>(BacktrackPositions.back()) &&
               "Should revert tokens up to the last backtrack position, not more");
    815     assert(static_cast<int>(CachedLexPos) - static_cast<int>(N) >= 0 &&
    816            "Corrupted backtrack positions ?");
    817     CachedLexPos = CachedLexPos - N;
    818   }
    819 
    820   /// \brief Enter a token in the token stream to be lexed next.  If
    821   /// Backtrack() is called afterwards, the token will remain at the insertion
    822   /// point.
    823   void EnterToken(const Token &Tok) {
    824     EnterCachingLexMode();
          // Insert at the current cached-lex position, i.e. in front of any cached
          // tokens that have not been consumed yet.
    825     CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
    826   }
    827 
    828   /// \brief Notify the Preprocessor that, if it is caching tokens (because
    829   /// backtracking is enabled), it should replace the most recent cached
    830   /// tokens with the given annotation token.  This function has no effect if
    831   /// backtracking is not enabled or no cached token has been consumed yet.
    832   ///
    833   /// Note that the use of this function is just for optimization; so that the
    834   /// cached tokens don't get re-parsed and re-resolved after a backtrack is
    835   /// invoked.
    836   void AnnotateCachedTokens(const Token &Tok) {
    837     assert(Tok.isAnnotation() && "Expected annotation token");
    838     if (CachedLexPos == 0 || !isBacktrackEnabled())
    839       return;
          AnnotatePreviousCachedTokens(Tok);
    840   }
    841 
    842   /// \brief Replace the last token with an annotation token.
    843   ///
    844   /// Like AnnotateCachedTokens(), this routine replaces an already-parsed
    845   /// (and resolved) token with an annotation token.  However, this routine
    846   /// only replaces the last token with the annotation token; it does not
    847   /// affect any other cached tokens.  This function has no effect if
    848   /// backtracking is not enabled or no cached token has been consumed
    849   /// yet.
    850   void ReplaceLastTokenWithAnnotation(const Token &Tok) {
    851     assert(Tok.isAnnotation() && "Expected annotation token");
    852     if (CachedLexPos == 0 || !isBacktrackEnabled())
    853       return;
          CachedTokens[CachedLexPos-1] = Tok;
    854   }
    855 
    856   /// \brief Update the current token to represent the provided identifier,
    857   /// in order to cache an action performed by typo correction.
          ///
          /// The most recently consumed cached token is overwritten with \p Tok;
          /// nothing happens unless backtracking is enabled and a token is cached.
    858   void TypoCorrectToken(const Token &Tok) {
    859     assert(Tok.getIdentifierInfo() && "Expected identifier token");
    860     if (CachedLexPos == 0 || !isBacktrackEnabled())
    861       return;
          CachedTokens[CachedLexPos-1] = Tok;
    862   }
    863 
    864   /// \brief Recompute the current lexer kind (CurLexerKind) based on the
    865   /// CurLexer/CurPTHLexer/CurTokenLexer pointers.
    866   void recomputeCurLexerKind();
    867 
    868   /// \brief Returns true if incremental processing is enabled.
    869   bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
    870 
    871   /// \brief Enables or disables incremental processing.
          ///
          /// \param value true (the default) to enable, false to disable.
    872   void enableIncrementalProcessing(bool value = true) {
    873     IncrementalProcessing = value;
    874   }
    875 
    876   /// \brief Specify the point at which code-completion will be performed.
    877   ///
    878   /// \param File The file in which code completion should occur. If
    879   /// this file is included multiple times, code-completion will
    880   /// perform completion the first time it is included. If NULL, this
    881   /// function clears out the code-completion point.
    882   ///
    883   /// \param Line The line at which code completion should occur
    884   /// (1-based).
    885   ///
    886   /// \param Column The column at which code completion should occur
    887   /// (1-based).
    888   ///
    889   /// \returns true if an error occurred, false otherwise.
    890   bool SetCodeCompletionPoint(const FileEntry *File,
    891                               unsigned Line, unsigned Column);
    892 
    893   /// \brief Determine if we are performing code completion, i.e. whether a
          /// code-completion file is currently set.
    894   bool isCodeCompletionEnabled() const { return CodeCompletionFile != 0; }
    895 
    896   /// \brief Returns the location of the code-completion point.
    897   ///
    898   /// Returns an invalid location if code-completion is not enabled or the
          /// file containing the code-completion point has not been lexed yet.
    899   SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
    900 
    901   /// \brief Returns the start location of the file containing the
    902   /// code-completion point.  Returns an invalid location if code-completion
    903   /// is not enabled or that file has not been lexed yet.
    904   SourceLocation getCodeCompletionFileLoc() const {
    905     return CodeCompletionFileLoc;
    906   }
    907 
    908   /// \brief Returns true if code-completion is enabled and we have hit the
    909   /// code-completion point (see setCodeCompletionReached()).
    910   bool isCodeCompletionReached() const { return CodeCompletionReached; }
    911 
    912   /// \brief Note that we hit the code-completion point.
          ///
          /// Code completion must be enabled.  As a side effect, all diagnostics
          /// reported from here on are suppressed.
    913   void setCodeCompletionReached() {
    914     assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
    915     CodeCompletionReached = true;
    916     // Silence any diagnostics that occur after we hit the code-completion.
    917     getDiagnostics().setSuppressAllDiagnostics(true);
    918   }
    919 
    920   /// \brief Returns the location of the currently-active \#pragma clang
    921   /// arc_cf_code_audited begin.  Returns an invalid location if there
    922   /// is no such pragma active.
    923   SourceLocation getPragmaARCCFCodeAuditedLoc() const {
    924     return PragmaARCCFCodeAuditedLoc;
    925   }
    926 
    927   /// \brief Set the location of the currently-active \#pragma clang
    928   /// arc_cf_code_audited begin.  Passing an invalid location ends the pragma.
    929   void setPragmaARCCFCodeAuditedLoc(SourceLocation Loc) {
    930     PragmaARCCFCodeAuditedLoc = Loc;
    931   }
    932 
    933   /// \brief Instruct the preprocessor to skip part of the main source file.
    934   ///
    935   /// \param Bytes The number of bytes in the preamble to skip.
    936   ///
    937   /// \param StartOfLine Whether skipping these bytes puts the lexer at the
    938   /// start of a line.
    939   void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
          // The two values are simply recorded as a (Bytes, StartOfLine) pair.
    940     SkipMainFilePreamble.first = Bytes;
    941     SkipMainFilePreamble.second = StartOfLine;
    942   }
    943 
    944   /// \brief Forwarding function for diagnostics.  This reports the
    945   /// diagnostic \p DiagID at the location \p Loc through Diags and returns
    946   /// the resulting DiagnosticBuilder to the caller.
    947   DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
    948     return Diags->Report(Loc, DiagID);
    949   }
    950 
          /// \brief Forwarding function for diagnostics that reports \p DiagID at
          /// the location of the token \p Tok.
    951   DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
    952     return Diags->Report(Tok.getLocation(), DiagID);
    953   }
    954 
    955   /// \brief Return the 'spelling' of the token at the given
    956   /// location; does not go up to the spelling location or down to the
    957   /// expansion location.
    958   ///
          /// \param loc The location of the token whose spelling is wanted.
    959   /// \param buffer A buffer which will be used only if the token requires
    960   ///   "cleaning", e.g. if it contains trigraphs or escaped newlines.
    961   /// \param invalid If non-null, will be set \c true if an error occurs.
    962   StringRef getSpelling(SourceLocation loc,
    963                               SmallVectorImpl<char> &buffer,
    964                               bool *invalid = 0) const {
    965     return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
    966   }
    967 
    968   /// \brief Return the 'spelling' of the Tok token.  The spelling of a
    969   /// token is the characters used to represent the token in the source file
    970   /// after trigraph expansion and escaped-newline folding.  In particular, this
    971   /// wants to get the true, uncanonicalized, spelling of things like digraphs,
    972   /// UCNs, etc.
    973   ///
    974   /// \param Invalid If non-null, will be set \c true if an error occurs.
    975   std::string getSpelling(const Token &Tok, bool *Invalid = 0) const {
    976     return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
    977   }
    978 
    979   /// \brief Get the spelling of a token into a preallocated buffer, instead
    980   /// of as an std::string.  The caller is required to allocate enough space
    981   /// for the token, which is guaranteed to be at least Tok.getLength() bytes
    982   /// long.  The length of the actual result is returned.
    983   ///
    984   /// Note that this method may do two possible things: it may either fill in
    985   /// the buffer specified with characters, or it may *change the input pointer*
    986   /// to point to a constant buffer with the data already in it (avoiding a
    987   /// copy).  The caller is not allowed to modify the returned buffer pointer
    988   /// if an internal buffer is returned.
    989   unsigned getSpelling(const Token &Tok, const char *&Buffer,
    990                        bool *Invalid = 0) const {
    991     return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
    992   }
    993 
    994   /// \brief Get the spelling of a token into a SmallVector.  Note that the
    995   /// returned StringRef may not point to the supplied buffer if a copy can be
    996   /// avoided.
    997   StringRef getSpelling(const Token &Tok,
    998                         SmallVectorImpl<char> &Buffer,
    999                         bool *Invalid = 0) const;
   1000 
   1001   /// \brief Relex the token at the specified location.
          ///
          /// This simply forwards to Lexer::getRawToken() with the preprocessor's
          /// source manager and language options.  It is const, like the other
          /// Lexer forwarding helpers in this class, so it can be called through a
          /// const Preprocessor.
          ///
          /// \param Loc The location of the token to relex.
          /// \param Result Receives the relexed token.
   1002   /// \returns true if there was a failure, false on success.
   1003   bool getRawToken(SourceLocation Loc, Token &Result) const {
   1004     return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts);
   1005   }
   1006 
   1007   /// \brief Return the single character spelling \p Tok, which must be a
   1008   /// numeric constant with length 1.
          ///
          /// \param Invalid Passed on to SourceMgr.getCharacterData() when the
          /// character has to be read from the source buffer.
   1009   char getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
   1010                                                    bool *Invalid = 0) const {
   1011     assert(Tok.is(tok::numeric_constant) &&
   1012            Tok.getLength() == 1 && "Called on unsupported token");
   1013     assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
   1014 
   1015     // Prefer the literal data pointer when the token carries one; otherwise
   1016     // fall back on getCharacterData, which is slower, but always works.
   1017     const char *LiteralData = Tok.getLiteralData();
   1018     if (LiteralData)
   1019       return *LiteralData;
   1020 
   1021     return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid);
   1022   }
   1023 
   1024   /// \brief Retrieve the name of the immediate macro expansion.
   1025   ///
   1026   /// This routine starts from a source location, and finds the name of the macro
   1027   /// responsible for its immediate expansion. It looks through any intervening
   1028   /// macro argument expansions to compute this. It returns a StringRef which
   1029   /// refers to the SourceManager-owned buffer of the source where that macro
   1030   /// name is spelled. Thus, the result shouldn't outlive the SourceManager.
   1031   StringRef getImmediateMacroName(SourceLocation Loc) {
   1032     return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts());
   1033   }
   1034 
   1035   /// \brief Plop the specified string into a scratch buffer and set the
   1036   /// specified token's location and length to it.  If specified, the source
   1037   /// locations provide the expansion range of the token.
   1038   void CreateString(StringRef Str, Token &Tok,
   1039                     SourceLocation ExpansionLocStart = SourceLocation(),
   1040                     SourceLocation ExpansionLocEnd = SourceLocation());
   1041 
   1042   /// \brief Computes the source location just past the end of the
   1043   /// token at this source location.
   1044   ///
   1045   /// This routine can be used to produce a source location that
   1046   /// points just past the end of the token referenced by \p Loc, and
   1047   /// is generally used when a diagnostic needs to point just after a
   1048   /// token where it expected something different than it received. If
   1049   /// the returned source location would not be meaningful (e.g., if
   1050   /// it points into a macro), this routine returns an invalid
   1051   /// source location.
   1052   ///
   1053   /// \param Offset an offset from the end of the token, where the source
   1054   /// location should refer to. The default offset (0) produces a source
   1055   /// location pointing just past the end of the token; an offset of 1 produces
   1056   /// a source location pointing to the last character in the token, etc.
          ///
          /// This is const, like the other Lexer forwarding helpers in this class,
          /// so it can be called through a const Preprocessor.
   1057   SourceLocation getLocForEndOfToken(SourceLocation Loc,
   1058                                      unsigned Offset = 0) const {
            return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
   1059   }
   1060 
   1061   /// \brief Returns true if the given MacroID location points at the first
   1062   /// token of the macro expansion.
   1063   ///
          /// \param loc The MacroID location to test.
   1064   /// \param MacroBegin If non-null and function returns true, it is set to
   1065   /// begin location of the macro.
   1066   bool isAtStartOfMacroExpansion(SourceLocation loc,
   1067                                  SourceLocation *MacroBegin = 0) const {
   1068     return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts,
   1069                                             MacroBegin);
   1070   }
   1071 
   1072   /// \brief Returns true if the given MacroID location points at the last
   1073   /// token of the macro expansion.
   1074   ///
          /// \param loc The MacroID location to test.
   1075   /// \param MacroEnd If non-null and function returns true, it is set to
   1076   /// end location of the macro.
   1077   bool isAtEndOfMacroExpansion(SourceLocation loc,
   1078                                SourceLocation *MacroEnd = 0) const {
   1079     return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
   1080   }
   1081 
   1082   /// \brief Print the token to stderr, used for debugging.
   1083   ///
   1084   void DumpToken(const Token &Tok, bool DumpFlags = false) const;
          // NOTE(review): DumpLocation and DumpMacro are presumably the analogous
          // debugging dumps for a source location and a macro definition; confirm
          // against their out-of-line definitions.
   1085   void DumpLocation(SourceLocation Loc) const;
   1086   void DumpMacro(const MacroInfo &MI) const;
   1087 
   1088   /// \brief Given a location that specifies the start of a token, return a
   1089   /// new location that specifies a character within the token.
   1090   SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
   1091                                          unsigned Char) const {
   1092     return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
   1093   }
   1094 
   1095   /// \brief Increment the counters for the number of token paste operations
   1096   /// performed.  If \p isFast is true, this is a 'fast paste' case we handled
   1097   /// and NumFastTokenPaste is bumped; otherwise NumTokenPaste is bumped.
   1098   ///
   1099   void IncrementPasteCounter(bool isFast) {
   1100     if (!isFast)
   1101       ++NumTokenPaste;
   1102     else
   1103       ++NumFastTokenPaste;
   1104   }
   1105 
          // NOTE(review): presumably prints the statistics counters kept by the
          // preprocessor (e.g. the token paste counters); confirm against the
          // out-of-line definition.
   1106   void PrintStats();
   1107 
          // NOTE(review): presumably reports the amount of memory used by the
          // preprocessor; units and what is counted are not visible here.
   1108   size_t getTotalMemory() const;
   1109 
   1110   /// \brief When the macro expander pastes together a comment (/##/) in
   1111   /// microsoft mode, this method handles updating the current state,
   1112   /// returning the token on the next source line.
   1113   void HandleMicrosoftCommentPaste(Token &Tok);
   1114 
   1115   //===--------------------------------------------------------------------===//
   1116   // Preprocessor callback methods.  These are invoked by a lexer as various
   1117   // directives and events are found.
   1118 
   1119   /// \brief Given a tok::raw_identifier token, look up the identifier
   1120   /// information for the token and install it into the token, updating the
   1121   /// token kind accordingly.
   1122   IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
   1123 
   1124 private:
          // Per-identifier poison reasons.  NOTE(review): the mapped value is
          // presumably the DiagID passed to SetPoisonReason(); confirm.
   1125   llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
   1126 
   1127 public:
   1128 
   1129   /// \brief Call this function to indicate the reason for
   1130   /// poisoning an identifier. If that identifier is accessed while
   1131   /// poisoned, then this reason will be used instead of the default
   1132   /// "poisoned" diagnostic.
   1133   void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);
   1134 
   1135   /// \brief Display the reason for a poisoned
   1136   /// identifier.
   1137   void HandlePoisonedIdentifier(Token & Tok);
   1138 
          /// \brief Call HandlePoisonedIdentifier() for \p Identifier if it has
          /// identifier info that is marked as poisoned; otherwise do nothing.
   1139   void MaybeHandlePoisonedIdentifier(Token & Identifier) {
   1140     IdentifierInfo *II = Identifier.getIdentifierInfo();
   1141     if (!II || !II->isPoisoned())
   1142       return;
   1143 
   1144     HandlePoisonedIdentifier(Identifier);
   1145   }
   1146 
   1147 private:
   1148   /// Identifiers used for SEH handling in Borland. These are only
   1149   /// allowed in particular circumstances.  Each group below holds the
          /// different spellings accepted for one construct.
   1150   // __except block
   1151   IdentifierInfo *Ident__exception_code,
   1152                  *Ident___exception_code,
   1153                  *Ident_GetExceptionCode;
   1154   // __except filter expression
   1155   IdentifierInfo *Ident__exception_info,
   1156                  *Ident___exception_info,
   1157                  *Ident_GetExceptionInfo;
   1158   // __finally
   1159   IdentifierInfo *Ident__abnormal_termination,
   1160                  *Ident___abnormal_termination,
   1161                  *Ident_AbnormalTermination;
   1162 public:
          // NOTE(review): presumably poisons (or, with Poison == false, unpoisons)
          // the Borland SEH identifiers declared above; confirm against the
          // definition.
   1163   void PoisonSEHIdentifiers(bool Poison = true); // Borland
   1164 
   1165   /// \brief This callback is invoked when the lexer reads an identifier and
   1166   /// has filled in the token's IdentifierInfo member.  This
   1167   /// callback potentially macro expands it or turns it into a named token (like
   1168   /// 'for').
   1169   void HandleIdentifier(Token &Identifier);
   1170 
   1171 
   1172   /// \brief This callback is invoked when the lexer hits the end of the
   1173   /// current file.  This either returns the EOF token and returns true, or
   1174   /// pops a level off the include stack and returns false, at which point the
   1175   /// client should call lex again.
   1176   bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
   1177 
   1178   /// \brief This callback is invoked when the current TokenLexer hits the
   1179   /// end of its token stream.
   1180   bool HandleEndOfTokenLexer(Token &Result);
   1181 
   1182   /// \brief This callback is invoked when the lexer sees a # token at the
   1183   /// start of a line.  This consumes the directive, modifies the
   1184   /// lexer/preprocessor state, and advances the lexer(s) so that the next token
   1185   /// read is the correct one.
   1186   void HandleDirective(Token &Result);
   1187 
   1188   /// \brief Ensure that the next token is a tok::eod token.  If not, emit a
   1189   /// diagnostic and consume up until the eod.  If \p EnableMacros is true,
   1190   /// then we consider macros that expand to zero tokens as being ok.
   1191   void CheckEndOfDirective(const char *Directive, bool EnableMacros = false);
   1192 
   1193   /// \brief Read and discard all tokens remaining on the current line until
   1194   /// the tok::eod token is found.
   1195   void DiscardUntilEndOfDirective();
   1196 
   1197   /// \brief Returns true if the preprocessor has seen a use of __DATE__ or
   1198   /// __TIME__ in the file so far, i.e. either recorded location is valid.
   1199   bool SawDateOrTime() const {
   1200     return DATELoc.isValid() || TIMELoc.isValid();
   1201   }
          /// \brief Returns the current value of CounterValue.
   1202   unsigned getCounterValue() const { return CounterValue; }
          /// \brief Overwrites CounterValue with \p V.
   1203   void setCounterValue(unsigned V) { CounterValue = V; }
   1204 
   1205   /// \brief Retrieves the module that we're currently building, if any.
          /// NOTE(review): the result is presumably null when no module is being
          /// built; confirm against the definition.
   1206   Module *getCurrentModule();
   1207 
   1208   /// \brief Allocate a new MacroInfo object with the provided SourceLocation
          /// \p L.
   1209   MacroInfo *AllocateMacroInfo(SourceLocation L);
   1210 
   1211   /// \brief Turn the specified lexer token into a fully checked and spelled
   1212   /// filename, e.g. as an operand of \#include.
   1213   ///
   1214   /// The caller is expected to provide a buffer that is large enough to hold
   1215   /// the spelling of the filename, but is also expected to handle the case
   1216   /// when this method decides to use a different buffer.
   1217   ///
   1218   /// \returns true if the input filename was in <>'s or false if it was
   1219   /// in ""'s.
   1220   bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Filename);
   1221 
   1222   /// \brief Given a "foo" or \<foo> reference, look up the indicated file.
   1223   ///
   1224   /// \returns null on failure.  \p isAngled indicates whether the file
   1225   /// reference is for system \#include's or not (i.e. using <> instead of "").
   1226   const FileEntry *LookupFile(StringRef Filename,
   1227                               bool isAngled, const DirectoryLookup *FromDir,
   1228                               const DirectoryLookup *&CurDir,
   1229                               SmallVectorImpl<char> *SearchPath,
   1230                               SmallVectorImpl<char> *RelativePath,
   1231                               Module **SuggestedModule,
   1232                               bool SkipCache = false);
   1233 
   1234   /// \brief Return the DirectoryLookup structure used to find the current
   1235   /// FileEntry, if CurLexer is non-null and if applicable.  This allows us to
   1236   /// implement \#include_next and find directory-specific properties.
          ///
          /// This is a read-only accessor, so it is const and can be called through
          /// a const Preprocessor.
   1237   const DirectoryLookup *GetCurDirLookup() const { return CurDirLookup; }
   1238 
   1239   /// \brief Return true if we're in the top-level file, not in a \#include.
   1240   bool isInPrimaryFile() const;
   1241 
   1242   /// \brief Handle cases where the \#include name is expanded
   1243   /// from a macro as multiple tokens, which need to be glued together.  This
   1244   /// occurs for code like:
   1245   /// \code
   1246   ///    \#define FOO <x/y.h>
   1247   ///    \#include FOO
   1248   /// \endcode
   1249   /// because in this case, "<x/y.h>" is returned as 7 tokens, not one.
   1250   ///
   1251   /// This code concatenates and consumes tokens up to the '>' token.  It
   1252   /// returns false if the > was found, otherwise it returns true if it finds
   1253   /// and consumes the EOD marker.
   1254   bool ConcatenateIncludeName(SmallString<128> &FilenameBuffer,
   1255                               SourceLocation &End);
   1256 
   1257   /// \brief Lex an on-off-switch (C99 6.10.6p2) and verify that it is
   1258   /// followed by EOD.  Return true if the token is not a valid on-off-switch.
   1259   bool LexOnOffSwitch(tok::OnOffSwitch &OOS);
   1260 
   1261 private:
   1262 
          /// \brief Save the current lexer state as a new entry on
          /// IncludeMacroStack.
          ///
          /// The current lexer, PTH lexer and token lexer pointers are handed over
          /// to the new entry via take(), and CurPPLexer is cleared afterwards.
   1263   void PushIncludeMacroStack() {
   1264     IncludeStackInfo SavedState(CurLexerKind,
   1265                                 CurLexer.take(),
   1266                                 CurPTHLexer.take(),
   1267                                 CurPPLexer,
   1268                                 CurTokenLexer.take(),
   1269                                 CurDirLookup);
            IncludeMacroStack.push_back(SavedState);
   1270     CurPPLexer = 0;
   1271   }
   1272 
          /// \brief Restore the lexer state from the top entry of IncludeMacroStack
          /// and remove that entry from the stack.
   1273   void PopIncludeMacroStack() {
            const IncludeStackInfo &Top = IncludeMacroStack.back();
   1274     CurLexer.reset(Top.TheLexer);
   1275     CurPTHLexer.reset(Top.ThePTHLexer);
   1276     CurPPLexer = Top.ThePPLexer;
   1277     CurTokenLexer.reset(Top.TheTokenLexer);
   1278     CurDirLookup = Top.TheDirLookup;
   1279     CurLexerKind = Top.CurLexerKind;
   1280     IncludeMacroStack.pop_back();
   1281   }
   1282 
   1283   /// \brief Allocate a new MacroInfo object.
   1284   MacroInfo *AllocateMacroInfo();
   1285 
          /// NOTE(review): undocumented in the original.  Judging by its name and
          /// return type this presumably allocates a new MacroDirective for \p MI;
          /// the roles of \p Loc and \p isImported are not visible here -- confirm
          /// against the definition.
   1286   MacroDirective *AllocateMacroDirective(MacroInfo *MI, SourceLocation Loc,
   1287                                          bool isImported);
   1288 
   1289   /// \brief Release the specified MacroInfo for re-use.
   1290   ///
   1291   /// This memory will be reused for allocating new MacroInfo objects.
   1292   void ReleaseMacroInfo(MacroInfo* MI);
   1293 
   1294   /// ReadMacroName - Lex and validate a macro name, which occurs after a
   1295   /// \#define or \#undef.  This emits a diagnostic, sets the token kind to eod,
   1296   /// and discards the rest of the macro line if the macro name is invalid.
          ///
          /// \param MacroNameTok  The token the macro name is lexed into.
          /// \param isDefineUndef  Defaults to 0.  NOTE(review): presumably tells the
          ///        function which directive the name follows; the encoding is not
          ///        visible here -- confirm against the definition.
   1297   void ReadMacroName(Token &MacroNameTok, char isDefineUndef = 0);
   1298 
   1299   /// ReadMacroDefinitionArgList - The ( starting an argument list of a macro
   1300   /// definition has just been read.  Lex the rest of the arguments and the
   1301   /// closing ).
   1302   /// \param MI  Updated with what we learn from the argument list.
   1303   /// \param LastTok  Receives the last token read.
          /// \returns true if an error occurs parsing the arg list.
   1304   bool ReadMacroDefinitionArgList(MacroInfo *MI, Token& LastTok);
   1305 
   1306   /// We just read a \#if or related directive and decided that the
   1307   /// subsequent tokens are in the \#if'd out portion of the
   1308   /// file.  Lex the rest of the file, until we see an \#endif.  When this
   1309   /// returns, the caller can lex the first valid token.
   1310   ///
   1311   /// \param FoundNonSkipPortion  If true, we have already emitted code for
   1312   ///        part of this \#if directive, so \#else/\#elif blocks should never
   1313   ///        be entered.
          /// \param FoundElse  If false, \#else directives are ok; if true, we have
          ///        already seen one, so a \#else directive is a duplicate.
          ///
          /// NOTE(review): \p IfTokenLoc and \p ElseLoc (which defaults to a
          /// default-constructed SourceLocation) are presumably the locations of the
          /// opening directive and of a previously seen \#else -- confirm against
          /// the definition.
   1314   void SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
   1315                                     bool FoundNonSkipPortion, bool FoundElse,
   1316                                     SourceLocation ElseLoc = SourceLocation());
   1317 
   1318   /// \brief A fast PTH version of SkipExcludedConditionalBlock.
   1319   void PTHSkipExcludedConditionalBlock();
   1320 
   1321   /// EvaluateDirectiveExpression - Evaluate an integer constant expression that
   1322   /// may occur after a \#if or \#elif directive and return it as a bool.
   1323   ///
          /// \param IfNDefMacro  If the expression is equivalent to "!defined(X)",
          ///        X is returned here.
          /// \returns the value of the expression as a bool.
   1324   bool EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
   1325 
   1326   /// RegisterBuiltinPragmas - Install the standard preprocessor pragmas:
   1327   /// \#pragma GCC poison, \#pragma GCC system_header, \#pragma GCC dependency
          /// and \#pragma once.
   1328   void RegisterBuiltinPragmas();
   1329 
   1330   /// \brief Register builtin macros such as __LINE__ with the identifier table.
   1331   void RegisterBuiltinMacros();
   1332 
   1333   /// HandleMacroExpandedIdentifier - If an identifier token is read that is to
   1334   /// be expanded as a macro, handle it and return the next token as 'Tok'.
   1335   ///
          /// \param Tok  The identifier token that was read; on return it holds the
          ///        next token.
          /// \param MD  NOTE(review): presumably the macro directive attached to the
          ///        identifier -- confirm against the definition.
          /// \returns true if the macro should not be expanded, false otherwise.
   1336   bool HandleMacroExpandedIdentifier(Token &Tok, MacroDirective *MD);
   1337 
   1338   /// \brief Cache macro expanded tokens for TokenLexers.
   1339   ///
   1340   /// Works like a stack; a TokenLexer adds the macro expanded tokens that it
   1341   /// is going to lex to the cache, and when it finishes the tokens are
   1342   /// removed from the end of the cache.
   1343   Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
   1344                                   ArrayRef<Token> tokens);
   1345   void removeCachedMacroExpandedTokensOfLastLexer();
          ///< NOTE(review): presumably the removal half of the stack-like cache
          ///< described for cacheMacroExpandedTokens -- confirm.

          // Friend declaration: lets TokenLexer::ExpandFunctionArguments() reach the
          // non-public members of this class.
   1346   friend void TokenLexer::ExpandFunctionArguments();
   1347 
   1348   /// isNextPPTokenLParen - Determine whether the next preprocessor token to be
   1349   /// lexed is a '('.  If it is not, this method should have no observable
   1350   /// side-effect on the lexed tokens.
          ///
          /// \returns true, after consuming the token, when it is a '('.
   1351   bool isNextPPTokenLParen();
   1352 
   1353   /// ReadFunctionLikeMacroArgs - After reading "MACRO(", this method is
   1354   /// invoked to read all of the arguments specified for the macro
   1355   /// invocation.
          ///
          /// \returns the arguments that were read, or null on error.
          ///
          /// NOTE(review): \p ExpansionEnd is taken by non-const reference and is
          /// presumably set to where the invocation ends -- confirm against the
          /// definition.
   1356   MacroArgs *ReadFunctionLikeMacroArgs(Token &MacroName, MacroInfo *MI,
   1357                                        SourceLocation &ExpansionEnd);
   1358 
   1359   /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
   1360   /// as a builtin macro, handle it and return the next token as 'Tok'.
          ///
          /// \param Tok  The identifier token that was read; on return it holds the
          ///        next token.
   1361   void ExpandBuiltinMacro(Token &Tok);
   1362 
   1363   /// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then
   1364   /// return the first token after the directive.
   1365   ///
          /// \param Tok  On entry, the _Pragma token that has just been read; on
          ///        return, the first token after the directive.
   1366   void Handle_Pragma(Token &Tok);
   1367 
   1368   /// HandleMicrosoft__pragma - Like Handle_Pragma except the pragma text
   1369   /// is not enclosed within a string literal.
   1370   void HandleMicrosoft__pragma(Token &Tok);
   1371 
   1372   /// EnterSourceFileWithLexer - Add a lexer to the top of the include stack and
   1373   /// start lexing tokens from it instead of the current buffer.
          ///
          /// NOTE(review): \p Dir is presumably the directory lookup the file was
          /// found through -- confirm against the definition.
   1374   void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir);
   1375 
   1376   /// EnterSourceFileWithPTH - Add a lexer to the top of the include stack and
   1377   /// start getting tokens from it using the PTH cache.
          ///
          /// Same shape as EnterSourceFileWithLexer, but takes a PTHLexer.
   1378   void EnterSourceFileWithPTH(PTHLexer *PL, const DirectoryLookup *Dir);
   1379 
   1380   /// \brief Set the file ID for the preprocessor predefines.
          ///
          /// Intended to be called once: the assert below checks that the stored
          /// PredefinesFileID is still invalid before it is overwritten with \p FID.
   1381   void setPredefinesFileID(FileID FID) {
   1382     assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!");
   1383     PredefinesFileID = FID;
   1384   }
   1385 
   1386   /// IsFileLexer - Returns true if we are lexing from a file and not a
   1387   /// pragma or a macro.  With a non-null \p L the answer is whether \p L is
          /// not a pragma lexer; with a null \p L it is whether \p P is non-null.
   1388   static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
            if (L)
              return !L->isPragmaLexer();
   1389     return P != 0;
   1390   }
   1391 
   1392   static bool IsFileLexer(const IncludeStackInfo& I) {
            // Unpack the saved stack entry and defer to the two-pointer overload.
            const Lexer *SavedLexer = I.TheLexer;
            const PreprocessorLexer *SavedPPLexer = I.ThePPLexer;
   1393     return IsFileLexer(SavedLexer, SavedPPLexer);
   1394   }
   1395 
   1396   bool IsFileLexer() const {
            // Ask the two-pointer overload about the lexers that are current now.
            const Lexer *ActiveLexer = CurLexer.get();
   1397     return IsFileLexer(ActiveLexer, CurPPLexer);
   1398   }
   1399 
   1400   //===--------------------------------------------------------------------===//
   1401   // Caching stuff.
          //
          // NOTE(review): CachingLex is only declared here.  Judging by its name and
          // signature it presumably lexes the next token into Result while in
          // caching mode (see InCachingLexMode) -- confirm against the definition.
   1402   void CachingLex(Token &Result);
   1403   bool InCachingLexMode() const {
   1404     // All lexer pointers being 0 is not enough on its own: with an empty
   1405     // IncludeMacroStack that state means we are past EOF instead.
   1406     const bool NoActiveLexer =
   1407         CurPPLexer == 0 && CurTokenLexer == 0 && CurPTHLexer == 0;
            return NoActiveLexer && !IncludeMacroStack.empty();
   1408   }
   1409   void EnterCachingLexMode();
          ///< Declared only; NOTE(review): presumably the counterpart of
          ///< ExitCachingLexMode() -- confirm against the definition.
   1410   void ExitCachingLexMode() {
   1411     if (!InCachingLexMode())
              return;  // Not in caching mode: nothing to remove.
   1412     RemoveTopOfLexerStack();
   1413   }
   1414   const Token &PeekAhead(unsigned N);
          ///< NOTE(review): presumably returns the token \p N positions ahead in
          ///< the cached token stream -- confirm against the definition.
   1415   void AnnotatePreviousCachedTokens(const Token &Tok);
          ///< NOTE(review): presumably updates already-cached tokens using \p Tok
          ///< -- confirm against the definition.
   1416 
   1417   //===--------------------------------------------------------------------===//
   1418   /// Handle*Directive - implement the various preprocessor directives.  These
   1419   /// should side-effect the current preprocessor object so that the next call
   1420   /// to Lex() will return the appropriate token next.
          ///
          /// This description applies to the whole Handle*Directive family that
          /// follows; only the declarations appear here.
   1421   void HandleLineDirective(Token &Tok);
   1422   void HandleDigitDirective(Token &Tok);
          // NOTE(review): isWarning presumably selects between the warning and the
          // error form of the user diagnostic -- confirm against the definition.
   1423   void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
   1424   void HandleIdentSCCSDirective(Token &Tok);
   1425   void HandleMacroPublicDirective(Token &Tok);
   1426   void HandleMacroPrivateDirective(Token &Tok);
   1427 
   1428   // File inclusion.
          //
          // HandleIncludeDirective has two defaulted parameters: LookupFrom
          // defaults to 0 and isImport defaults to false.
          // NOTE(review): HashLoc is presumably the location of the directive's
          // '#' -- confirm against the definitions.
   1429   void HandleIncludeDirective(SourceLocation HashLoc,
   1430                               Token &Tok,
   1431                               const DirectoryLookup *LookupFrom = 0,
   1432                               bool isImport = false);
   1433   void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
   1434   void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
   1435   void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
   1436   void HandleMicrosoftImportDirective(Token &Tok);
   1437 
   1438   // Macro handling.
          //
          // NOTE(review): UndefineMacro is presumably the shared helper that
          // records II / MD as undefined at UndefLoc -- confirm against the
          // definition.
   1439   void HandleDefineDirective(Token &Tok);
   1440   void HandleUndefDirective(Token &Tok);
   1441   void UndefineMacro(IdentifierInfo *II, MacroDirective *MD,
   1442                      SourceLocation UndefLoc);
   1443 
   1444   // Conditional Inclusion.
          //
          // NOTE(review): isIfndef presumably distinguishes #ifndef from #ifdef, and
          // ReadAnyTokensBeforeDirective presumably reports whether any tokens
          // preceded the directive in the file -- confirm against the definitions.
   1445   void HandleIfdefDirective(Token &Tok, bool isIfndef,
   1446                             bool ReadAnyTokensBeforeDirective);
   1447   void HandleIfDirective(Token &Tok, bool ReadAnyTokensBeforeDirective);
   1448   void HandleEndifDirective(Token &Tok);
   1449   void HandleElseDirective(Token &Tok);
   1450   void HandleElifDirective(Token &Tok);
   1451 
   1452   // Pragmas.
          //
          // HandlePragmaDirective is still in the private section; the access
          // specifier right after it makes the individual handlers below public.
   1453   void HandlePragmaDirective(unsigned Introducer);
   1454 public:
   1455   void HandlePragmaOnce(Token &OnceTok);
   1456   void HandlePragmaMark();
   1457   void HandlePragmaPoison(Token &PoisonTok);
   1458   void HandlePragmaSystemHeader(Token &SysHeaderTok);
   1459   void HandlePragmaDependency(Token &DependencyTok);
   1460   void HandlePragmaComment(Token &CommentTok);
   1461   void HandlePragmaMessage(Token &MessageTok);
   1462   void HandlePragmaPushMacro(Token &Tok);
   1463   void HandlePragmaPopMacro(Token &Tok);
   1464   void HandlePragmaIncludeAlias(Token &Tok);
          // NOTE(review): presumably the parsing helper shared by the push_macro and
          // pop_macro handlers above -- confirm against the definition.
   1465   IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);
   1466 
   1467   /// Return true and store the first token only if any CommentHandler
   1468   /// has inserted some tokens and getCommentRetentionState() is false.
          ///
          /// Note that the first parameter is itself named \p Token, the same as
          /// the Token type.
   1469   bool HandleComment(Token &Token, SourceRange Comment);
   1470 
   1471   /// \brief Called when a macro is used; updates the information about
   1472   /// macros that need unused warnings.
          ///
          /// \param MI  NOTE(review): presumably the macro that was used -- confirm
          ///        against the definition.
   1473   void markMacroAsUsed(MacroInfo *MI);
   1474 };
   1475 
   1476 /// \brief Abstract base class that describes a handler that will receive
   1477 /// source ranges for each of the comments encountered in the source file.
   1478 class CommentHandler {
   1479 public:
   1480   virtual ~CommentHandler();
   1481 
   1482   /// \brief Receive the source range of a comment.
          ///
   1483   /// The handler shall return true if it has pushed any tokens
          /// to be read using e.g. EnterToken or EnterTokenStream.
   1484   virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
   1485 };
   1486 
   1487 }  // end namespace clang
   1488 
   1489 #endif
   1490