Home | History | Annotate | Download | only in Lex
      1 //===--- Preprocessor.h - C Language Family Preprocessor --------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 //  This file defines the Preprocessor interface.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
     15 #define LLVM_CLANG_LEX_PREPROCESSOR_H
     16 
     17 #include "clang/Lex/MacroInfo.h"
     18 #include "clang/Lex/Lexer.h"
     19 #include "clang/Lex/PTHLexer.h"
     20 #include "clang/Lex/PPCallbacks.h"
     21 #include "clang/Lex/TokenLexer.h"
     22 #include "clang/Lex/PTHManager.h"
     23 #include "clang/Basic/Builtins.h"
     24 #include "clang/Basic/Diagnostic.h"
     25 #include "clang/Basic/IdentifierTable.h"
     26 #include "clang/Basic/SourceLocation.h"
     27 #include "llvm/ADT/DenseMap.h"
     28 #include "llvm/ADT/IntrusiveRefCntPtr.h"
     29 #include "llvm/ADT/SmallPtrSet.h"
     30 #include "llvm/ADT/OwningPtr.h"
     31 #include "llvm/ADT/SmallVector.h"
     32 #include "llvm/ADT/ArrayRef.h"
     33 #include "llvm/Support/Allocator.h"
     34 #include <vector>
     35 
     36 namespace llvm {
     37   template<unsigned InternalLen> class SmallString;
     38 }
     39 
     40 namespace clang {
     41 
     42 class SourceManager;
     43 class ExternalPreprocessorSource;
     44 class FileManager;
     45 class FileEntry;
     46 class HeaderSearch;
     47 class PragmaNamespace;
     48 class PragmaHandler;
     49 class CommentHandler;
     50 class ScratchBuffer;
     51 class TargetInfo;
     52 class PPCallbacks;
     53 class CodeCompletionHandler;
     54 class DirectoryLookup;
     55 class PreprocessingRecord;
     56 class ModuleLoader;
     57 
     58 /// Preprocessor - This object engages in a tight little dance with the lexer to
     59 /// efficiently preprocess tokens.  Lexers know only about tokens within a
     60 /// single source file, and don't know anything about preprocessor-level issues
     61 /// like the \#include stack, token expansion, etc.
     62 ///
     63 class Preprocessor : public RefCountedBase<Preprocessor> {
     64   DiagnosticsEngine        *Diags;
     65   LangOptions       &LangOpts;
     66   const TargetInfo  *Target;
     67   FileManager       &FileMgr;
     68   SourceManager     &SourceMgr;
     69   ScratchBuffer     *ScratchBuf;
     70   HeaderSearch      &HeaderInfo;
     71   ModuleLoader      &TheModuleLoader;
     72 
     73   /// \brief External source of macros.
     74   ExternalPreprocessorSource *ExternalSource;
     75 
     76 
     77   /// PTH - An optional PTHManager object used for getting tokens from
     78   ///  a token cache rather than lexing the original source file.
     79   OwningPtr<PTHManager> PTH;
     80 
     81   /// BP - A BumpPtrAllocator object used to quickly allocate and release
     82   ///  objects internal to the Preprocessor.
     83   llvm::BumpPtrAllocator BP;
     84 
     85   /// Identifiers for builtin macros and other builtins.
     86   IdentifierInfo *Ident__LINE__, *Ident__FILE__;   // __LINE__, __FILE__
     87   IdentifierInfo *Ident__DATE__, *Ident__TIME__;   // __DATE__, __TIME__
     88   IdentifierInfo *Ident__INCLUDE_LEVEL__;          // __INCLUDE_LEVEL__
     89   IdentifierInfo *Ident__BASE_FILE__;              // __BASE_FILE__
     90   IdentifierInfo *Ident__TIMESTAMP__;              // __TIMESTAMP__
     91   IdentifierInfo *Ident__COUNTER__;                // __COUNTER__
     92   IdentifierInfo *Ident_Pragma, *Ident__pragma;    // _Pragma, __pragma
     93   IdentifierInfo *Ident__VA_ARGS__;                // __VA_ARGS__
     94   IdentifierInfo *Ident__has_feature;              // __has_feature
     95   IdentifierInfo *Ident__has_extension;            // __has_extension
     96   IdentifierInfo *Ident__has_builtin;              // __has_builtin
     97   IdentifierInfo *Ident__has_attribute;            // __has_attribute
     98   IdentifierInfo *Ident__has_include;              // __has_include
     99   IdentifierInfo *Ident__has_include_next;         // __has_include_next
    100   IdentifierInfo *Ident__has_warning;              // __has_warning
    101 
    102   SourceLocation DATELoc, TIMELoc;
    103   unsigned CounterValue;  // Next __COUNTER__ value.
    104 
    105   enum {
    106     /// MaxAllowedIncludeStackDepth - Maximum depth of \#includes.
    107     MaxAllowedIncludeStackDepth = 200
    108   };
    109 
    110   // State that is set before the preprocessor begins.
    111   bool KeepComments : 1;
    112   bool KeepMacroComments : 1;
    113   bool SuppressIncludeNotFoundError : 1;
    114 
    115   // State that changes while the preprocessor runs:
    116   bool InMacroArgs : 1;            // True if parsing fn macro invocation args.
    117 
    118   /// Whether the preprocessor owns the header search object.
    119   bool OwnsHeaderSearch : 1;
    120 
    121   /// DisableMacroExpansion - True if macro expansion is disabled.
    122   bool DisableMacroExpansion : 1;
    123 
    124   /// MacroExpansionInDirectivesOverride - Temporarily disables
    125   /// DisableMacroExpansion (i.e. enables expansion) when parsing preprocessor
    126   /// directives.
    127   bool MacroExpansionInDirectivesOverride : 1;
    128 
    129   class ResetMacroExpansionHelper;
    130 
    131   /// \brief Whether we have already loaded macros from the external source.
    132   mutable bool ReadMacrosFromExternalSource : 1;
    133 
    134   /// \brief True if pragmas are enabled.
    135   bool PragmasEnabled : 1;
    136 
    137   /// \brief True if we are pre-expanding macro arguments.
    138   bool InMacroArgPreExpansion;
    139 
    140   /// Identifiers - This is mapping/lookup information for all identifiers in
    141   /// the program, including program keywords.
    142   mutable IdentifierTable Identifiers;
    143 
    144   /// Selectors - This table contains all the selectors in the program. Unlike
    145   /// IdentifierTable above, this table *isn't* populated by the preprocessor.
    146   /// It is declared/expanded here because its role/lifetime is
    147   /// conceptually similar to the IdentifierTable. In addition, the current
    148   /// control flow (in clang::ParseAST()) makes it convenient to put here.
    149   /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
    150   /// the lifetime of the preprocessor.
    151   SelectorTable Selectors;
    152 
    153   /// BuiltinInfo - Information about builtins.
    154   Builtin::Context BuiltinInfo;
    155 
    156   /// PragmaHandlers - This tracks all of the pragmas that the client registered
    157   /// with this preprocessor.
    158   PragmaNamespace *PragmaHandlers;
    159 
    160   /// \brief Tracks all of the comment handlers that the client registered
    161   /// with this preprocessor.
    162   std::vector<CommentHandler *> CommentHandlers;
    163 
    164   /// \brief True if we want to ignore the EOF token and continue later on
    165   /// (thus avoiding tearing down the Lexer, etc.).
    166   bool IncrementalProcessing;
    167 
    168   /// \brief The code-completion handler.
    169   CodeCompletionHandler *CodeComplete;
    170 
    171   /// \brief The file that we're performing code-completion for, if any.
    172   const FileEntry *CodeCompletionFile;
    173 
    174   /// \brief The offset in file for the code-completion point.
    175   unsigned CodeCompletionOffset;
    176 
    177   /// \brief The location for the code-completion point. This gets instantiated
    178   /// when the CodeCompletionFile gets \#include'ed for preprocessing.
    179   SourceLocation CodeCompletionLoc;
    180 
    181   /// \brief The start location for the file of the code-completion point.
    182   ///
    183   /// This gets instantiated when the CodeCompletionFile gets \#include'ed
    184   /// for preprocessing.
    185   SourceLocation CodeCompletionFileLoc;
    186 
    187   /// \brief The source location of the 'import' contextual keyword we just
    188   /// lexed, if any.
    189   SourceLocation ModuleImportLoc;
    190 
    191   /// \brief The module import path that we're currently processing.
    192   llvm::SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2>
    193     ModuleImportPath;
    194 
    195   /// \brief Whether the module import expects an identifier next. Otherwise,
    196   /// it expects a '.' or ';'.
    197   bool ModuleImportExpectsIdentifier;
    198 
    199   /// \brief The source location of the currently-active
    200   /// #pragma clang arc_cf_code_audited begin.
    201   SourceLocation PragmaARCCFCodeAuditedLoc;
    202 
    203   /// \brief True if we hit the code-completion point.
    204   bool CodeCompletionReached;
    205 
    206   /// \brief The number of bytes that we will initially skip when entering the
    207   /// main file, which is used when loading a precompiled preamble, along
    208   /// with a flag that indicates whether skipping this number of bytes will
    209   /// place the lexer at the start of a line.
    210   std::pair<unsigned, bool> SkipMainFilePreamble;
    211 
    212   /// CurLexer - This is the current top of the stack that we're lexing from if
    213   /// not expanding a macro and we are lexing directly from source code.
    214   ///  Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
    215   OwningPtr<Lexer> CurLexer;
    216 
    217   /// CurPTHLexer - This is the current top of stack that we're lexing from if
    218   ///  not expanding from a macro and we are lexing from a PTH cache.
    219   ///  Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
    220   OwningPtr<PTHLexer> CurPTHLexer;
    221 
    222   /// CurPPLexer - This is the current top of the stack that we're lexing from
    223   ///  if not expanding a macro.  This is an alias for either CurLexer or
    224   ///  CurPTHLexer.
    225   PreprocessorLexer *CurPPLexer;
    226 
    227   /// CurDirLookup - The DirectoryLookup structure used to find the current
    228   /// FileEntry, if CurLexer is non-null and if applicable.  This allows us to
    229   /// implement \#include_next and find directory-specific properties.
    230   const DirectoryLookup *CurDirLookup;
    231 
    232   /// CurTokenLexer - This is the current macro we are expanding, if we are
    233   /// expanding a macro.  One of CurLexer and CurTokenLexer must be null.
    234   OwningPtr<TokenLexer> CurTokenLexer;
    235 
    236   /// \brief The kind of lexer we're currently working with (see Lex()).
    237   enum CurLexerKind {
    238     CLK_Lexer,                ///< Lex() forwards to CurLexer.
    239     CLK_PTHLexer,             ///< Lex() forwards to CurPTHLexer.
    240     CLK_TokenLexer,           ///< Lex() forwards to CurTokenLexer.
    241     CLK_CachingLexer,         ///< Lex() calls CachingLex().
    242     CLK_LexAfterModuleImport  ///< Lex() calls LexAfterModuleImport().
    243   } CurLexerKind;
    244 
    245   /// IncludeMacroStack - This keeps track of the stack of files currently
    246   /// \#included, and macros currently being expanded from, not counting
    247   /// CurLexer/CurTokenLexer.
    248   struct IncludeStackInfo {
    249     enum CurLexerKind CurLexerKind;        // Lexer kind for this entry.
    250     Lexer *TheLexer;                       // Source lexer for this entry.
    251     PTHLexer *ThePTHLexer;                 // PTH lexer for this entry.
    252     PreprocessorLexer *ThePPLexer;         // Preprocessor lexer for this entry.
    253     TokenLexer *TheTokenLexer;             // Token lexer for this entry.
    254     const DirectoryLookup *TheDirLookup;   // Directory lookup for this entry.
    255 
    256     IncludeStackInfo(enum CurLexerKind Kind, Lexer *SrcLexer,
    257                      PTHLexer *PTHLex, PreprocessorLexer *PPLex,
    258                      TokenLexer *TokLexer, const DirectoryLookup *DirLookup)
    259       : CurLexerKind(Kind), TheLexer(SrcLexer), ThePTHLexer(PTHLex),
    260         ThePPLexer(PPLex), TheTokenLexer(TokLexer), TheDirLookup(DirLookup) {}
    261   };
    262   std::vector<IncludeStackInfo> IncludeMacroStack;
    263 
    264   /// Callbacks - These are actions invoked when some preprocessor activity is
    265   /// encountered (e.g. a file is \#included, etc).
    266   PPCallbacks *Callbacks;
    267 
    268   struct MacroExpandsInfo {  // Element type of DelayedMacroExpandsCallbacks.
    269     Token Tok;
    270     MacroInfo *MI;
    271     SourceRange Range;
    272     MacroExpandsInfo(Token MacroTok, MacroInfo *Info, SourceRange SrcRange)
    273       : Tok(MacroTok), MI(Info), Range(SrcRange) {}
    274   };
    275   SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
    276 
    277   /// Macros - For each IdentifierInfo that was associated with a macro, we
    278   /// keep a mapping to the history of all macro definitions and #undefs in
    279   /// the reverse order (the latest one is in the head of the list).
    280   llvm::DenseMap<IdentifierInfo*, MacroInfo*> Macros;
    281 
    282   /// \brief Macros that we want to warn about because they are not used at the
    283   /// end of the translation unit; we store just their SourceLocations instead
    284   /// of something like MacroInfo*. The benefit of this is that when we are
    285   /// deserializing from PCH, we don't need to deserialize identifier & macros
    286   /// just so that we can report that they are unused, we just warn using
    287   /// the SourceLocations of this set (that will be filled by the ASTReader).
    288   /// We are using SmallPtrSet instead of a vector for faster removal.
    289   typedef llvm::SmallPtrSet<SourceLocation, 32> WarnUnusedMacroLocsTy;
    290   WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
    291 
    292   /// MacroArgCache - This is a "freelist" of MacroArgs objects that can be
    293   /// reused for quick allocation.
    294   MacroArgs *MacroArgCache;
    295   friend class MacroArgs;
    296 
    297   /// PragmaPushMacroInfo - For each IdentifierInfo used in a #pragma
    298   /// push_macro directive, we keep a MacroInfo stack used to restore
    299   /// previous macro value.
    300   llvm::DenseMap<IdentifierInfo*, std::vector<MacroInfo*> > PragmaPushMacroInfo;
    301 
    302   // Various statistics we track for performance analysis.
    303   unsigned NumDirectives, NumIncluded, NumDefined, NumUndefined, NumPragma;
    304   unsigned NumIf, NumElse, NumEndif;
    305   unsigned NumEnteredSourceFiles, MaxIncludeStackDepth;
    306   unsigned NumMacroExpanded, NumFnMacroExpanded, NumBuiltinMacroExpanded;
    307   unsigned NumFastMacroExpanded, NumTokenPaste, NumFastTokenPaste;
    308   unsigned NumSkipped;
    309 
    310   /// Predefines - This string is the predefined macros that preprocessor
    311   /// should use from the command line etc.
    312   std::string Predefines;
    313 
    314   /// TokenLexerCache - Cache macro expanders to reduce malloc traffic.
    315   enum { TokenLexerCacheSize = 8 };
    316   unsigned NumCachedTokenLexers;
    317   TokenLexer *TokenLexerCache[TokenLexerCacheSize];
    318 
    319   /// \brief Keeps macro expanded tokens for TokenLexers.
    320   ///
    321   /// Works like a stack; a TokenLexer adds the macro expanded tokens that it
    322   /// is going to lex to the cache, and when it finishes the tokens are removed
    323   /// from the end of the cache.
    324   SmallVector<Token, 16> MacroExpandedTokens;
    325   std::vector<std::pair<TokenLexer *, size_t> > MacroExpandingLexersStack;
    326 
    327   /// \brief A record of the macro definitions and expansions that
    328   /// occurred during preprocessing.
    329   ///
    330   /// This is an optional side structure that can be enabled with
    331   /// \c createPreprocessingRecord() prior to preprocessing.
    332   PreprocessingRecord *Record;
    333 
    334 private:  // Cached tokens state.
    335   typedef SmallVector<Token, 1> CachedTokensTy;
    336 
    337   /// CachedTokens - Cached tokens are stored here when we do backtracking or
    338   /// lookahead. They are "lexed" by the CachingLex() method.
    339   CachedTokensTy CachedTokens;
    340 
    341   /// CachedLexPos - The position of the cached token that CachingLex() should
    342   /// "lex" next. If it points beyond the CachedTokens vector, it means that
    343   /// a normal Lex() should be invoked.
    344   CachedTokensTy::size_type CachedLexPos;
    345 
    346   /// BacktrackPositions - Stack of backtrack positions, allowing nested
    347   /// backtracks. The EnableBacktrackAtThisPos() method pushes a position to
    348   /// indicate where CachedLexPos should be set when the Backtrack() method is
    349   /// invoked (at which point the last position is popped).
    350   std::vector<CachedTokensTy::size_type> BacktrackPositions;
    351 
    352   struct MacroInfoChain {  // Node type of the MIChainHead and MICache lists.
    353     MacroInfo MI;          // The MacroInfo is stored by value in the node.
    354     MacroInfoChain *Next;  // Link to the next node.
    355     MacroInfoChain *Prev;  // Link to the previous node.
    356   };
    357 
    358   /// MacroInfos are managed as a chain for easy disposal.  This is the head
    359   /// of that list.
    360   MacroInfoChain *MIChainHead;
    361 
    362   /// MICache - A "freelist" of MacroInfo objects that can be reused for quick
    363   /// allocation.
    364   MacroInfoChain *MICache;
    365 
    366   MacroInfo *getInfoForMacro(IdentifierInfo *II) const;
    367 
    368 public:
    369   Preprocessor(DiagnosticsEngine &diags, LangOptions &opts,
    370                const TargetInfo *target,
    371                SourceManager &SM, HeaderSearch &Headers,
    372                ModuleLoader &TheModuleLoader,
    373                IdentifierInfoLookup *IILookup = 0,
    374                bool OwnsHeaderSearch = false,
    375                bool DelayInitialization = false,
    376                bool IncrProcessing = false);
    377 
    378   ~Preprocessor();
    379 
    380   /// \brief Initialize the preprocessor, if the constructor did not already
    381   /// perform the initialization.
    382   ///
    383   /// \param Target Information about the target.
    384   void Initialize(const TargetInfo &Target);
    385 
    386   DiagnosticsEngine &getDiagnostics() const { return *Diags; } // Unchecked deref.
    387   void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; } // Stores &D only.
    388 
          // Read accessors for the reference/pointer members declared at the top of
          // the class.  Target is held as a pointer and dereferenced unchecked.
          // NOTE(review): Target may be unset until Initialize() runs -- confirm.
    389   const LangOptions &getLangOpts() const { return LangOpts; }
    390   const TargetInfo &getTargetInfo() const { return *Target; }
    391   FileManager &getFileManager() const { return FileMgr; }
    392   SourceManager &getSourceManager() const { return SourceMgr; }
    393   HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
    394 
          // Mutable accessors for members stored by value in the Preprocessor.
    395   IdentifierTable &getIdentifierTable() { return Identifiers; }
    396   SelectorTable &getSelectorTable() { return Selectors; }
    397   Builtin::Context &getBuiltinInfo() { return BuiltinInfo; }
    398   llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
    399 
    400   void setPTHManager(PTHManager* pm);
    401 
    402   PTHManager *getPTHManager() { return PTH.get(); } // Null if no PTH is set.
    403 
          /// \brief Install the external source of macros; only the pointer is
          /// stored.
    404   void setExternalSource(ExternalPreprocessorSource *Source) {
    405     ExternalSource = Source;
    406   }
    407 
          /// \brief Retrieve the external source of macros last installed with
          /// setExternalSource().
    408   ExternalPreprocessorSource *getExternalSource() const {
    409     return ExternalSource;
    410   }
    411 
    412   /// \brief Retrieve the module loader associated with this preprocessor
          /// (held by reference in TheModuleLoader).
    413   ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
    414 
    415   /// SetCommentRetentionState - Configure comment retention in the output.
    416   /// Asking to keep macro comments also turns on keeping ordinary comments.
    417   void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
    418     this->KeepMacroComments = KeepMacroComments;
    419     this->KeepComments = KeepComments || KeepMacroComments;
    420   }
    421 
    422   bool getCommentRetentionState() const { return KeepComments; } // Not KeepMacroComments.
    423 
          // Accessors for the PragmasEnabled flag.
    424   void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; }
    425   bool getPragmasEnabled() const { return PragmasEnabled; }
    426 
          /// \brief Set the SuppressIncludeNotFoundError flag.
    427   void SetSuppressIncludeNotFoundError(bool Suppress) {
    428     SuppressIncludeNotFoundError = Suppress;
    429   }
    430 
    431   bool GetSuppressIncludeNotFoundError() const {  // Read-only query.
    432     return SuppressIncludeNotFoundError;  // See SetSuppressIncludeNotFoundError().
    433   }
    434 
    435   /// isCurrentLexer - Check whether the given lexer is the one we are lexing
    436   /// from directly, i.e. whether it is CurPPLexer.
    437   bool isCurrentLexer(const PreprocessorLexer *L) const {
    438     return L == CurPPLexer;
    439   }
    440 
    441   /// getCurrentLexer - Return the current lexer being lexed from (CurPPLexer).
    442   /// Note that this ignores any potentially active macro expansions and
    443   /// _Pragma expansions going on at the time.
    444   PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
    445 
    446   /// getCurrentFileLexer - Return the current file lexer being lexed from.
    447   /// Note that this ignores any potentially active macro expansions and _Pragma
    448   /// expansions going on at the time.
    449   PreprocessorLexer *getCurrentFileLexer() const;
    450 
    451   /// getPPCallbacks/addPPCallbacks - Accessors for the preprocessor callbacks.
    452   /// Note that this class takes ownership of any PPCallbacks object passed
    453   /// to addPPCallbacks.
    454   PPCallbacks *getPPCallbacks() const { return Callbacks; }
    455   void addPPCallbacks(PPCallbacks *C) {
    456     PPCallbacks *NewHead = C;
    457     if (Callbacks) NewHead = new PPChainedCallbacks(C, Callbacks);
    458     Callbacks = NewHead;
    459   }
    460 
    461   /// \brief Look up the MacroInfo that an identifier is \#defined to.
    462   /// Returns null if the identifier isn't \#define'd.
    463   MacroInfo *getMacroInfo(IdentifierInfo *II) const {
    464     // Only call getInfoForMacro() for identifiers flagged as having a macro.
    465     if (II->hasMacroDefinition())
    466       return getInfoForMacro(II);
    467     return 0;
    468   }
    469 
    470   /// \brief Specify a macro for this identifier.
    471   void setMacroInfo(IdentifierInfo *II, MacroInfo *MI,
    472                     bool LoadedFromAST = false);
    473   /// \brief Undefine a macro for this identifier.
    474   void clearMacroInfo(IdentifierInfo *II);
    475 
    476   /// macro_iterator/macro_begin/macro_end - This allows you to walk the macro
    477   /// history table. Currently defined macros have
    478   /// IdentifierInfo::hasMacroDefinition() set and an empty
    479   /// MacroInfo::getUndefLoc() at the head of the list.
    480   typedef llvm::DenseMap<IdentifierInfo*,
    481                          MacroInfo*>::const_iterator macro_iterator;
    482   macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
    483   macro_iterator macro_end(bool IncludeExternalMacros = true) const;
    484 
    485   const std::string &getPredefines() const { return Predefines; }
    486   /// setPredefines - Set the predefines for this Preprocessor.  These
    487   /// predefines are automatically injected when parsing the main file.
    488   void setPredefines(const char *P) { Predefines = P; } // P must not be null.
    489   void setPredefines(const std::string &P) { Predefines = P; }
    490 
    491   /// getIdentifierInfo - Return information about the specified preprocessor
    492   /// identifier token, looked up by name via Identifiers.get().  Taking a
    493   /// StringRef means the caller does not have to build a std::string.
    494   /// NOTE(review): older wording mentioned an overload taking two character
    495   /// pointers; none is visible in this part of the header -- confirm.
    496   IdentifierInfo *getIdentifierInfo(StringRef Name) const {
    497     return &Identifiers.get(Name);
    498   }
    499 
    500   /// AddPragmaHandler - Add the specified pragma handler to the preprocessor.
    501   /// If 'Namespace' is non-null, then it is a token required to exist on the
    502   /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
    503   void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
    504   void AddPragmaHandler(PragmaHandler *Handler) {
    505     AddPragmaHandler(StringRef(), Handler);  // Forward with an empty namespace.
    506   }
    507 
    508   /// RemovePragmaHandler - Remove the specific pragma handler from
    509   /// the preprocessor. If \arg Namespace is non-null, then it should
    510   /// be the namespace that \arg Handler was added to. It is an error
    511   /// to remove a handler that has not been registered.
    512   void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
    513   void RemovePragmaHandler(PragmaHandler *Handler) {
    514     RemovePragmaHandler(StringRef(), Handler);  // Forward with an empty namespace.
    515   }
    516 
    517   /// \brief Add the specified comment handler to the preprocessor.
    518   void addCommentHandler(CommentHandler *Handler);
    519 
    520   /// \brief Remove the specified comment handler.
    521   ///
    522   /// It is an error to remove a handler that has not been registered.
    523   void removeCommentHandler(CommentHandler *Handler);
    524 
    525   /// \brief Set the code completion handler to the given object.  Only the
          /// address of \p Handler is stored.
    526   void setCodeCompletionHandler(CodeCompletionHandler &Handler) {
    527     CodeComplete = &Handler;
    528   }
    529 
    530   /// \brief Retrieve the current code-completion handler; this is null after
          /// clearCodeCompletionHandler().
    531   CodeCompletionHandler *getCodeCompletionHandler() const {
    532     return CodeComplete;
    533   }
    534 
    535   /// \brief Clear out the code completion handler (resets it to null).
    536   void clearCodeCompletionHandler() {
    537     CodeComplete = 0;
    538   }
    539 
    540   /// \brief Hook used by the lexer to invoke the "natural language" code
    541   /// completion point.
    542   void CodeCompleteNaturalLanguage();
    543 
    544   /// \brief Retrieve the preprocessing record, or NULL if there is no
    545   /// preprocessing record (see createPreprocessingRecord()).
    546   PreprocessingRecord *getPreprocessingRecord() const { return Record; }
    547 
    548   /// \brief Create a new preprocessing record, which will keep track of
    549   /// all macro expansions, macro definitions, etc.
    550   void createPreprocessingRecord(bool RecordConditionalDirectives);
    551 
    552   /// EnterMainSourceFile - Enter the specified FileID as the main source file,
    553   /// which implicitly adds the builtin defines etc.
    554   void EnterMainSourceFile();
    555 
    556   /// EndSourceFile - Inform the preprocessor callbacks that processing is
    557   /// complete.
    558   void EndSourceFile();
    559 
    560   /// EnterSourceFile - Add a source file to the top of the include stack and
    561   /// start lexing tokens from it instead of the current buffer.  Emit an error
    562   /// and don't enter the file on error.
    563   void EnterSourceFile(FileID CurFileID, const DirectoryLookup *Dir,
    564                        SourceLocation Loc);
    565 
    566   /// EnterMacro - Add a Macro to the top of the include stack and start lexing
    567   /// tokens from it instead of the current buffer.  Args specifies the
    568   /// tokens input to a function-like macro.
    569   ///
    570   /// ILEnd specifies the location of the ')' for a function-like macro or the
    571   /// identifier for an object-like macro.
    572   void EnterMacro(Token &Identifier, SourceLocation ILEnd, MacroInfo *Macro,
    573                   MacroArgs *Args);
    574 
    575   /// EnterTokenStream - Add a "macro" context to the top of the include stack,
    576   /// which will cause the lexer to start returning the specified tokens.
    577   ///
    578   /// If DisableMacroExpansion is true, tokens lexed from the token stream will
    579   /// not be subject to further macro expansion.  Otherwise, these tokens will
    580   /// be re-macro-expanded when/if expansion is enabled.
    581   ///
    582   /// If OwnsTokens is false, this method assumes that the specified stream of
    583   /// tokens has a permanent owner somewhere, so they do not need to be copied.
    584   /// If it is true, it assumes the array of tokens is allocated with new[] and
    585   /// must be freed.
    586   ///
    587   void EnterTokenStream(const Token *Toks, unsigned NumToks,
    588                         bool DisableMacroExpansion, bool OwnsTokens);
    589 
    590   /// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the
    591   /// lexer stack.  This should only be used in situations where the current
    592   /// state of the top-of-stack lexer is known.
    593   void RemoveTopOfLexerStack();
    594 
    595   /// EnableBacktrackAtThisPos - From the point that this method is called, and
    596   /// until CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
    597   /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
    598   /// make the Preprocessor re-lex the same tokens.
    599   ///
    600   /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
    601   /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
    602   /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
    603   ///
    604   /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
    605   /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
    606   /// tokens will continue indefinitely.
    607   ///
    608   void EnableBacktrackAtThisPos();
    609 
    610   /// CommitBacktrackedTokens - Disable the last EnableBacktrackAtThisPos call.
    611   void CommitBacktrackedTokens();
    612 
    613   /// Backtrack - Make Preprocessor re-lex the tokens that were lexed since
    614   /// EnableBacktrackAtThisPos() was previously called.
    615   void Backtrack();
    616 
    617   /// isBacktrackEnabled - True if EnableBacktrackAtThisPos() was called and
    618   /// caching of tokens is on, i.e. BacktrackPositions is non-empty.
    619   bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
    620 
    621   /// Lex - To lex a token from the preprocessor, just pull a token from the
    622   /// current lexer or macro object.  CurLexerKind selects the token source.
    623   void Lex(Token &Result) {
    624     switch (CurLexerKind) {  // Every enumerator is handled; no default case.
    625     case CLK_Lexer: CurLexer->Lex(Result); break;
    626     case CLK_PTHLexer: CurPTHLexer->Lex(Result); break;
    627     case CLK_TokenLexer: CurTokenLexer->Lex(Result); break;
    628     case CLK_CachingLexer: CachingLex(Result); break;
    629     case CLK_LexAfterModuleImport: LexAfterModuleImport(Result); break;
    630     }
    631   }
    632 
    633   void LexAfterModuleImport(Token &Result);
    634 
    635   /// LexNonComment - Lex tokens until one that is not a comment is produced,
    636   /// leaving it in Result.  This is useful in -E -C mode where comments
    637   /// would foul up preprocessor directive handling.
    638   void LexNonComment(Token &Result) {
    639     Lex(Result);
    640     while (Result.getKind() == tok::comment)
    641       Lex(Result);
    642   }
    643 
    644   /// LexUnexpandedToken - Lex one token with macro expansion of identifier
    645   /// tokens switched off; the previous setting is put back afterwards.
    646   void LexUnexpandedToken(Token &Result) {
    647     // Remember the setting that was in force on entry.
    648     const bool SavedDisableMacroExpansion = DisableMacroExpansion;
    649 
    650     // Force expansion off just for this one token.
    651     DisableMacroExpansion = true;
    652     Lex(Result);
    653 
    654     DisableMacroExpansion = SavedDisableMacroExpansion;
    655   }
    656 
    657   /// LexUnexpandedNonComment - Same as LexNonComment, except that each token
    658   /// is lexed with macro expansion of identifier tokens disabled.
    659   void LexUnexpandedNonComment(Token &Result) {
    660     LexUnexpandedToken(Result);
    661     while (Result.getKind() == tok::comment)
    662       LexUnexpandedToken(Result);
    663   }
    664 
    665   /// Restricts macro expansion to preprocessor directives only.
    666   void SetMacroExpansionOnlyInDirectives() {
    667     MacroExpansionInDirectivesOverride = true;
    668     DisableMacroExpansion = true;
    669   }
    670 
    671   /// LookAhead - Peek at the token N positions ahead without consuming any
    672   /// tokens.  LookAhead(0) yields the token the next call to Lex() would
    673   /// return, LookAhead(1) the one after that, and so on.  The tokens are
    674   /// normal tokens after phase 5, i.e. this is equivalent to using 'Lex',
    675   /// not 'LexUnexpandedToken'.
    676   const Token &LookAhead(unsigned N) {
    677     if (CachedLexPos + N >= CachedTokens.size())
    678       return PeekAhead(N+1);
    679 
    680     return CachedTokens[CachedLexPos+N];
    681   }
    682 
    683   /// RevertCachedTokens - When backtracking is enabled and tokens are cached,
    684   /// this moves the cached lex position back by \p N tokens.
    685   /// Note that the number of tokens being reverted should be up to the last
    686   /// backtrack position, not more.
    687   void RevertCachedTokens(unsigned N) {
    688     assert(isBacktrackEnabled() &&
    689            "Should only be called when tokens are cached for backtracking");
    690     assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back())
    691          && "Should revert tokens up to the last backtrack position, not more");
    692     assert(signed(CachedLexPos) - signed(N) >= 0 &&
    693            "Corrupted backtrack positions ?");
    694     CachedLexPos -= N;
    695   }
    696 
    697   /// EnterToken - Enters caching-lex mode and inserts \p Tok into the cached
    698   /// tokens at the current lex position, so that it is lexed next.  If
    699   /// BackTrack() is called afterwards, the token will remain at that point.
    700   void EnterToken(const Token &Tok) {
    701     EnterCachingLexMode();
    702     CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
    703   }
    704 
    705   /// AnnotateCachedTokens - We notify the Preprocessor that if it is caching
    706   /// tokens (because backtrack is enabled) it should replace the most recent
    707   /// cached tokens with the given annotation token. This function has no effect
    708   /// if backtracking is not enabled.
    709   ///
    710   /// Note that the use of this function is just for optimization, so that the
    711   /// cached tokens don't get re-parsed and re-resolved after a backtrack is
    712   /// invoked.
    713   void AnnotateCachedTokens(const Token &Tok) {
    714     assert(Tok.isAnnotation() && "Expected annotation token");
    715     if (CachedLexPos != 0 && isBacktrackEnabled())
    716       AnnotatePreviousCachedTokens(Tok);
    717   }
    718 
    719   /// \brief Replace the last token with an annotation token.
    720   ///
    721   /// Like AnnotateCachedTokens(), this routine replaces an
    722   /// already-parsed (and resolved) token with an annotation
    723   /// token. However, this routine only replaces the last token with
    724   /// the annotation token; it does not affect any other cached
    725   /// tokens. This function has no effect if backtracking is not
    726   /// enabled.
    727   void ReplaceLastTokenWithAnnotation(const Token &Tok) {
    728     assert(Tok.isAnnotation() && "Expected annotation token");
    729     if (CachedLexPos != 0 && isBacktrackEnabled())
    730       CachedTokens[CachedLexPos-1] = Tok;
    731   }
    732 
    733   /// TypoCorrectToken - Overwrite the most recently lexed cached token with
    734   /// \p Tok, caching a typo correction.  No-op unless backtracking is enabled.
    735   void TypoCorrectToken(const Token &Tok) {
    736     assert(Tok.getIdentifierInfo() && "Expected identifier token");
    737     if (CachedLexPos != 0 && isBacktrackEnabled())
    738       CachedTokens[CachedLexPos-1] = Tok;
    739   }
    740 
    741   /// \brief Recompute the current lexer kind based on the CurLexer/CurPTHLexer/
    742   /// CurTokenLexer pointers.
    743   void recomputeCurLexerKind();
    744 
    745   /// \brief Returns true if incremental processing is enabled
    746   bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
    747 
    748   /// \brief Enables incremental processing, or disables it if \p value is false.
    749   void enableIncrementalProcessing(bool value = true) {
    750     IncrementalProcessing = value;
    751   }
    752 
    753   /// \brief Specify the point at which code-completion will be performed.
    754   ///
    755   /// \param File the file in which code completion should occur. If
    756   /// this file is included multiple times, code-completion will
    757   /// perform completion the first time it is included. If NULL, this
    758   /// function clears out the code-completion point.
    759   ///
    760   /// \param Line the line at which code completion should occur
    761   /// (1-based).
    762   ///
    763   /// \param Column the column at which code completion should occur
    764   /// (1-based).
    765   ///
    766   /// \returns true if an error occurred, false otherwise.
    767   bool SetCodeCompletionPoint(const FileEntry *File,
    768                               unsigned Line, unsigned Column);
    769 
    770   /// \brief Determine if we are performing code completion.
    771   bool isCodeCompletionEnabled() const { return CodeCompletionFile != 0; }
    772 
    773   /// \brief Returns the location of the code-completion point.
    774   /// Returns an invalid location if code-completion is not enabled or the file
    775   /// containing the code-completion point has not been lexed yet.
    776   SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
    777 
    778   /// \brief Returns the start location of the file of code-completion point.
    779   /// Returns an invalid location if code-completion is not enabled or the file
    780   /// containing the code-completion point has not been lexed yet.
    781   SourceLocation getCodeCompletionFileLoc() const {
    782     return CodeCompletionFileLoc;
    783   }
    784 
    785   /// \brief Returns true if code-completion is enabled and we have hit the
    786   /// code-completion point.
    787   bool isCodeCompletionReached() const { return CodeCompletionReached; }
    788 
    789   /// \brief Note that we hit the code-completion point.
    790   void setCodeCompletionReached() {
    791     assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
    792     CodeCompletionReached = true;
    793     // Silence any diagnostics that occur after we hit the code-completion.
    794     getDiagnostics().setSuppressAllDiagnostics(true);
    795   }
    796 
    797   /// \brief The location of the currently-active \#pragma clang
    798   /// arc_cf_code_audited begin.  Returns an invalid location if there
    799   /// is no such pragma active.
    800   SourceLocation getPragmaARCCFCodeAuditedLoc() const {
    801     return PragmaARCCFCodeAuditedLoc;
    802   }
    803 
    804   /// \brief Set the location of the currently-active \#pragma clang
    805   /// arc_cf_code_audited begin.  An invalid location ends the pragma.
    806   void setPragmaARCCFCodeAuditedLoc(SourceLocation Loc) {
    807     PragmaARCCFCodeAuditedLoc = Loc;
    808   }
    809 
    810   /// \brief Instruct the preprocessor to skip part of the main source file.
    811   ///
    812   /// \param Bytes The number of bytes in the preamble to skip.
    813   ///
    814   /// \param StartOfLine Whether skipping these bytes puts the lexer at the
    815   /// start of a line.
    816   void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
    817     SkipMainFilePreamble.first = Bytes;
    818     SkipMainFilePreamble.second = StartOfLine;
    819   }
    820 
    821   /// Diag - Forwarding functions for diagnostics.  These report diagnostic
    822   /// \p DiagID through Diags, either at the explicit location \p Loc or at
    823   /// the location of token \p Tok, and return the resulting builder.
    824   DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
    825     return Diags->Report(Loc, DiagID);
    826   }
    827 
    828   DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
    829     return Diags->Report(Tok.getLocation(), DiagID);
    830   }
    831 
    832   /// getSpelling() - Return the 'spelling' of the token at the given
    833   /// location.  The location is used as-is: this goes neither up to the
    834   /// spelling location nor down to the expansion location.
    835   ///
    836   /// \param buffer Scratch storage, used only when the token needs "cleaning"
    837   ///   (e.g. it contains trigraphs or escaped newlines).
    838   /// \param invalid If non-null, will be set \c true if an error occurs.
    839   StringRef getSpelling(SourceLocation loc,
    840                         SmallVectorImpl<char> &buffer,
    841                         bool *invalid = 0) const {
    842     return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
    843   }
    844 
    845   /// getSpelling() - Return the 'spelling' of the Tok token.  The spelling of a
    846   /// token is the characters used to represent the token in the source file
    847   /// after trigraph expansion and escaped-newline folding.  In particular, this
    848   /// wants to get the true, uncanonicalized, spelling of things like digraphs,
    849   /// UCNs, etc.
    850   ///
    851   /// \param Invalid If non-null, will be set \c true if an error occurs.
    852   std::string getSpelling(const Token &Tok, bool *Invalid = 0) const {
    853     return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
    854   }
    855 
    856   /// getSpelling - This method is used to get the spelling of a token into a
    857   /// preallocated buffer, instead of as an std::string.  The caller is required
    858   /// to allocate enough space for the token, which is guaranteed to be at least
    859   /// Tok.getLength() bytes long.  The length of the actual result is returned.
    860   ///
    861   /// Note that this method may do two possible things: it may either fill in
    862   /// the buffer specified with characters, or it may *change the input pointer*
    863   /// to point to a constant buffer with the data already in it (avoiding a
    864   /// copy).  The caller is not allowed to modify the returned buffer pointer
    865   /// if an internal buffer is returned.
    866   unsigned getSpelling(const Token &Tok, const char *&Buffer,
    867                        bool *Invalid = 0) const {
    868     return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
    869   }
    870 
    871   /// getSpelling - This method is used to get the spelling of a token into a
    872   /// SmallVector. Note that the returned StringRef may not point to the
    873   /// supplied buffer if a copy can be avoided.
    874   StringRef getSpelling(const Token &Tok,
    875                         SmallVectorImpl<char> &Buffer,
    876                         bool *Invalid = 0) const;
    877 
    878   /// getSpellingOfSingleCharacterNumericConstant - Tok is a numeric constant
    879   /// with length 1, return the character.
    880   char getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
    881                                                    bool *Invalid = 0) const {
    882     assert(Tok.is(tok::numeric_constant) &&
    883            Tok.getLength() == 1 && "Called on unsupported token");
    884     assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
    885 
    886     // If the token is carrying a literal data pointer, just use it.
    887     if (const char *D = Tok.getLiteralData())
    888       return *D;
    889 
    890     // Otherwise, fall back on getCharacterData, which is slower, but always
    891     // works.
    892     return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid);
    893   }
    894 
    895   /// \brief Retrieve the name of the immediate macro expansion.
    896   ///
    897   /// This routine starts from a source location, and finds the name of the macro
    898   /// responsible for its immediate expansion. It looks through any intervening
    899   /// macro argument expansions to compute this. It returns a StringRef which
    900   /// refers to the SourceManager-owned buffer of the source where that macro
    901   /// name is spelled. Thus, the result shouldn't out-live the SourceManager.
    902   StringRef getImmediateMacroName(SourceLocation Loc) const {
    903     return Lexer::getImmediateMacroName(Loc, SourceMgr, LangOpts);
    904   }
    905 
    906   /// CreateString - Plop the specified string into a scratch buffer and set the
    907   /// specified token's location and length to it.  If specified, the source
    908   /// location provides a location of the expansion point of the token.
    909   void CreateString(const char *Buf, unsigned Len, Token &Tok,
    910                     SourceLocation ExpansionLocStart = SourceLocation(),
    911                     SourceLocation ExpansionLocEnd = SourceLocation());
    912 
    913   /// \brief Computes the source location just past the end of the
    914   /// token at this source location.
    915   ///
    916   /// This routine can be used to produce a source location that
    917   /// points just past the end of the token referenced by \p Loc, and
    918   /// is generally used when a diagnostic needs to point just after a
    919   /// token where it expected something different than it received. If
    920   /// the returned source location would not be meaningful (e.g., if
    921   /// it points into a macro), this routine returns an invalid
    922   /// source location.
    923   ///
    924   /// \param Offset an offset from the end of the token, where the source
    925   /// location should refer to. The default offset (0) produces a source
    926   /// location pointing just past the end of the token; an offset of 1 produces
    927   /// a source location pointing to the last character in the token, etc.
    928   SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) const {
    929     return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
    930   }
    931 
    932   /// \brief Returns true if the given MacroID location points at the first
    933   /// token of the macro expansion.
    934   ///
    935   /// \param MacroBegin If non-null and function returns true, it is set to
    936   /// begin location of the macro.
    937   bool isAtStartOfMacroExpansion(SourceLocation loc,
    938                                  SourceLocation *MacroBegin = 0) const {
    939     return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts,
    940                                             MacroBegin);
    941   }
    942 
    943   /// \brief Returns true if the given MacroID location points at the last
    944   /// token of the macro expansion.
    945   ///
    946   /// \param MacroEnd If non-null and function returns true, it is set to
    947   /// end location of the macro.
    948   bool isAtEndOfMacroExpansion(SourceLocation loc,
    949                                SourceLocation *MacroEnd = 0) const {
    950     return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
    951   }
    952 
    953   /// DumpToken - Print the token to stderr, used for debugging.
    954   ///
    955   void DumpToken(const Token &Tok, bool DumpFlags = false) const;
    956   void DumpLocation(SourceLocation Loc) const;
    957   void DumpMacro(const MacroInfo &MI) const;
    958 
    959   /// AdvanceToTokenCharacter - Given a location that specifies the start of a
    960   /// token, return a new location that specifies a character within the token.
    961   SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
    962                                          unsigned Char) const {
    963     return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
    964   }
    965 
    966   /// IncrementPasteCounter - Count one token paste operation: bump
    967   /// NumFastTokenPaste if \p isFast is true (a 'fast paste' case we handled),
    968   /// and NumTokenPaste otherwise.
    969   ///
    970   void IncrementPasteCounter(bool isFast) {
    971     if (isFast)
    972       ++NumFastTokenPaste;
    973     else
    974       ++NumTokenPaste;
    975   }
    976 
    977   void PrintStats();
    978 
    979   size_t getTotalMemory() const;
    980 
    981   /// HandleMicrosoftCommentPaste - When the macro expander pastes together a
    982   /// comment (/##/) in microsoft mode, this method handles updating the current
    983   /// state, returning the token on the next source line.
    984   void HandleMicrosoftCommentPaste(Token &Tok);
    985 
    986   //===--------------------------------------------------------------------===//
    987   // Preprocessor callback methods.  These are invoked by a lexer as various
    988   // directives and events are found.
    989 
    990   /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
    991   /// identifier information for the token and install it into the token,
    992   /// updating the token kind accordingly.
    993   IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
    994 
    995 private:
    996   llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
    997 
    998 public:
    999 
   1000   /// SetPoisonReason - Call this function to indicate the reason for
   1001   /// poisoning an identifier. If that identifier is accessed while
   1002   /// poisoned, then this reason will be used instead of the default
   1003   /// "poisoned" diagnostic.
   1004   void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);
   1005 
   1006   /// HandlePoisonedIdentifier - Display reason for poisoned
   1007   /// identifier.
   1008   void HandlePoisonedIdentifier(Token & Tok);
   1009 
   1010   void MaybeHandlePoisonedIdentifier(Token &Identifier) {
   1011     // Nothing to do unless the token names a poisoned identifier.
   1012     IdentifierInfo *Info = Identifier.getIdentifierInfo();
   1013     if (!Info || !Info->isPoisoned())
   1014       return;
   1015     HandlePoisonedIdentifier(Identifier);
   1016   }
   1017 
   1018 private:
   1019   /// Identifiers used for SEH handling in Borland. These are only
   1020   /// allowed in particular circumstances
   1021   // __except block
   1022   IdentifierInfo *Ident__exception_code,
   1023                  *Ident___exception_code,
   1024                  *Ident_GetExceptionCode;
   1025   // __except filter expression
   1026   IdentifierInfo *Ident__exception_info,
   1027                  *Ident___exception_info,
   1028                  *Ident_GetExceptionInfo;
   1029   // __finally
   1030   IdentifierInfo *Ident__abnormal_termination,
   1031                  *Ident___abnormal_termination,
   1032                  *Ident_AbnormalTermination;
   1033 public:
   1034   void PoisonSEHIdentifiers(bool Poison = true); // Borland
   1035 
   1036   /// HandleIdentifier - This callback is invoked when the lexer reads an
   1037   /// identifier and has filled in the token's IdentifierInfo member.  This
   1038   /// callback potentially macro expands it or turns it into a named token (like
   1039   /// 'for').
   1040   void HandleIdentifier(Token &Identifier);
   1041 
   1042 
   1043   /// HandleEndOfFile - This callback is invoked when the lexer hits the end of
   1044   /// the current file.  This either returns the EOF token and returns true, or
   1045   /// pops a level off the include stack and returns false, at which point the
   1046   /// client should call lex again.
   1047   bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
   1048 
   1049   /// HandleEndOfTokenLexer - This callback is invoked when the current
   1050   /// TokenLexer hits the end of its token stream.
   1051   bool HandleEndOfTokenLexer(Token &Result);
   1052 
   1053   /// HandleDirective - This callback is invoked when the lexer sees a # token
   1054   /// at the start of a line.  This consumes the directive, modifies the
   1055   /// lexer/preprocessor state, and advances the lexer(s) so that the next token
   1056   /// read is the correct one.
   1057   void HandleDirective(Token &Result);
   1058 
   1059   /// CheckEndOfDirective - Ensure that the next token is a tok::eod token.  If
   1060   /// not, emit a diagnostic and consume up until the eod.  If EnableMacros is
   1061   /// true, then we consider macros that expand to zero tokens as being ok.
   1062   void CheckEndOfDirective(const char *Directive, bool EnableMacros = false);
   1063 
   1064   /// DiscardUntilEndOfDirective - Read and discard all tokens remaining on the
   1065   /// current line until the tok::eod token is found.
   1066   void DiscardUntilEndOfDirective();
   1067 
   1068   /// SawDateOrTime - This returns true if the preprocessor has seen a use of
   1069   /// __DATE__ or __TIME__ in the file so far.
   1070   bool SawDateOrTime() const {
   1071     return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
   1072   }
   1073   unsigned getCounterValue() const { return CounterValue; }
   1074   void setCounterValue(unsigned V) { CounterValue = V; }
   1075 
   1076   /// \brief Retrieves the module that we're currently building, if any.
   1077   Module *getCurrentModule();
   1078 
   1079   /// \brief Allocate a new MacroInfo object with the provided SourceLocation.
   1080   MacroInfo *AllocateMacroInfo(SourceLocation L);
   1081 
   1082   /// \brief Allocate a new MacroInfo object which is clone of \p MI.
   1083   MacroInfo *CloneMacroInfo(const MacroInfo &MI);
   1084 
   1085   /// \brief Turn the specified lexer token into a fully checked and spelled
   1086   /// filename, e.g. as an operand of \#include.
   1087   ///
   1088   /// The caller is expected to provide a buffer that is large enough to hold
   1089   /// the spelling of the filename, but is also expected to handle the case
   1090   /// when this method decides to use a different buffer.
   1091   ///
   1092   /// \returns true if the input filename was in <>'s or false if it was
   1093   /// in ""'s.
   1094   bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Filename);
   1095 
   1096   /// \brief Given a "foo" or \<foo> reference, look up the indicated file.
   1097   ///
   1098   /// Returns null on failure.  \p isAngled indicates whether the file
   1099   /// reference is for system \#include's or not (i.e. using <> instead of "").
   1100   const FileEntry *LookupFile(StringRef Filename,
   1101                               bool isAngled, const DirectoryLookup *FromDir,
   1102                               const DirectoryLookup *&CurDir,
   1103                               SmallVectorImpl<char> *SearchPath,
   1104                               SmallVectorImpl<char> *RelativePath,
   1105                               Module **SuggestedModule,
   1106                               bool SkipCache = false);
   1107 
   1108   /// GetCurDirLookup - The DirectoryLookup structure used to find the current
   1109   /// FileEntry, if CurLexer is non-null and if applicable.  This allows us to
   1110   /// implement \#include_next and find directory-specific properties.
   1111   const DirectoryLookup *GetCurDirLookup() const { return CurDirLookup; }
   1112 
   1113   /// \brief Return true if we're in the top-level file, not in a \#include.
   1114   bool isInPrimaryFile() const;
   1115 
   1116   /// ConcatenateIncludeName - Handle cases where the \#include name is expanded
   1117   /// from a macro as multiple tokens, which need to be glued together.  This
   1118   /// occurs for code like:
   1119   /// \code
   1120   ///    \#define FOO <x/y.h>
   1121   ///    \#include FOO
   1122   /// \endcode
   1123   /// because in this case, "<x/y.h>" is returned as 7 tokens, not one.
   1124   ///
   1125   /// This code concatenates and consumes tokens up to the '>' token.  It
   1126   /// returns false if the > was found, otherwise it returns true if it finds
   1127   /// and consumes the EOD marker.
   1128   bool ConcatenateIncludeName(SmallString<128> &FilenameBuffer,
   1129                               SourceLocation &End);
   1130 
   1131   /// LexOnOffSwitch - Lex an on-off-switch (C99 6.10.6p2) and verify that it is
   1132   /// followed by EOD.  Return true if the token is not a valid on-off-switch.
   1133   bool LexOnOffSwitch(tok::OnOffSwitch &OOS);
   1134 
   1135 private:
   1136   /// Saves the current lexer state on IncludeMacroStack and clears CurPPLexer.
   1137   void PushIncludeMacroStack() {
   1138     IncludeMacroStack.push_back(IncludeStackInfo(CurLexerKind,
   1139                                                  CurLexer.take(),
   1140                                                  CurPTHLexer.take(),
   1141                                                  CurPPLexer,
   1142                                                  CurTokenLexer.take(),
   1143                                                  CurDirLookup));
   1144     CurPPLexer = 0;
   1145   }
   1146   /// Restores the lexer state saved on top of IncludeMacroStack and pops it.
   1147   void PopIncludeMacroStack() {
   1148     CurLexer.reset(IncludeMacroStack.back().TheLexer);
   1149     CurPTHLexer.reset(IncludeMacroStack.back().ThePTHLexer);
   1150     CurPPLexer = IncludeMacroStack.back().ThePPLexer;
   1151     CurTokenLexer.reset(IncludeMacroStack.back().TheTokenLexer);
   1152     CurDirLookup  = IncludeMacroStack.back().TheDirLookup;
   1153     CurLexerKind = IncludeMacroStack.back().CurLexerKind;
   1154     IncludeMacroStack.pop_back();
   1155   }
   1156 
   1157   /// \brief Allocate a new MacroInfo object.
   1158   MacroInfo *AllocateMacroInfo();
   1159 
   1160   /// \brief Release the specified MacroInfo for re-use.
   1161   ///
   1162   /// This memory will be reused for allocating new MacroInfo objects.
   1163   void ReleaseMacroInfo(MacroInfo* MI);
   1164 
   1165   /// ReadMacroName - Lex and validate a macro name, which occurs after a
   1166   /// \#define or \#undef.  This emits a diagnostic, sets the token kind to eod,
   1167   /// and discards the rest of the macro line if the macro name is invalid.
   1168   void ReadMacroName(Token &MacroNameTok, char isDefineUndef = 0);
   1169 
   1170   /// ReadMacroDefinitionArgList - The ( starting an argument list of a macro
   1171   /// definition has just been read.  Lex the rest of the arguments and the
   1172   /// closing ), updating MI with what we learn and saving in LastTok the
   1173   /// last token read.
   1174   /// Return true if an error occurs parsing the arg list.
   1175   bool ReadMacroDefinitionArgList(MacroInfo *MI, Token& LastTok);
   1176 
   1177   /// We just read a \#if or related directive and decided that the
   1178   /// subsequent tokens are in the \#if'd out portion of the
   1179   /// file.  Lex the rest of the file, until we see an \#endif.  If \p
   1180   /// FoundNonSkipPortion is true, then we have already emitted code for part of
   1181   /// this \#if directive, so \#else/\#elif blocks should never be entered. If
   1182   /// \p FoundElse is false, then \#else directives are ok, if not, then we have
   1183   /// already seen one so a \#else directive is a duplicate.  When this returns,
   1184   /// the caller can lex the first valid token.
   1185   void SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
   1186                                     bool FoundNonSkipPortion, bool FoundElse,
   1187                                     SourceLocation ElseLoc = SourceLocation());
   1188 
   1189   /// \brief A fast PTH version of SkipExcludedConditionalBlock.
   1190   void PTHSkipExcludedConditionalBlock();
   1191 
   1192   /// EvaluateDirectiveExpression - Evaluate an integer constant expression that
   1193   /// may occur after a \#if or \#elif directive and return it as a bool.  If the
   1194   /// expression is equivalent to "!defined(X)" return X in IfNDefMacro.
   1195   bool EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
   1196 
   1197   /// RegisterBuiltinPragmas - Install the standard preprocessor pragmas:
   1198   /// \#pragma GCC poison/system_header/dependency and \#pragma once.
   1199   void RegisterBuiltinPragmas();
   1200 
   1201   /// \brief Register builtin macros such as __LINE__ with the identifier table.
   1202   void RegisterBuiltinMacros();
   1203 
   1204   /// HandleMacroExpandedIdentifier - If an identifier token is read that is to
   1205   /// be expanded as a macro, handle it and return the next token as 'Tok'.  If
   1206   /// the macro should not be expanded return true, otherwise return false.
   1207   bool HandleMacroExpandedIdentifier(Token &Tok, MacroInfo *MI);
   1208 
   1209   /// \brief Cache macro expanded tokens for TokenLexers.
   1210   ///
   1211   /// Works like a stack; a TokenLexer adds the macro expanded tokens that it is
   1212   /// going to lex to the cache, and when it finishes the tokens are removed
   1213   /// from the end of the cache.
   1214   Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
   1215                                   ArrayRef<Token> tokens);
   1216   void removeCachedMacroExpandedTokensOfLastLexer();
   1217   friend void TokenLexer::ExpandFunctionArguments();
   1218 
   1219   /// isNextPPTokenLParen - Determine whether the next preprocessor token to be
   1220   /// lexed is a '('.  If so, consume the token and return true, if not, this
   1221   /// method should have no observable side-effect on the lexed tokens.
   1222   bool isNextPPTokenLParen();
   1223 
   1224   /// ReadFunctionLikeMacroArgs - After reading "MACRO(", this method is
   1225   /// invoked to read all of the formal arguments specified for the macro
   1226   /// invocation.  This returns null on error.
   1227   MacroArgs *ReadFunctionLikeMacroArgs(Token &MacroName, MacroInfo *MI,
   1228                                        SourceLocation &ExpansionEnd);
   1229 
   1230   /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
   1231   /// as a builtin macro, handle it and return the next token as 'Tok'.
   1232   void ExpandBuiltinMacro(Token &Tok);
   1233 
   1234   /// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then
   1235   /// return the first token after the directive.  The _Pragma token has just
   1236   /// been read into 'Tok'.
   1237   void Handle_Pragma(Token &Tok);
   1238 
   1239   /// HandleMicrosoft__pragma - Like Handle_Pragma except the pragma text
   1240   /// is not enclosed within a string literal.
   1241   void HandleMicrosoft__pragma(Token &Tok);
   1242 
   1243   /// EnterSourceFileWithLexer - Add a lexer to the top of the include stack and
   1244   /// start lexing tokens from it instead of the current buffer.
   1245   void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir);
   1246 
   1247   /// EnterSourceFileWithPTH - Add a lexer to the top of the include stack and
   1248   /// start getting tokens from it using the PTH cache.
   1249   void EnterSourceFileWithPTH(PTHLexer *PL, const DirectoryLookup *Dir);
   1250 
   1251   /// IsFileLexer - Returns true if we are lexing from a file and not a
   1252   ///  pragma or a macro.
   1253   static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
   1254     return L ? !L->isPragmaLexer() : P != 0;
   1255   }
   1256 
   1257   static bool IsFileLexer(const IncludeStackInfo& I) {
   1258     return IsFileLexer(I.TheLexer, I.ThePPLexer);
   1259   }
   1260 
   1261   bool IsFileLexer() const {
   1262     return IsFileLexer(CurLexer.get(), CurPPLexer);
   1263   }
   1264 
   1265   //===--------------------------------------------------------------------===//
   1266   // Caching stuff.
   1267   void CachingLex(Token &Result);
   1268   bool InCachingLexMode() const {
   1269     // Having all of the current-lexer pointers null is not sufficient on its
   1270     // own: if IncludeMacroStack is also empty we are past EOF rather than in
            // CachingLex mode, so a non-empty stack is required as well.
   1271     const bool NoCurrentLexer =
                CurPPLexer == 0 && CurTokenLexer == 0 && CurPTHLexer == 0;
   1272     return NoCurrentLexer && !IncludeMacroStack.empty();
   1273   }
   1274   void EnterCachingLexMode();  // Declared only; defined out of line.
          // NOTE(review): by its name this is the counterpart of
          // ExitCachingLexMode(); the exact effect is not visible here - confirm.
   1275   void ExitCachingLexMode() {
            // Remove the top of the lexer stack, but only while InCachingLexMode()
            // holds; in any other state this is a no-op.
   1276     if (!InCachingLexMode())
   1277       return;
            RemoveTopOfLexerStack();
   1278   }
   1279   const Token &PeekAhead(unsigned N);  // Declared only; defined out of line.
          // NOTE(review): presumably returns the token N positions ahead without
          // consuming it; how N is counted is not visible here - confirm.
   1280   void AnnotatePreviousCachedTokens(const Token &Tok);
          // NOTE(review): AnnotatePreviousCachedTokens is declared only; from the
          // name it updates previously cached tokens using 'Tok' - confirm.
   1281 
   1282   //===--------------------------------------------------------------------===//
   1283   /// Handle*Directive - Implement the individual preprocessor directives.
   1284   /// Each handler should update the state of the current preprocessor object
   1285   /// so that the next call to Lex() returns the appropriate token.
          ///
          /// NOTE(review): which directive each handler serves is inferred from its
          /// name only; the definitions are not part of this header.
   1286   void HandleLineDirective(Token &Tok);
   1287   void HandleDigitDirective(Token &Tok);
          // isWarning presumably selects warning vs. error severity - TODO confirm.
   1288   void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
   1289   void HandleIdentSCCSDirective(Token &Tok);
   1290   void HandleMacroPublicDirective(Token &Tok);
   1291   void HandleMacroPrivateDirective(Token &Tok);
   1292 
   1293   // File inclusion.
          //
          // Most of these handlers take HashLoc, a SourceLocation, in addition to
          // the token; HandleMicrosoftImportDirective takes only the token.
          // HandleIncludeDirective defaults LookupFrom to 0 (null) and isImport to
          // false, so the two-argument call form is available to callers.
          // NOTE(review): HashLoc is presumably the location of the '#' that
          // introduced the directive - confirm against the definitions.
   1294   void HandleIncludeDirective(SourceLocation HashLoc,
   1295                               Token &Tok,
   1296                               const DirectoryLookup *LookupFrom = 0,
   1297                               bool isImport = false);
   1298   void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
   1299   void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
   1300   void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
   1301   void HandleMicrosoftImportDirective(Token &Tok);
   1302 
   1303   // Macro handling.
          // NOTE(review): by name these implement #define and #undef; both are
          // declared only, so the details must be checked in the definitions.
   1304   void HandleDefineDirective(Token &Tok);
   1305   void HandleUndefDirective(Token &Tok);
   1306 
   1307   // Conditional Inclusion.
          //
          // HandleIfdefDirective serves two spellings, selected by isIfndef.
          // NOTE(review): ReadAnyTokensBeforeDirective presumably reports whether
          // any tokens preceded the directive in the file; its use is not visible
          // here - confirm against the definitions.
   1308   void HandleIfdefDirective(Token &Tok, bool isIfndef,
   1309                             bool ReadAnyTokensBeforeDirective);
   1310   void HandleIfDirective(Token &Tok, bool ReadAnyTokensBeforeDirective);
   1311   void HandleEndifDirective(Token &Tok);
   1312   void HandleElseDirective(Token &Tok);
   1313   void HandleElifDirective(Token &Tok);
   1314 
   1315   // Pragmas.
          //
          // HandlePragmaDirective is declared before the 'public:' label below, so
          // it keeps whatever access level is in effect above this point; the
          // remaining pragma handlers are publicly accessible.
          // NOTE(review): 'Introducer' presumably identifies how the pragma was
          // spelled; its value set is not visible here - confirm.
   1316   void HandlePragmaDirective(unsigned Introducer);
   1317 public:
   1318   void HandlePragmaOnce(Token &OnceTok);
   1319   void HandlePragmaMark();
   1320   void HandlePragmaPoison(Token &PoisonTok);
   1321   void HandlePragmaSystemHeader(Token &SysHeaderTok);
   1322   void HandlePragmaDependency(Token &DependencyTok);
   1323   void HandlePragmaComment(Token &CommentTok);
   1324   void HandlePragmaMessage(Token &MessageTok);
   1325   void HandlePragmaPushMacro(Token &Tok);
   1326   void HandlePragmaPopMacro(Token &Tok);
   1327   void HandlePragmaIncludeAlias(Token &Tok);
          // Returns an IdentifierInfo pointer.  NOTE(review): presumably shared by
          // the push_macro/pop_macro handlers; whether it can return null is not
          // visible here - confirm.
   1328   IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);
   1329 
   1330   // Returns true, and stores the first token, only if some CommentHandler
   1331   // has inserted tokens and getCommentRetentionState() is false.
          // 'Comment' is the source range passed along for the comment.
          // NOTE(review): the first parameter is named 'Token', the same as its
          // type, which hides the type name inside the declarator.
   1332   bool HandleComment(Token &Token, SourceRange Comment);
   1333 
   1334   /// \brief Record that a macro has been used, updating the information kept
   1335   /// about macros that need unused warnings.
          /// \param MI the macro that was used.
   1336   void markMacroAsUsed(MacroInfo *MI);
   1337 };
   1338 
   1339 /// \brief Abstract base class that describes a handler that will receive
   1340 /// source ranges for each of the comments encountered in the source file.
        ///
        /// The class cannot be instantiated directly because HandleComment is pure
        /// virtual; concrete handlers derive from it and override that method.
   1341 class CommentHandler {
   1342 public:
          // Virtual so that handlers can be destroyed through a base pointer;
          // declared here and defined out of line.
   1343   virtual ~CommentHandler();
   1344 
   1345   // Receives the preprocessor and the source range of one comment.  The
   1346   // handler shall return true if it has pushed any tokens to be read, using
          // e.g. EnterToken or EnterTokenStream.
   1347   virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
   1348 };
   1349 
   1350 }  // end namespace clang
   1351 
   1352 #endif
   1353