Home | History | Annotate | Download | only in Lex
      1 //===--- Preprocessor.h - C Language Family Preprocessor --------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 //  This file defines the Preprocessor interface.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
     15 #define LLVM_CLANG_LEX_PREPROCESSOR_H
     16 
     17 #include "clang/Lex/MacroInfo.h"
     18 #include "clang/Lex/Lexer.h"
     19 #include "clang/Lex/PTHLexer.h"
     20 #include "clang/Lex/PPCallbacks.h"
     21 #include "clang/Lex/TokenLexer.h"
     22 #include "clang/Lex/PTHManager.h"
     23 #include "clang/Basic/Builtins.h"
     24 #include "clang/Basic/Diagnostic.h"
     25 #include "clang/Basic/IdentifierTable.h"
     26 #include "clang/Basic/SourceLocation.h"
     27 #include "llvm/ADT/DenseMap.h"
     28 #include "llvm/ADT/IntrusiveRefCntPtr.h"
     29 #include "llvm/ADT/SmallPtrSet.h"
     30 #include "llvm/ADT/OwningPtr.h"
     31 #include "llvm/ADT/SmallVector.h"
     32 #include "llvm/ADT/ArrayRef.h"
     33 #include "llvm/Support/Allocator.h"
     34 #include <vector>
     35 
     36 namespace llvm {
     37   template<unsigned InternalLen> class SmallString;  // forward declaration
     38 }
     39 
     40 namespace clang {
     41 
     42 class SourceManager;
     43 class ExternalPreprocessorSource;
     44 class FileManager;
     45 class FileEntry;
     46 class HeaderSearch;
     47 class PragmaNamespace;
     48 class PragmaHandler;
     49 class CommentHandler;
     50 class ScratchBuffer;
     51 class TargetInfo;
     52 class PPCallbacks;
     53 class CodeCompletionHandler;
     54 class DirectoryLookup;
     55 class PreprocessingRecord;
     56 class ModuleLoader;
     57 
     58 /// Preprocessor - This object engages in a tight little dance with the lexer to
     59 /// efficiently preprocess tokens.  Lexers know only about tokens within a
     60 /// single source file, and don't know anything about preprocessor-level issues
     61 /// like the #include stack, token expansion, etc.
     62 ///
     63 class Preprocessor : public RefCountedBase<Preprocessor> {
     64   DiagnosticsEngine        *Diags;
     65   LangOptions       &LangOpts;
     66   const TargetInfo  *Target;
     67   FileManager       &FileMgr;
     68   SourceManager     &SourceMgr;
     69   ScratchBuffer     *ScratchBuf;
     70   HeaderSearch      &HeaderInfo;
     71   ModuleLoader      &TheModuleLoader;
     72 
     73   /// \brief External source of macros.
     74   ExternalPreprocessorSource *ExternalSource;
     75 
     76 
     77   /// PTH - An optional PTHManager object used for getting tokens from
     78   ///  a token cache rather than lexing the original source file.
     79   OwningPtr<PTHManager> PTH;
     80 
     81   /// BP - A BumpPtrAllocator object used to quickly allocate and release
     82   ///  objects internal to the Preprocessor.
     83   llvm::BumpPtrAllocator BP;
     84 
     85   /// Identifiers for builtin macros and other builtins.
     86   IdentifierInfo *Ident__LINE__, *Ident__FILE__;   // __LINE__, __FILE__
     87   IdentifierInfo *Ident__DATE__, *Ident__TIME__;   // __DATE__, __TIME__
     88   IdentifierInfo *Ident__INCLUDE_LEVEL__;          // __INCLUDE_LEVEL__
     89   IdentifierInfo *Ident__BASE_FILE__;              // __BASE_FILE__
     90   IdentifierInfo *Ident__TIMESTAMP__;              // __TIMESTAMP__
     91   IdentifierInfo *Ident__COUNTER__;                // __COUNTER__
     92   IdentifierInfo *Ident_Pragma, *Ident__pragma;    // _Pragma, __pragma
     93   IdentifierInfo *Ident__VA_ARGS__;                // __VA_ARGS__
     94   IdentifierInfo *Ident__has_feature;              // __has_feature
     95   IdentifierInfo *Ident__has_extension;            // __has_extension
     96   IdentifierInfo *Ident__has_builtin;              // __has_builtin
     97   IdentifierInfo *Ident__has_attribute;            // __has_attribute
     98   IdentifierInfo *Ident__has_include;              // __has_include
     99   IdentifierInfo *Ident__has_include_next;         // __has_include_next
    100   IdentifierInfo *Ident__has_warning;              // __has_warning
    101 
    102   SourceLocation DATELoc, TIMELoc;
    103   unsigned CounterValue;  // Next __COUNTER__ value.
    104 
    105   enum {
    106     /// MaxAllowedIncludeStackDepth - Maximum depth of #includes.
    107     MaxAllowedIncludeStackDepth = 200
    108   };
    109 
    110   // State that is set before the preprocessor begins.
    111   bool KeepComments : 1;
    112   bool KeepMacroComments : 1;
    113   bool SuppressIncludeNotFoundError : 1;
    114 
    115   // State that changes while the preprocessor runs:
    116   bool InMacroArgs : 1;            // True if parsing fn macro invocation args.
    117 
    118   /// Whether the preprocessor owns the header search object.
    119   bool OwnsHeaderSearch : 1;
    120 
    121   /// DisableMacroExpansion - True if macro expansion is disabled.
    122   bool DisableMacroExpansion : 1;
    123 
    124   /// \brief Whether we have already loaded macros from the external source.
    125   mutable bool ReadMacrosFromExternalSource : 1;
    126 
    127   /// \brief True if we are pre-expanding macro arguments.
    128   bool InMacroArgPreExpansion;
    129 
    130   /// Identifiers - This is mapping/lookup information for all identifiers in
    131   /// the program, including program keywords.
    132   mutable IdentifierTable Identifiers;
    133 
    134   /// Selectors - This table contains all the selectors in the program. Unlike
    135   /// IdentifierTable above, this table *isn't* populated by the preprocessor.
    136   /// It is declared/expanded here because its role/lifetime is
    137   /// conceptually similar to the IdentifierTable. In addition, the current
    138   /// control flow (in clang::ParseAST()) makes it convenient to put here.
    139   /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
    140   /// the lifetime of the preprocessor.
    141   SelectorTable Selectors;
    142 
    143   /// BuiltinInfo - Information about builtins.
    144   Builtin::Context BuiltinInfo;
    145 
    146   /// PragmaHandlers - This tracks all of the pragmas that the client registered
    147   /// with this preprocessor.
    148   PragmaNamespace *PragmaHandlers;
    149 
    150   /// \brief Tracks all of the comment handlers that the client registered
    151   /// with this preprocessor.
    152   std::vector<CommentHandler *> CommentHandlers;
    153 
    154   /// \brief True if we want to ignore EOF token and continue later on (thus
    155   /// avoid tearing the Lexer and etc. down).
    156   bool IncrementalProcessing;
    157 
    158   /// \brief The code-completion handler.
    159   CodeCompletionHandler *CodeComplete;
    160 
    161   /// \brief The file that we're performing code-completion for, if any.
    162   const FileEntry *CodeCompletionFile;
    163 
    164   /// \brief The offset in file for the code-completion point.
    165   unsigned CodeCompletionOffset;
    166 
    167   /// \brief The location for the code-completion point. This gets instantiated
    168   /// when the CodeCompletionFile gets #include'ed for preprocessing.
    169   SourceLocation CodeCompletionLoc;
    170 
    171   /// \brief The start location for the file of the code-completion point.
    172   /// This gets instantiated when the CodeCompletionFile gets #include'ed
    173   /// for preprocessing.
    174   SourceLocation CodeCompletionFileLoc;
    175 
    176   /// \brief The source location of the 'import' contextual keyword we just
    177   /// lexed, if any.
    178   SourceLocation ModuleImportLoc;
    179 
    180   /// \brief The module import path that we're currently processing.
    181   llvm::SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2>
    182     ModuleImportPath;
    183 
    184   /// \brief Whether the module import expects an identifier next. Otherwise,
    185   /// it expects a '.' or ';'.
    186   bool ModuleImportExpectsIdentifier;
    187 
    188   /// \brief The source location of the currently-active
    189   /// #pragma clang arc_cf_code_audited begin.
    190   SourceLocation PragmaARCCFCodeAuditedLoc;
    191 
    192   /// \brief True if we hit the code-completion point.
    193   bool CodeCompletionReached;
    194 
    195   /// \brief The number of bytes that we will initially skip when entering the
    196   /// main file, which is used when loading a precompiled preamble, along
    197   /// with a flag that indicates whether skipping this number of bytes will
    198   /// place the lexer at the start of a line.
    199   std::pair<unsigned, bool> SkipMainFilePreamble;
    200 
    201   /// CurLexer - This is the current top of the stack that we're lexing from if
    202   /// not expanding a macro and we are lexing directly from source code.
    203   ///  Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
    204   OwningPtr<Lexer> CurLexer;
    205 
    206   /// CurPTHLexer - This is the current top of stack that we're lexing from if
    207   ///  not expanding from a macro and we are lexing from a PTH cache.
    208   ///  Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
    209   OwningPtr<PTHLexer> CurPTHLexer;
    210 
    211   /// CurPPLexer - This is the current top of the stack that we're lexing from
    212   ///  if not expanding a macro.  This is an alias for either CurLexer or
    213   ///  CurPTHLexer.
    214   PreprocessorLexer *CurPPLexer;
    215 
    216   /// CurDirLookup - The DirectoryLookup structure used to find the current
    217   /// FileEntry, if CurLexer is non-null and if applicable.  This allows us to
    218   /// implement #include_next and find directory-specific properties.
    219   const DirectoryLookup *CurDirLookup;
    220 
    221   /// CurTokenLexer - This is the current macro we are expanding, if we are
    222   /// expanding a macro.  One of CurLexer and CurTokenLexer must be null.
    223   OwningPtr<TokenLexer> CurTokenLexer;
    224 
    225   /// \brief The kind of lexer we're currently working with.
    226   enum CurLexerKind {
    227     CLK_Lexer,                // Lex() pulls tokens from CurLexer.
    228     CLK_PTHLexer,             // Lex() pulls tokens from CurPTHLexer.
    229     CLK_TokenLexer,           // Lex() pulls tokens from CurTokenLexer.
    230     CLK_CachingLexer,         // Lex() calls CachingLex().
    231     CLK_LexAfterModuleImport  // Lex() calls LexAfterModuleImport().
    232   } CurLexerKind;
    233 
    234   /// IncludeMacroStack - This keeps track of the stack of files currently
    235   /// #included, and macros currently being expanded from, not counting
    236   /// CurLexer/CurTokenLexer.
    237   struct IncludeStackInfo {
    238     enum CurLexerKind CurLexerKind;       // lexer kind recorded for this entry
    239     Lexer *TheLexer;
    240     PTHLexer *ThePTHLexer;
    241     PreprocessorLexer *ThePPLexer;
    242     TokenLexer *TheTokenLexer;
    243     const DirectoryLookup *TheDirLookup;
    244     /// Stores each argument, unchanged, in the corresponding member.
    245     IncludeStackInfo(enum CurLexerKind Kind, Lexer *FileLex, PTHLexer *PTHLex,
    246                      PreprocessorLexer *PPLex, TokenLexer *TokLex,
    247                      const DirectoryLookup *Dir)
    248       : CurLexerKind(Kind), TheLexer(FileLex), ThePTHLexer(PTHLex),
    249         ThePPLexer(PPLex), TheTokenLexer(TokLex), TheDirLookup(Dir) {}
    250   };
    251   std::vector<IncludeStackInfo> IncludeMacroStack;
    252 
    253   /// Callbacks - These are actions invoked when some preprocessor activity is
    254   /// encountered (e.g. a file is #included, etc).
    255   PPCallbacks *Callbacks;
    256 
    257   /// Macros - For each IdentifierInfo with 'HasMacro' set, we keep a mapping
    258   /// to the actual definition of the macro.
    259   llvm::DenseMap<IdentifierInfo*, MacroInfo*> Macros;
    260 
    261   /// \brief Macros that we want to warn because they are not used at the end
    262   /// of the translation unit; we store just their SourceLocations instead
    263   /// of something like MacroInfo*. The benefit of this is that when we are
    264   /// deserializing from PCH, we don't need to deserialize identifier & macros
    265   /// just so that we can report that they are unused, we just warn using
    266   /// the SourceLocations of this set (that will be filled by the ASTReader).
    267   /// We are using SmallPtrSet instead of a vector for faster removal.
    268   typedef llvm::SmallPtrSet<SourceLocation, 32> WarnUnusedMacroLocsTy;
    269   WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
    270 
    271   /// MacroArgCache - This is a "freelist" of MacroArg objects that can be
    272   /// reused for quick allocation.
    273   MacroArgs *MacroArgCache;
    274   friend class MacroArgs;
    275 
    276   /// PragmaPushMacroInfo - For each IdentifierInfo used in a #pragma
    277   /// push_macro directive, we keep a MacroInfo stack used to restore
    278   /// previous macro value.
    279   llvm::DenseMap<IdentifierInfo*, std::vector<MacroInfo*> > PragmaPushMacroInfo;
    280 
    281   // Various statistics we track for performance analysis.
    282   unsigned NumDirectives, NumIncluded, NumDefined, NumUndefined, NumPragma;
    283   unsigned NumIf, NumElse, NumEndif;
    284   unsigned NumEnteredSourceFiles, MaxIncludeStackDepth;
    285   unsigned NumMacroExpanded, NumFnMacroExpanded, NumBuiltinMacroExpanded;
    286   unsigned NumFastMacroExpanded, NumTokenPaste, NumFastTokenPaste;
    287   unsigned NumSkipped;
    288 
    289   /// Predefines - This string is the predefined macros that preprocessor
    290   /// should use from the command line etc.
    291   std::string Predefines;
    292 
    293   /// TokenLexerCache - Cache macro expanders to reduce malloc traffic.
    294   enum { TokenLexerCacheSize = 8 };  // number of slots in TokenLexerCache
    295   unsigned NumCachedTokenLexers;
    296   TokenLexer *TokenLexerCache[TokenLexerCacheSize];
    297 
    298   /// \brief Keeps macro expanded tokens for TokenLexers.
    299   ///
    300   /// Works like a stack; a TokenLexer adds the macro expanded tokens that it
    301   /// is going to lex in the cache and when it finishes the tokens are removed
    302   /// from the end of the cache.
    303   SmallVector<Token, 16> MacroExpandedTokens;
    304   std::vector<std::pair<TokenLexer *, size_t> > MacroExpandingLexersStack;
    305 
    306   /// \brief A record of the macro definitions and expansions that
    307   /// occurred during preprocessing.
    308   ///
    309   /// This is an optional side structure that can be enabled with
    310   /// \c createPreprocessingRecord() prior to preprocessing.
    311   PreprocessingRecord *Record;
    312 
    313 private:  // Cached tokens state.
    314   typedef SmallVector<Token, 1> CachedTokensTy;
    315 
    316   /// CachedTokens - Cached tokens are stored here when we do backtracking or
    317   /// lookahead. They are "lexed" by the CachingLex() method.
    318   CachedTokensTy CachedTokens;
    319 
    320   /// CachedLexPos - The position of the cached token that CachingLex() should
    321   /// "lex" next. If it points beyond the CachedTokens vector, it means that
    322   /// a normal Lex() should be invoked.
    323   CachedTokensTy::size_type CachedLexPos;
    324 
    325   /// BacktrackPositions - Stack of backtrack positions, allowing nested
    326   /// backtracks. The EnableBacktrackAtThisPos() method pushes a position to
    327   /// indicate where CachedLexPos should be set when the BackTrack() method is
    328   /// invoked (at which point the last position is popped).
    329   std::vector<CachedTokensTy::size_type> BacktrackPositions;
    330 
    331   struct MacroInfoChain {
    332     MacroInfo MI;          // the MacroInfo payload of this chain entry
    333     MacroInfoChain *Next;  // presumably the following entry -- TODO confirm
    334     MacroInfoChain *Prev;  // presumably the preceding entry -- TODO confirm
    335   };
    336 
    337   /// MacroInfos are managed as a chain for easy disposal.  This is the head
    338   /// of that list.
    339   MacroInfoChain *MIChainHead;
    340 
    341   /// MICache - A "freelist" of MacroInfo objects that can be reused for quick
    342   /// allocation.
    343   MacroInfoChain *MICache;
    344 
    345   MacroInfo *getInfoForMacro(IdentifierInfo *II) const;
    346 
    347 public:
    348   Preprocessor(DiagnosticsEngine &diags, LangOptions &opts,
    349                const TargetInfo *target,
    350                SourceManager &SM, HeaderSearch &Headers,
    351                ModuleLoader &TheModuleLoader,
    352                IdentifierInfoLookup *IILookup = 0,
    353                bool OwnsHeaderSearch = false,
    354                bool DelayInitialization = false,
    355                bool IncrProcessing = false);
    356 
    357   ~Preprocessor();
    358 
    359   /// \brief Initialize the preprocessor, if the constructor did not already
    360   /// perform the initialization.
    361   ///
    362   /// \param Target Information about the target.
    363   void Initialize(const TargetInfo &Target);
    364 
    365   DiagnosticsEngine &getDiagnostics() const { return *Diags; } // derefs Diags
    366   void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; } // stores &D
    367 
    368   const LangOptions &getLangOpts() const { return LangOpts; }
          // NOTE(review): Target is dereferenced without a null check here;
          // presumably it must be set before this is called -- confirm.
    369   const TargetInfo &getTargetInfo() const { return *Target; }
    370   FileManager &getFileManager() const { return FileMgr; }
    371   SourceManager &getSourceManager() const { return SourceMgr; }
    372   HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
    373 
    374   IdentifierTable &getIdentifierTable() { return Identifiers; } // mutable ref
    375   SelectorTable &getSelectorTable() { return Selectors; }       // mutable ref
    376   Builtin::Context &getBuiltinInfo() { return BuiltinInfo; }    // mutable ref
    377   llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; } // BP member
    378 
    379   void setPTHManager(PTHManager* pm);
    380 
    381   PTHManager *getPTHManager() { return PTH.get(); } // pointer held by PTH
    382 
    383   void setExternalSource(ExternalPreprocessorSource *Source) {
    384     ExternalSource = Source;  // stored as a raw pointer; no other action here
    385   }
    386 
    387   ExternalPreprocessorSource *getExternalSource() const {
    388     return ExternalSource;  // the external macro source pointer, as stored
    389   }
    390 
    391   /// \brief Retrieve the module loader this preprocessor holds a reference to.
    392   ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
    393 
    394   /// \brief Record the comment-retention flags.  Asking to keep macro
    395   /// comments also switches KeepComments on (it becomes the OR of the two).
    396   void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
    397     this->KeepMacroComments = KeepMacroComments;
    398     this->KeepComments = KeepComments || KeepMacroComments;
    399   }
    400 
    401   bool getCommentRetentionState() const { return KeepComments; } // only KeepComments
    402 
    403   void SetSuppressIncludeNotFoundError(bool Suppress) {
    404     SuppressIncludeNotFoundError = Suppress;  // stored in a 1-bit bitfield
    405   }
    406 
    407   bool GetSuppressIncludeNotFoundError() const { // read-only query of the flag
    408     return SuppressIncludeNotFoundError;
    409   }
    410 
    411   /// isCurrentLexer - Return true if we are lexing directly from the specified
    412   /// lexer, i.e. if it is the lexer CurPPLexer currently points to.
    413   bool isCurrentLexer(const PreprocessorLexer *L) const {
    414     return CurPPLexer == L;
    415   }
    416 
    417   /// getCurrentLexer - Return the current lexer being lexed from (CurPPLexer).
    418   /// Note that this ignores any potentially active macro expansions and _Pragma
    419   /// expansions going on at the time.
    420   PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
    421 
    422   /// getCurrentFileLexer - Return the current file lexer being lexed from.
    423   /// Note that this ignores any potentially active macro expansions and _Pragma
    424   /// expansions going on at the time.
    425   PreprocessorLexer *getCurrentFileLexer() const;
    426 
    427   /// getPPCallbacks/addPPCallbacks - Accessors for preprocessor callbacks.
    428   /// Note that this class takes ownership of any PPCallbacks object given to
    429   /// it.  Adding a second object chains it with the ones already present.
    430   PPCallbacks *getPPCallbacks() const { return Callbacks; }
    431   void addPPCallbacks(PPCallbacks *C) {
    432     // With callbacks already installed, wrap the new and the existing object
    433     // in a PPChainedCallbacks; otherwise C is installed directly.
    434     Callbacks = Callbacks ? new PPChainedCallbacks(C, Callbacks) : C;
    435   }
    436 
    437   /// getMacroInfo - Look up the MacroInfo that the given identifier is
    438   /// #defined to.  Yields null when the identifier isn't #define'd.
    439   MacroInfo *getMacroInfo(IdentifierInfo *II) const {
    440     // Consult getInfoForMacro() only for identifiers flagged as macros.
    441     if (II->hasMacroDefinition())
    442       return getInfoForMacro(II);
    443     return 0;
    444   }
    445 
    446   /// setMacroInfo - Specify a macro for this identifier.
    447   ///
    448   void setMacroInfo(IdentifierInfo *II, MacroInfo *MI,
    449                     bool LoadedFromAST = false);
    450 
    451   /// macro_iterator/macro_begin/macro_end - This allows you to walk the current
    452   /// state of the macro table.  This visits every currently-defined macro.
    453   typedef llvm::DenseMap<IdentifierInfo*,
    454                          MacroInfo*>::const_iterator macro_iterator;
    455   macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
    456   macro_iterator macro_end(bool IncludeExternalMacros = true) const;
    457 
    458   const std::string &getPredefines() const { return Predefines; }
    459   /// setPredefines - Set the predefines for this Preprocessor.  These
    460   /// predefines are automatically injected when parsing the main file.
    461   void setPredefines(const char *P) { Predefines = P; } // P must be non-null
    462   void setPredefines(const std::string &P) { Predefines = P; }
    463 
    464   /// getIdentifierInfo - Return information about the specified preprocessor
    465   /// identifier, looking it up by name in the Identifiers table.
    466   /// NOTE(review): this comment used to recommend an overload taking two
    467   /// character pointers; no such overload is visible here and the parameter
    468   /// is a StringRef, so that advice looks stale -- confirm.
    469   IdentifierInfo *getIdentifierInfo(StringRef Name) const {
    470     return &Identifiers.get(Name);
    471   }
    472 
    473   /// AddPragmaHandler - Add the specified pragma handler to the preprocessor.
    474   /// If 'Namespace' is non-empty, then it is a token required to exist on the
    475   /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
    476   void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
    477   void AddPragmaHandler(PragmaHandler *Handler) {
    478     AddPragmaHandler(StringRef(), Handler);  // empty namespace
    479   }
    480 
    481   /// RemovePragmaHandler - Remove the specific pragma handler from
    482   /// the preprocessor. If \arg Namespace is non-empty, then it should
    483   /// be the namespace that \arg Handler was added to. It is an error
    484   /// to remove a handler that has not been registered.
    485   void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
    486   void RemovePragmaHandler(PragmaHandler *Handler) {
    487     RemovePragmaHandler(StringRef(), Handler);  // empty namespace
    488   }
    489 
    490   /// \brief Add the specified comment handler to the preprocessor.
    491   void AddCommentHandler(CommentHandler *Handler);
    492 
    493   /// \brief Remove the specified comment handler.
    494   ///
    495   /// It is an error to remove a handler that has not been registered.
    496   void RemoveCommentHandler(CommentHandler *Handler);
    497 
    498   /// \brief Set the code completion handler to the given object.
          /// Only the address of \p Handler is stored.
    499   void setCodeCompletionHandler(CodeCompletionHandler &Handler) {
    500     CodeComplete = &Handler;
    501   }
    502 
    503   /// \brief Retrieve the current code-completion handler.
          /// This is null after clearCodeCompletionHandler() has been called.
    504   CodeCompletionHandler *getCodeCompletionHandler() const {
    505     return CodeComplete;
    506   }
    507 
    508   /// \brief Clear out the code completion handler.
          /// Only resets the stored pointer to null; nothing is deleted here.
    509   void clearCodeCompletionHandler() {
    510     CodeComplete = 0;
    511   }
    512 
    513   /// \brief Hook used by the lexer to invoke the "natural language" code
    514   /// completion point.
    515   void CodeCompleteNaturalLanguage();
    516 
    517   /// \brief Retrieve the preprocessing record, or NULL if there is no
    518   /// preprocessing record (it is optional; see createPreprocessingRecord()).
    519   PreprocessingRecord *getPreprocessingRecord() const { return Record; }
    520 
    521   /// \brief Create a new preprocessing record, which will keep track of
    522   /// all macro expansions, macro definitions, etc.
    523   void createPreprocessingRecord(bool RecordConditionalDirectives);
    524 
    525   /// EnterMainSourceFile - Enter the main source file (no FileID is passed in),
    526   /// which implicitly adds the builtin defines etc.
    527   void EnterMainSourceFile();
    528 
    529   /// EndSourceFile - Inform the preprocessor callbacks that processing is
    530   /// complete.
    531   void EndSourceFile();
    532 
    533   /// EnterSourceFile - Add a source file to the top of the include stack and
    534   /// start lexing tokens from it instead of the current buffer.  Emit an error
    535   /// and don't enter the file on error.
    536   void EnterSourceFile(FileID CurFileID, const DirectoryLookup *Dir,
    537                        SourceLocation Loc);
    538 
    539   /// EnterMacro - Add a Macro to the top of the include stack and start lexing
    540   /// tokens from it instead of the current buffer.  Args specifies the
    541   /// tokens input to a function-like macro.
    542   ///
    543   /// ILEnd specifies the location of the ')' for a function-like macro or the
    544   /// identifier for an object-like macro.
    545   void EnterMacro(Token &Identifier, SourceLocation ILEnd, MacroArgs *Args);
    546 
    547   /// EnterTokenStream - Add a "macro" context to the top of the include stack,
    548   /// which will cause the lexer to start returning the specified tokens.
    549   ///
    550   /// If DisableMacroExpansion is true, tokens lexed from the token stream will
    551   /// not be subject to further macro expansion.  Otherwise, these tokens will
    552   /// be re-macro-expanded when/if expansion is enabled.
    553   ///
    554   /// If OwnsTokens is false, this method assumes that the specified stream of
    555   /// tokens has a permanent owner somewhere, so they do not need to be copied.
    556   /// If it is true, it assumes the array of tokens is allocated with new[] and
    557   /// must be freed.
    558   ///
    559   void EnterTokenStream(const Token *Toks, unsigned NumToks,
    560                         bool DisableMacroExpansion, bool OwnsTokens);
    561 
    562   /// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the
    563   /// lexer stack.  This should only be used in situations where the current
    564   /// state of the top-of-stack lexer is known.
    565   void RemoveTopOfLexerStack();
    566 
    567   /// EnableBacktrackAtThisPos - From the point that this method is called, and
    568   /// until CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
    569   /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
    570   /// make the Preprocessor re-lex the same tokens.
    571   ///
    572   /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
    573   /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
    574   /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
    575   ///
    576   /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
    577   /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
    578   /// tokens will continue indefinitely.
    579   ///
    580   void EnableBacktrackAtThisPos();
    581 
    582   /// CommitBacktrackedTokens - Disable the last EnableBacktrackAtThisPos call.
    583   void CommitBacktrackedTokens();
    584 
    585   /// Backtrack - Make Preprocessor re-lex the tokens that were lexed since
    586   /// EnableBacktrackAtThisPos() was previously called.
    587   void Backtrack();
    588 
    589   /// isBacktrackEnabled - True if EnableBacktrackAtThisPos() was called and
    590   /// caching of tokens is on, i.e. BacktrackPositions is not empty.
    591   bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
    592 
    593   /// Lex - Produce the next preprocessed token in Result by forwarding to
    594   /// whichever lexer (or lexing routine) CurLexerKind currently selects.
    595   void Lex(Token &Result) {
    596     switch (CurLexerKind) {
    597     case CLK_Lexer:                CurLexer->Lex(Result); return;
    598     case CLK_PTHLexer:             CurPTHLexer->Lex(Result); return;
    599     case CLK_TokenLexer:           CurTokenLexer->Lex(Result); return;
    600     case CLK_CachingLexer:         CachingLex(Result); return;
    601     case CLK_LexAfterModuleImport: LexAfterModuleImport(Result); return;
    602     }
    603   }
    604 
    605   void LexAfterModuleImport(Token &Result);
    606 
    607   /// LexNonComment - Lex tokens into Result until one that is not a comment
    608   /// has been produced.  Useful in -E -C mode, where retained comments would
    609   /// otherwise foul up preprocessor directive handling.
    610   void LexNonComment(Token &Result) {
    611     Lex(Result);
    612     while (Result.getKind() == tok::comment)
    613       Lex(Result);
    614   }
    615 
    616   /// LexUnexpandedToken - Same as Lex, except that macro expansion of
    617   /// identifier tokens is turned off while the token is being lexed.
    618   void LexUnexpandedToken(Token &Result) {
    619     // Save the current setting so that it can be put back afterwards; this
    620     // keeps the function correct when expansion was already disabled.
    621     const bool SavedDisable = DisableMacroExpansion;
    622     DisableMacroExpansion = true;
    623     // Lex exactly one token with expansion off.
    624     Lex(Result);
    625     // Restore the saved setting instead of unconditionally re-enabling.
    626     DisableMacroExpansion = SavedDisable;
    627   }
    628 
    629   /// LexUnexpandedNonComment - Counterpart of LexNonComment that lexes each
    630   /// token with macro expansion of identifiers disabled.
    631   void LexUnexpandedNonComment(Token &Result) {
    632     LexUnexpandedToken(Result);
    633     while (Result.getKind() == tok::comment)
    634       LexUnexpandedToken(Result);
    635   }
    636 
    637   /// LookAhead - Peek at the token N positions ahead without consuming
    638   /// anything.  LookAhead(0) is the token the next Lex() would return,
    639   /// LookAhead(1) the one after it, and so on.  The tokens are normal
    640   /// post-phase-5 tokens, i.e. what 'Lex' (not 'LexUnexpandedToken') yields.
    641   /// Tokens not yet in CachedTokens are obtained through PeekAhead().
    642   const Token &LookAhead(unsigned N) {
    643     const bool AlreadyCached = CachedLexPos + N < CachedTokens.size();
    644     if (!AlreadyCached)
    645       return PeekAhead(N+1);
    646     return CachedTokens[CachedLexPos+N];
    647   }
    648 
    649   /// RevertCachedTokens - When backtracking is enabled and tokens are cached,
    650   /// this allows to revert a specific number of tokens by moving CachedLexPos
    651   /// back by N.  Note that the number of tokens being reverted should be up to
    652   /// the last backtrack position, not more.
    653   void RevertCachedTokens(unsigned N) {
    654     assert(isBacktrackEnabled() &&
    655            "Should only be called when tokens are cached for backtracking");
            // The operands are cast to signed so that an N larger than CachedLexPos
            // yields a negative difference and trips the assertions below.
    656     assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back())
    657          && "Should revert tokens up to the last backtrack position, not more");
    658     assert(signed(CachedLexPos) - signed(N) >= 0 &&
    659            "Corrupted backtrack positions ?");
    660     CachedLexPos -= N;
    661   }
    662 
    663   /// EnterToken - Enters a token in the token stream to be lexed next, by
    664   /// inserting it into CachedTokens at CachedLexPos.  If BackTrack() is called
    665   /// afterwards, the token will remain at the insertion point.
    666   void EnterToken(const Token &Tok) {
    667     EnterCachingLexMode();
    668     CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
    669   }
    670 
    671   /// AnnotateCachedTokens - Tell the Preprocessor that, if it is caching
    672   /// tokens because backtracking is enabled, the most recent cached tokens
    673   /// should be replaced with the annotation token Tok.  Does nothing when
    674   /// backtracking is not enabled.
    675   ///
    676   /// This is purely an optimization: it keeps the cached tokens from being
    677   /// re-parsed and re-resolved after a backtrack.
    678   void AnnotateCachedTokens(const Token &Tok) {
    679     assert(Tok.isAnnotation() && "Expected annotation token");
    680     if (CachedLexPos == 0 || !isBacktrackEnabled())
    681       return;
    682     AnnotatePreviousCachedTokens(Tok);
    683   }
    684 
    685   /// \brief Replace the last token with an annotation token.
    686   ///
    687   /// Like AnnotateCachedTokens(), this swaps an already-parsed (and
    688   /// resolved) token for an annotation token, but it overwrites only the
    689   /// last token and leaves every other cached token untouched.  Does
    690   /// nothing when backtracking is not enabled.
    691   void ReplaceLastTokenWithAnnotation(const Token &Tok) {
    692     assert(Tok.isAnnotation() && "Expected annotation token");
    693     if (CachedLexPos == 0 || !isBacktrackEnabled())
    694       return;
    695     // The last token is the cached entry just before the lex position.
    696     CachedTokens[CachedLexPos-1] = Tok;
    697   }
    698 
    699   /// \brief Recompute the current lexer kind based on the CurLexer/CurPTHLexer/
    700   /// CurTokenLexer pointers.
    701   void recomputeCurLexerKind();
    702 
    703   /// \brief Returns the IncrementalProcessing flag (true when enabled).
    704   bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
    705 
    706   /// \brief Enables incremental processing, or disables it if value is false.
    707   void enableIncrementalProcessing(bool value = true) {
    708     IncrementalProcessing = value;
    709   }
    710 
    711   /// \brief Specify the point at which code-completion will be performed.
    712   ///
    713   /// \param File the file in which code completion should occur. If
    714   /// this file is included multiple times, code-completion will
    715   /// perform completion the first time it is included. If NULL, this
    716   /// function clears out the code-completion point.
    717   ///
    718   /// \param Line the line at which code completion should occur
    719   /// (1-based).
    720   ///
    721   /// \param Column the column at which code completion should occur
    722   /// (1-based).
    723   ///
    724   /// \returns true if an error occurred, false otherwise.
    725   bool SetCodeCompletionPoint(const FileEntry *File,
    726                               unsigned Line, unsigned Column);
    727 
    728   /// \brief True if code completion is on, i.e. CodeCompletionFile is set.
    729   bool isCodeCompletionEnabled() const { return CodeCompletionFile != 0; }
    730 
    731   /// \brief Returns the location of the code-completion point.
    732   /// The result is an invalid location if code-completion is not enabled or
    733   /// the file containing the code-completion point has not been lexed yet.
    734   SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
    735 
    736   /// \brief Returns the start location of the file that contains the
    737   /// code-completion point.  The result is an invalid location if
    738   /// code-completion is not enabled or that file has not been lexed yet.
    739   SourceLocation getCodeCompletionFileLoc() const {
    740     return CodeCompletionFileLoc;
    741   }
    742 
    743   /// \brief Returns true if code-completion is enabled and the
    744   /// code-completion point has already been hit.
    745   bool isCodeCompletionReached() const { return CodeCompletionReached; }
    746 
    747   /// \brief Note that we hit the code-completion point; mutes diagnostics.
    748   void setCodeCompletionReached() {
    749     assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
    750     CodeCompletionReached = true;
    751     // Silence any diagnostics that occur after the code-completion point.
    752     getDiagnostics().setSuppressAllDiagnostics(true);
    753   }
    754 
    755   /// \brief Returns the location of the currently-active #pragma clang
    756   /// arc_cf_code_audited begin.  The result is an invalid location if
    757   /// no such pragma is active.
    758   SourceLocation getPragmaARCCFCodeAuditedLoc() const {
    759     return PragmaARCCFCodeAuditedLoc;
    760   }
    761 
    762   /// \brief Set the location of the currently-active #pragma clang
    763   /// arc_cf_code_audited begin.  Passing an invalid location ends the pragma.
    764   void setPragmaARCCFCodeAuditedLoc(SourceLocation Loc) {
    765     PragmaARCCFCodeAuditedLoc = Loc;
    766   }
    767 
    768   /// \brief Instruct the preprocessor to skip part of the main source file.
    769   ///
    770   /// \param Bytes The number of bytes in the preamble to skip.
    771   ///
    772   /// \param StartOfLine Whether skipping these bytes puts the lexer at the
    773   /// start of a line.
    774   void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
    775     // Both values are stored in the two fields of SkipMainFilePreamble.
    776     SkipMainFilePreamble.second = StartOfLine;
    777     SkipMainFilePreamble.first = Bytes;
    778   }
    779 
    780   /// Diag - Forwarding function for diagnostics.  This emits a diagnostic at
    781   /// the specified source location by passing Loc and DiagID on to the
    782   /// Report method of the diagnostics object and returning its result.
    783   DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
    784     return Diags->Report(Loc, DiagID);
    785   }
    786 
    787   DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
    788     return Diags->Report(Tok.getLocation(), DiagID); // Uses Tok's location.
    789   }
    790 
    791   /// getSpelling() - Return the 'spelling' of the token at the given
    792   /// location.  The location is used as given: this does not go up to the
    793   /// spelling location or down to the expansion location.
    794   ///
    795   /// \param buffer A buffer which will be used only if the token requires
    796   ///   "cleaning", e.g. if it contains trigraphs or escaped newlines.
    797   /// \param invalid If non-null, will be set \c true if an error occurs.
    798   /// \returns the token's spelling.
    799   StringRef getSpelling(SourceLocation loc, SmallVectorImpl<char> &buffer,
    800                         bool *invalid = 0) const {
    801     return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
    802   }
    803 
    804   /// getSpelling() - Return the 'spelling' of the Tok token, i.e. the
    805   /// characters that represent it in the source file after trigraph expansion
    806   /// and escaped-newline folding.  In particular, this yields the true,
    807   /// uncanonicalized spelling of things like digraphs, UCNs, etc.
    808   ///
    809   /// \param Invalid If non-null, will be set \c true if an error occurs.
    810   std::string getSpelling(const Token &Tok, bool *Invalid = 0) const {
    811     std::string Text = Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
    812     return Text;
    813   }
    814 
    815   /// getSpelling - Get the spelling of a token into a preallocated buffer
    816   /// rather than an std::string.  The caller must allocate enough space for
    817   /// the token, which is guaranteed to be at least Tok.getLength() bytes
    818   /// long.  The length of the actual result is returned.
    819   ///
    820   /// This either fills in the supplied buffer or *changes the input pointer*
    821   /// to point to a constant buffer already holding the data (avoiding a
    822   /// copy); in the latter case the caller must not modify that buffer.
    823   unsigned getSpelling(const Token &Tok, const char *&Buffer,
    824                        bool *Invalid = 0) const {
    825     const unsigned Length =
    826       Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
    827     return Length;
    828   }
    829 
    830   /// getSpelling - This method is used to get the spelling of a token into a
    831   /// SmallVector. Note that the returned StringRef may not point to the
    832   /// supplied buffer if a copy can be avoided.
    833   StringRef getSpelling(const Token &Tok,
    834                         SmallVectorImpl<char> &Buffer,
    835                         bool *Invalid = 0) const;
    836 
    837   /// getSpellingOfSingleCharacterNumericConstant - Return the sole character of
    838   /// Tok, which must be a numeric constant of length 1 that needs no cleaning.
    839   char getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
    840                                                    bool *Invalid = 0) const {
    841     assert(Tok.is(tok::numeric_constant) &&
    842            Tok.getLength() == 1 && "Called on unsupported token");
    843     assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
    844 
    845     // Prefer the literal data pointer carried by the token, when it has one.
    846     const char *Data = Tok.getLiteralData();
    847     // Otherwise, fall back on getCharacterData, which is slower, but always
    848     // works.
    849     if (!Data)
    850       Data = SourceMgr.getCharacterData(Tok.getLocation(), Invalid);
    851     return *Data;
    852   }
    853 
    854   /// \brief Retrieve the name of the immediate macro expansion.
    855   ///
    856   /// This routine starts from a source location, and finds the name of the macro
    857   /// responsible for its immediate expansion. It looks through any intervening
    858   /// macro argument expansions to compute this. It returns a StringRef which
    859   /// refers to the SourceManager-owned buffer of the source where that macro
    860   /// name is spelled. Thus, the result must not outlive the SourceManager.
    861   StringRef getImmediateMacroName(SourceLocation Loc) {
    862     return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts());
    863   }
    864 
    865   /// CreateString - Plop the specified string into a scratch buffer and set the
    866   /// specified token's location and length to it.  If specified, the source
    867   /// location provides a location of the expansion point of the token.
    868   void CreateString(const char *Buf, unsigned Len, Token &Tok,
    869                     SourceLocation ExpansionLocStart = SourceLocation(),
    870                     SourceLocation ExpansionLocEnd = SourceLocation());
    871 
    872   /// \brief Computes the source location just past the end of the
    873   /// token at this source location.
    874   ///
    875   /// The result points just past the end of the token referenced by
    876   /// \p Loc.  It is generally used when a diagnostic needs to point just
    877   /// after a token where it expected something different than it received.
    878   /// If the resulting location would not be meaningful (e.g., if it points
    879   /// into a macro), an invalid source location is returned.
    880   ///
    881   /// \param Offset an offset from the end of the token that the returned
    882   /// location should refer to.  The default offset (0) gives the location
    883   /// just past the end of the token; an offset of 1 gives the location of
    884   /// the last character in the token, etc.
    885   SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) {
    886     const SourceLocation EndLoc =
    887       Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
    888     return EndLoc;
    889   }
    890 
    891   /// \brief Returns true if the given MacroID location points at the first
    892   /// token of the macro expansion.
    893   /// \param MacroBegin If non-null and the result is true, receives the
    894   /// begin location of the macro.
    895   bool isAtStartOfMacroExpansion(SourceLocation loc,
    896                                  SourceLocation *MacroBegin = 0) const {
    897     const bool AtStart =
    898       Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts, MacroBegin);
    899     return AtStart;
    900   }
    901 
    902   /// \brief Returns true if the given MacroID location points at the last
    903   /// token of the macro expansion.
    904   ///
    905   /// \param MacroEnd If non-null and function returns true, it is set to
    906   /// end location of the macro.
    907   bool isAtEndOfMacroExpansion(SourceLocation loc,
    908                                SourceLocation *MacroEnd = 0) const {
    909     return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
    910   }
    911 
    912   /// DumpToken - Print the token to stderr, used for debugging.
    913   ///
    914   void DumpToken(const Token &Tok, bool DumpFlags = false) const;
    915   void DumpLocation(SourceLocation Loc) const;
    916   void DumpMacro(const MacroInfo &MI) const;
    917 
    918   /// AdvanceToTokenCharacter - Given a location that specifies the start of a
    919   /// token, return a new location that specifies character Char within it.
    920   SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
    921                                          unsigned Char) const {
    922     return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
    923   }
    924 
    925   /// IncrementPasteCounter - Count one more token paste operation.  A paste
    926   /// handled as a 'fast paste' (isFast set) is tallied in NumFastTokenPaste,
    927   /// any other paste in NumTokenPaste.
    928   ///
    929   void IncrementPasteCounter(bool isFast) {
    930     if (!isFast)
    931       ++NumTokenPaste;
    932     else
    933       ++NumFastTokenPaste;
    934   }
    935 
    936   void PrintStats();
    937 
    938   size_t getTotalMemory() const;
    939 
    940   /// HandleMicrosoftCommentPaste - When the macro expander pastes together a
    941   /// comment (/##/) in microsoft mode, this method handles updating the current
    942   /// state, returning the token on the next source line.
    943   void HandleMicrosoftCommentPaste(Token &Tok);
    944 
    945   //===--------------------------------------------------------------------===//
    946   // Preprocessor callback methods.  These are invoked by a lexer as various
    947   // directives and events are found.
    948 
    949   /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
    950   /// identifier information for the token and install it into the token,
    951   /// updating the token kind accordingly.
    952   IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
    953 
    954 private:
    955   llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
    956 
    957 public:
    958 
    959   // SetPoisonReason - Call this function to indicate the reason for
    960   // poisoning an identifier. If that identifier is accessed while
    961   // poisoned, then this reason will be used instead of the default
    962   // "poisoned" diagnostic.
    963   void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);
    964 
    965   // HandlePoisonedIdentifier - Display reason for poisoned
    966   // identifier.
    967   void HandlePoisonedIdentifier(Token & Tok);
    968 
    969   // MaybeHandlePoisonedIdentifier - Call HandlePoisonedIdentifier when
    970   // Identifier carries identifier info that is marked as poisoned.
    971   void MaybeHandlePoisonedIdentifier(Token & Identifier) {
    972     IdentifierInfo *II = Identifier.getIdentifierInfo();
    973     if (II && II->isPoisoned())
    974       HandlePoisonedIdentifier(Identifier);
    975   }
    976 
    977 private:
    978   /// Identifiers used for SEH handling in Borland. These are only
    979   /// allowed in particular circumstances
    980   // __except block
    981   IdentifierInfo *Ident__exception_code,
    982                  *Ident___exception_code,
    983                  *Ident_GetExceptionCode;
    984   // __except filter expression
    985   IdentifierInfo *Ident__exception_info,
    986                  *Ident___exception_info,
    987                  *Ident_GetExceptionInfo;
    988   // __finally
    989   IdentifierInfo *Ident__abnormal_termination,
    990                  *Ident___abnormal_termination,
    991                  *Ident_AbnormalTermination;
    992 public:
    993   void PoisonSEHIdentifiers(bool Poison = true); // Borland
    994 
    995   /// HandleIdentifier - This callback is invoked when the lexer reads an
    996   /// identifier and has filled in the tokens IdentifierInfo member.  This
    997   /// callback potentially macro expands it or turns it into a named token (like
    998   /// 'for').
    999   void HandleIdentifier(Token &Identifier);
   1000 
   1001 
   1002   /// HandleEndOfFile - This callback is invoked when the lexer hits the end of
   1003   /// the current file.  This either returns the EOF token and returns true, or
   1004   /// pops a level off the include stack and returns false, at which point the
   1005   /// client should call lex again.
   1006   bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
   1007 
   1008   /// HandleEndOfTokenLexer - This callback is invoked when the current
   1009   /// TokenLexer hits the end of its token stream.
   1010   bool HandleEndOfTokenLexer(Token &Result);
   1011 
   1012   /// HandleDirective - This callback is invoked when the lexer sees a # token
   1013   /// at the start of a line.  This consumes the directive, modifies the
   1014   /// lexer/preprocessor state, and advances the lexer(s) so that the next token
   1015   /// read is the correct one.
   1016   void HandleDirective(Token &Result);
   1017 
   1018   /// CheckEndOfDirective - Ensure that the next token is a tok::eod token.  If
   1019   /// not, emit a diagnostic and consume up until the eod.  If EnableMacros is
   1020   /// true, then we consider macros that expand to zero tokens as being ok.
   1021   void CheckEndOfDirective(const char *Directive, bool EnableMacros = false);
   1022 
   1023   /// DiscardUntilEndOfDirective - Read and discard all tokens remaining on the
   1024   /// current line until the tok::eod token is found.
   1025   void DiscardUntilEndOfDirective();
   1026 
   1027   /// SawDateOrTime - This returns true if the preprocessor has seen a use of
   1028   /// __DATE__ or __TIME__ in the file so far (DATELoc or TIMELoc is set).
   1029   bool SawDateOrTime() const {
   1030     return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
   1031   }
   1032   unsigned getCounterValue() const { return CounterValue; } // Read it.
   1033   void setCounterValue(unsigned V) { CounterValue = V; } // Overwrite it.
   1034 
   1035   /// \brief Retrieves the module that we're currently building, if any.
   1036   Module *getCurrentModule();
   1037 
   1038   /// AllocateMacroInfo - Allocate a new MacroInfo object with the provided
   1039   ///  SourceLocation.
   1040   MacroInfo *AllocateMacroInfo(SourceLocation L);
   1041 
   1042   /// CloneMacroInfo - Allocate a new MacroInfo object which is clone of MI.
   1043   MacroInfo *CloneMacroInfo(const MacroInfo &MI);
   1044 
   1045   /// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully
   1046   /// checked and spelled filename, e.g. as an operand of #include. This returns
   1047   /// true if the input filename was in <>'s or false if it were in ""'s.  The
   1048   /// caller is expected to provide a buffer that is large enough to hold the
   1049   /// spelling of the filename, but is also expected to handle the case when
   1050   /// this method decides to use a different buffer.
   1051   bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Filename);
   1052 
   1053   /// LookupFile - Given a "foo" or <foo> reference, look up the indicated file,
   1054   /// return null on failure.  isAngled indicates whether the file reference is
   1055   /// for system #include's or not (i.e. using <> instead of "").
   1056   const FileEntry *LookupFile(StringRef Filename,
   1057                               bool isAngled, const DirectoryLookup *FromDir,
   1058                               const DirectoryLookup *&CurDir,
   1059                               SmallVectorImpl<char> *SearchPath,
   1060                               SmallVectorImpl<char> *RelativePath,
   1061                               Module **SuggestedModule,
   1062                               bool SkipCache = false);
   1063 
   1064   /// GetCurDirLookup - The DirectoryLookup structure used to find the current
   1065   /// FileEntry, if CurLexer is non-null and if applicable.  This allows us to
   1066   /// implement #include_next and find directory-specific properties.
   1067   const DirectoryLookup *GetCurDirLookup() { return CurDirLookup; }
   1068 
   1069   /// isInPrimaryFile - Return true if we're in the top-level file, not in a
   1070   /// #include.
   1071   bool isInPrimaryFile() const;
   1072 
   1073   /// ConcatenateIncludeName - Handle cases where the #include name is expanded
   1074   /// from a macro as multiple tokens, which need to be glued together.  This
   1075   /// occurs for code like:
   1076   ///    #define FOO <a/b.h>
   1077   ///    #include FOO
   1078   /// because in this case, "<a/b.h>" is returned as 7 tokens, not one.
   1079   ///
   1080   /// This code concatenates and consumes tokens up to the '>' token.  It
   1081   /// returns false if the > was found, otherwise it returns true if it finds
   1082   /// and consumes the EOD marker.
   1083   bool ConcatenateIncludeName(SmallString<128> &FilenameBuffer,
   1084                               SourceLocation &End);
   1085 
   1086   /// LexOnOffSwitch - Lex an on-off-switch (C99 6.10.6p2) and verify that it is
   1087   /// followed by EOD.  Return true if the token is not a valid on-off-switch.
   1088   bool LexOnOffSwitch(tok::OnOffSwitch &OOS);
   1089 
   1090 private:
   1091 
   1092   /// PushIncludeMacroStack - Record the current lexer state as a new entry
   1093   /// on top of IncludeMacroStack, then clear CurPPLexer.
   1094   void PushIncludeMacroStack() {
   1095     // The lexers obtained via take() are kept in the saved entry.
   1096     IncludeStackInfo Saved(CurLexerKind, CurLexer.take(), CurPTHLexer.take(),
   1097                            CurPPLexer, CurTokenLexer.take(), CurDirLookup);
   1098     IncludeMacroStack.push_back(Saved);
   1099     CurPPLexer = 0;
   1100   }
   1101 
   1102   void PopIncludeMacroStack() { // Restore state from the top stack entry.
   1103     CurLexer.reset(IncludeMacroStack.back().TheLexer);
   1104     CurPTHLexer.reset(IncludeMacroStack.back().ThePTHLexer);
   1105     CurPPLexer = IncludeMacroStack.back().ThePPLexer;
   1106     CurTokenLexer.reset(IncludeMacroStack.back().TheTokenLexer);
   1107     CurDirLookup  = IncludeMacroStack.back().TheDirLookup;
   1108     CurLexerKind = IncludeMacroStack.back().CurLexerKind;
   1109     IncludeMacroStack.pop_back(); // Every field is restored; drop the entry.
   1110   }
   1111 
   1112   /// AllocateMacroInfo - Allocate a new MacroInfo object.
   1113   MacroInfo *AllocateMacroInfo();
   1114 
   1115   /// ReleaseMacroInfo - Release the specified MacroInfo.  This memory will
   1116   ///  be reused for allocating new MacroInfo objects.
   1117   void ReleaseMacroInfo(MacroInfo* MI);
   1118 
   1119   /// ReadMacroName - Lex and validate a macro name, which occurs after a
   1120   /// #define or #undef.  This emits a diagnostic, sets the token kind to eod,
   1121   /// and discards the rest of the macro line if the macro name is invalid.
   1122   void ReadMacroName(Token &MacroNameTok, char isDefineUndef = 0);
   1123 
   1124   /// ReadMacroDefinitionArgList - The ( starting an argument list of a macro
   1125   /// definition has just been read.  Lex the rest of the arguments and the
   1126   /// closing ), updating MI with what we learn and saving in LastTok the
   1127   /// last token read.
   1128   /// Return true if an error occurs parsing the arg list.
   1129   bool ReadMacroDefinitionArgList(MacroInfo *MI, Token& LastTok);
   1130 
   1131   /// SkipExcludedConditionalBlock - We just read a #if or related directive and
   1132   /// decided that the subsequent tokens are in the #if'd out portion of the
   1133   /// file.  Lex the rest of the file, until we see an #endif.  If
   1134   /// FoundNonSkipPortion is true, then we have already emitted code for part of
   1135   /// this #if directive, so #else/#elif blocks should never be entered. If
   1136   /// FoundElse is false, then #else directives are ok, if not, then we have
   1137   /// already seen one so a #else directive is a duplicate.  When this returns,
   1138   /// the caller can lex the first valid token.
   1139   void SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
   1140                                     bool FoundNonSkipPortion, bool FoundElse,
   1141                                     SourceLocation ElseLoc = SourceLocation());
   1142 
   1143   /// PTHSkipExcludedConditionalBlock - A fast PTH version of
   1144   ///  SkipExcludedConditionalBlock.
   1145   void PTHSkipExcludedConditionalBlock();
   1146 
   1147   /// EvaluateDirectiveExpression - Evaluate an integer constant expression that
   1148   /// may occur after a #if or #elif directive and return it as a bool.  If the
   1149   /// expression is equivalent to "!defined(X)" return X in IfNDefMacro.
   1150   bool EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
   1151 
   1152   /// RegisterBuiltinPragmas - Install the standard preprocessor pragmas:
   1153   /// #pragma GCC poison/system_header/dependency and #pragma once.
   1154   void RegisterBuiltinPragmas();
   1155 
   1156   /// RegisterBuiltinMacros - Register builtin macros, such as __LINE__ with the
   1157   /// identifier table.
   1158   void RegisterBuiltinMacros();
   1159 
   1160   /// HandleMacroExpandedIdentifier - If an identifier token is read that is to
   1161   /// be expanded as a macro, handle it and return the next token as 'Tok'.  If
   1162   /// the macro should not be expanded return true, otherwise return false.
   1163   bool HandleMacroExpandedIdentifier(Token &Tok, MacroInfo *MI);
   1164 
   1165   /// \brief Cache macro expanded tokens for TokenLexers.
   1166   ///
   1167   /// Works like a stack; a TokenLexer adds the macro expanded tokens that it
   1168   /// is going to lex to the cache, and when it finishes the tokens are removed
   1169   /// from the end of the cache.
   1170   Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
   1171                                   ArrayRef<Token> tokens);
   1172   void removeCachedMacroExpandedTokensOfLastLexer();
   1173   friend void TokenLexer::ExpandFunctionArguments();
   1174 
   1175   /// isNextPPTokenLParen - Determine whether the next preprocessor token to be
   1176   /// lexed is a '('.  If so, consume the token and return true, if not, this
   1177   /// method should have no observable side-effect on the lexed tokens.
   1178   bool isNextPPTokenLParen();
   1179 
   1180   /// ReadFunctionLikeMacroArgs - After reading "MACRO(", this method is
   1181   /// invoked to read all of the formal arguments specified for the macro
   1182   /// invocation.  This returns null on error.
   1183   MacroArgs *ReadFunctionLikeMacroArgs(Token &MacroName, MacroInfo *MI,
   1184                                        SourceLocation &ExpansionEnd);
   1185 
   1186   /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
   1187   /// as a builtin macro, handle it and return the next token as 'Tok'.
   1188   void ExpandBuiltinMacro(Token &Tok);
   1189 
   1190   /// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then
   1191   /// return the first token after the directive.  The _Pragma token has just
   1192   /// been read into 'Tok'.
   1193   void Handle_Pragma(Token &Tok);
   1194 
   1195   /// HandleMicrosoft__pragma - Like Handle_Pragma except the pragma text
   1196   /// is not enclosed within a string literal.
   1197   void HandleMicrosoft__pragma(Token &Tok);
   1198 
   1199   /// EnterSourceFileWithLexer - Add a lexer to the top of the include stack and
   1200   /// start lexing tokens from it instead of the current buffer.
   1201   void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir);
   1202 
   1203   /// EnterSourceFileWithPTH - Add a lexer to the top of the include stack and
   1204   /// start getting tokens from it using the PTH cache.
   1205   void EnterSourceFileWithPTH(PTHLexer *PL, const DirectoryLookup *Dir);
   1206 
   1207   /// IsFileLexer - Returns true if we are lexing from a file and not a pragma
   1208   ///  or a macro: L must not be a pragma lexer or, if L is null, P is set.
   1209   static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
   1210     return L ? !L->isPragmaLexer() : P != 0;
   1211   }
   1212 
   1213   static bool IsFileLexer(const IncludeStackInfo& I) {
   1214     return IsFileLexer(I.TheLexer, I.ThePPLexer); // Test a saved entry.
   1215   }
   1216 
   1217   bool IsFileLexer() const {
   1218     return IsFileLexer(CurLexer.get(), CurPPLexer); // Test current lexers.
   1219   }
   1220 
   1221   //===--------------------------------------------------------------------===//
   1222   // Caching stuff.
   1223   void CachingLex(Token &Result);
   1224   bool InCachingLexMode() const {
   1225     if (!(CurPPLexer == 0 && CurTokenLexer == 0 && CurPTHLexer == 0))
   1226       return false;
   1227     // All lexer pointers 0 with an empty stack means past EOF, not caching.
   1228     return !IncludeMacroStack.empty();
   1229   }
   1230   void EnterCachingLexMode();
   1231   void ExitCachingLexMode() { // No-op unless currently in CachingLex mode.
   1232     if (InCachingLexMode())
   1233       RemoveTopOfLexerStack();
   1234   }
   1235   const Token &PeekAhead(unsigned N);
   1236   void AnnotatePreviousCachedTokens(const Token &Tok);
   1237 
   1238   //===--------------------------------------------------------------------===//
   1239   /// Handle*Directive - implement the various preprocessor directives.  These
   1240   /// should side-effect the current preprocessor object so that the next call
   1241   /// to Lex() will return the appropriate token next.
   1242   void HandleLineDirective(Token &Tok);
   1243   void HandleDigitDirective(Token &Tok);
   1244   void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
   1245   void HandleIdentSCCSDirective(Token &Tok);
   1246   void HandleMacroPublicDirective(Token &Tok);
   1247   void HandleMacroPrivateDirective(Token &Tok);
   1248 
   1249   // File inclusion.
   1250   void HandleIncludeDirective(SourceLocation HashLoc,
   1251                               Token &Tok,
   1252                               const DirectoryLookup *LookupFrom = 0,
   1253                               bool isImport = false);
   1254   void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
   1255   void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
   1256   void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
   1257   void HandleMicrosoftImportDirective(Token &Tok);
   1258 
   1259   // Macro definition and removal (by name: #define and #undef -- confirm).
   1260   void HandleDefineDirective(Token &Tok);
   1261   void HandleUndefDirective(Token &Tok);
   1262 
   1263   // Conditional inclusion.
   1264   void HandleIfdefDirective(Token &Tok, bool isIfndef,  // isIfndef presumably distinguishes #ifndef from #ifdef -- confirm.
   1265                             bool ReadAnyTokensBeforeDirective);
   1266   void HandleIfDirective(Token &Tok, bool ReadAnyTokensBeforeDirective);
   1267   void HandleEndifDirective(Token &Tok);
   1268   void HandleElseDirective(Token &Tok);
   1269   void HandleElifDirective(Token &Tok);
   1270 
   1271   // Pragmas.  The individual HandlePragma* handlers are declared further below.
   1272   void HandlePragmaDirective(unsigned Introducer);  // NOTE(review): Introducer is a plain unsigned, likely an introducer-kind value -- confirm.
   1273 public:  // Everything from here to the end of the class is public.
   1274   void HandlePragmaOnce(Token &OnceTok);
   1275   void HandlePragmaMark();  // The only pragma handler here that takes no token.
   1276   void HandlePragmaPoison(Token &PoisonTok);
   1277   void HandlePragmaSystemHeader(Token &SysHeaderTok);
   1278   void HandlePragmaDependency(Token &DependencyTok);
   1279   void HandlePragmaComment(Token &CommentTok);
   1280   void HandlePragmaMessage(Token &MessageTok);
   1281   void HandlePragmaPushMacro(Token &Tok);
   1282   void HandlePragmaPopMacro(Token &Tok);
   1283   void HandlePragmaIncludeAlias(Token &Tok);
   1284   IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);  // By name, shared parsing for the push/pop macro handlers above -- confirm.
   1285 
   1286   // Returns true, and stores the first token, only when some CommentHandler
   1287   // has inserted tokens and getCommentRetentionState() is false.
   1288   bool HandleComment(Token &Token, SourceRange Comment);  // NOTE(review): the parameter name 'Token' hides the Token type name.
   1289 
   1290   /// \brief Notes that a macro has been used and updates the information kept
   1291   /// about macros that need unused warnings.
   1292   void markMacroAsUsed(MacroInfo *MI);  // MI: the macro being marked; presumably non-null -- confirm.
   1293 };
   1294 
   1295 /// \brief Abstract base class for handlers that receive the source range of
   1296 /// each comment encountered in the source file.
   1297 class CommentHandler {
   1298 public:
   1299   virtual ~CommentHandler();  // Virtual destructor; declared here, defined out of line.
   1300 
   1301   // Implementations must return true if they pushed any tokens to be read
   1302   // (e.g. via EnterToken or EnterTokenStream).  Pure virtual.
   1303   virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
   1304 };
   1305 
   1306 }  // end namespace clang
   1307 
   1308 #endif
   1309