Home | History | Annotate | Download | only in Format
      1 //===--- FormatTokenLexer.h - Format C++ code ----------------*- C++ ----*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 ///
     10 /// \file
     11 /// \brief This file contains FormatTokenLexer, which tokenizes a source file
     12 /// into a token stream suitable for ClangFormat.
     13 ///
     14 //===----------------------------------------------------------------------===//
     15 
     16 #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
     17 #define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
     18 
     19 #include "Encoding.h"
     20 #include "FormatToken.h"
     21 #include "clang/Basic/SourceLocation.h"
     22 #include "clang/Basic/SourceManager.h"
     23 #include "clang/Format/Format.h"
     24 #include "llvm/Support/Regex.h"
     25 
     26 namespace clang {
     27 namespace format {
     28 
     29 class FormatTokenLexer {
     30 public:
     31   FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
     32                    const FormatStyle &Style, encoding::Encoding Encoding);
     33 
     34   ArrayRef<FormatToken *> lex();
     35 
     36   const AdditionalKeywords &getKeywords() { return Keywords; }
     37 
     38 private:
     39   void tryMergePreviousTokens();
     40 
     41   bool tryMergeLessLess();
     42 
     43   bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType);
     44 
     45   // Returns \c true if \p Tok can only be followed by an operand in JavaScript.
     46   bool precedesOperand(FormatToken *Tok);
     47 
     48   bool canPrecedeRegexLiteral(FormatToken *Prev);
     49 
     50   // Tries to parse a JavaScript Regex literal starting at the current token,
     51   // if that begins with a slash and is in a location where JavaScript allows
     52   // regex literals. Changes the current token to a regex literal and updates
     53   // its text if successful.
     54   void tryParseJSRegexLiteral();
     55 
     56   void tryParseTemplateString();
     57 
     58   bool tryMerge_TMacro();
     59 
     60   bool tryMergeConflictMarkers();
     61 
     62   FormatToken *getStashedToken();
     63 
     64   FormatToken *getNextToken();
     65 
     66   FormatToken *FormatTok;
     67   bool IsFirstToken;
     68   bool GreaterStashed, LessStashed;
     69   unsigned Column;
     70   unsigned TrailingWhitespace;
     71   std::unique_ptr<Lexer> Lex;
     72   const SourceManager &SourceMgr;
     73   FileID ID;
     74   const FormatStyle &Style;
     75   IdentifierTable IdentTable;
     76   AdditionalKeywords Keywords;
     77   encoding::Encoding Encoding;
     78   llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
     79   // Index (in 'Tokens') of the last token that starts a new line.
     80   unsigned FirstInLineIndex;
     81   SmallVector<FormatToken *, 16> Tokens;
     82   SmallVector<IdentifierInfo *, 8> ForEachMacros;
     83 
     84   bool FormattingDisabled;
     85 
     86   llvm::Regex MacroBlockBeginRegex;
     87   llvm::Regex MacroBlockEndRegex;
     88 
     89   void readRawToken(FormatToken &Tok);
     90 
     91   void resetLexer(unsigned Offset);
     92 };
     93 
     94 } // namespace format
     95 } // namespace clang
     96 
     97 #endif
     98