Home | History | Annotate | Download | only in Format
      1 //===--- UnwrappedLineParser.h - Format C++ code ----------------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 ///
     10 /// \file
     11 /// \brief This file contains the declaration of the UnwrappedLineParser,
     12 /// which turns a stream of tokens into UnwrappedLines.
     13 ///
     14 //===----------------------------------------------------------------------===//
     15 
     16 #ifndef LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H
     17 #define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H
     18 
     19 #include "FormatToken.h"
     20 #include "clang/Basic/IdentifierTable.h"
     21 #include "clang/Format/Format.h"
     22 #include <list>
     23 #include <stack>
     24 
     25 namespace clang {
     26 namespace format {
     27 
     28 struct UnwrappedLineNode;
     29 
     30 /// \brief An unwrapped line is a sequence of \c Token that we would like to
     31 /// put on a single line if there were no column limit.
     32 ///
     33 /// This is used as a main interface between the \c UnwrappedLineParser and the
     34 /// \c UnwrappedLineFormatter. The key property is that changing the formatting
     35 /// within an unwrapped line does not affect any other unwrapped lines.
     36 struct UnwrappedLine {
          /// \brief Creates an empty line: \c Level is 0 and both flags are false
          /// (see the inline definition near the end of this header).
     37   UnwrappedLine();
     38 
     39   // FIXME: Don't use std::list here.
     40   /// \brief The \c Tokens comprising this \c UnwrappedLine.
          ///
          /// Each node holds one token pointer plus the child lines attached to that
          /// token (see \c UnwrappedLineNode).
     41   std::list<UnwrappedLineNode> Tokens;
     42 
     43   /// \brief The indent level of the \c UnwrappedLine.
     44   unsigned Level;
     45 
     46   /// \brief Whether this \c UnwrappedLine is part of a preprocessor directive.
     47   bool InPPDirective;
     48 
          /// \brief Whether this \c UnwrappedLine must be a declaration.
          ///
          /// NOTE(review): presumably derived from the parser's
          /// \c DeclarationScopeStack (compound statement vs. not) -- confirm in the
          /// parser implementation.
     49   bool MustBeDeclaration;
     50 };
     51 
     52 class UnwrappedLineConsumer {
          // Abstract callback interface for receiving UnwrappedLines. The
          // UnwrappedLineParser stores a reference to one implementation (its
          // Callback member). Both operations are pure virtual, so implementers
          // must provide both.
     53 public:
          // Virtual so that implementations can be destroyed through a pointer or
          // reference to this interface.
     54   virtual ~UnwrappedLineConsumer() {}
          // Receives one parsed line. Line is passed by const reference; whether it
          // stays valid after the call returns is not visible from this header, so
          // copy it if it must be kept -- TODO confirm.
     55   virtual void consumeUnwrappedLine(const UnwrappedLine &Line) = 0;
          // NOTE(review): presumably signals the end of one parsing pass (the parser
          // re-parses the code once per combination of preprocessor branches) --
          // confirm against the parser implementation.
     56   virtual void finishRun() = 0;
     57 };
     58 
     59 class FormatTokenSource;
     60 
     61 class UnwrappedLineParser {
          // Turns the token stream handed to the constructor into UnwrappedLines;
          // presumably each finished line is reported through the
          // UnwrappedLineConsumer stored in Callback (confirm in the .cpp). Only the
          // constructor and parse() are public; every helper and all parsing state
          // below are private and defined outside this header.
          //
          // NOTE(review): std::unique_ptr and std::vector are used below, but this
          // header includes neither <memory> nor <vector> directly, so it appears to
          // rely on transitive includes.
     62 public:
          // Style, Keywords and Callback are kept as reference members (see below),
          // so the referenced objects presumably must outlive the parser. Tokens is
          // an ArrayRef, i.e. a non-owning view of FormatToken pointers -- TODO
          // confirm how it maps onto the AllTokens and Tokens members.
     63   UnwrappedLineParser(const FormatStyle &Style,
     64                       const AdditionalKeywords &Keywords,
     65                       ArrayRef<FormatToken *> Tokens,
     66                       UnwrappedLineConsumer &Callback);
     67 
          // Public entry point that runs the parser. NOTE(review): per the
          // PPBranchLevel comments below the code is re-parsed once per combination
          // of preprocessor branches; presumably that loop is driven from here --
          // confirm.
     68   void parse();
     69 
     70 private:
          // Parsing helpers, roughly one per language construct judging by their
          // names. Only declared here; semantics (including the meaning of the bool
          // results of the tryToParse*/parse* functions) are defined in the .cpp.
     71   void reset();
     72   void parseFile();
     73   void parseLevel(bool HasOpeningBrace);
     74   void parseBlock(bool MustBeDeclaration, bool AddLevel = true,
     75                   bool MunchSemi = true);
     76   void parseChildBlock();
     77   void parsePPDirective();
     78   void parsePPDefine();
     79   void parsePPIf(bool IfDef);
     80   void parsePPElIf();
     81   void parsePPElse();
     82   void parsePPEndIf();
     83   void parsePPUnknown();
     84   void readTokenWithJavaScriptASI();
     85   void parseStructuralElement();
     86   bool tryToParseBracedList();
     87   bool parseBracedList(bool ContinueOnSemicolons = false);
     88   void parseParens();
     89   void parseSquare();
     90   void parseIfThenElse();
     91   void parseTryCatch();
     92   void parseForOrWhileLoop();
     93   void parseDoWhile();
     94   void parseLabel();
     95   void parseCaseLabel();
     96   void parseSwitch();
     97   void parseNamespace();
     98   void parseNew();
     99   void parseAccessSpecifier();
    100   bool parseEnum();
    101   void parseJavaEnumBody();
    102   void parseRecord();
    103   void parseObjCProtocolList();
    104   void parseObjCUntilAtEnd();
    105   void parseObjCInterfaceOrImplementation();
    106   void parseObjCProtocol();
    107   void parseJavaScriptEs6ImportExport();
    108   bool tryToParseLambda();
    109   bool tryToParseLambdaIntroducer();
    110   void tryToParseJSFunction();
          // Line and token bookkeeping helpers (grouping inferred from the names).
    111   void addUnwrappedLine();
    112   bool eof() const;
    113   void nextToken();
    114   const FormatToken *getPreviousToken();
    115   void readToken();
    116   void flushComments(bool NewlineBeforeNext);
    117   void pushToken(FormatToken *Tok);
    118   void calculateBraceTypes(bool ExpectClassBody = false);
    119 
    120   // Marks a conditional compilation edge (for example, an '#if', '#ifdef',
    121   // '#else' or merge conflict marker). If 'Unreachable' is true, assumes
    122   // this branch either cannot be taken (for example '#if false'), or should
    123   // not be taken in this round.
    124   void conditionalCompilationCondition(bool Unreachable);
    125   void conditionalCompilationStart(bool Unreachable);
    126   void conditionalCompilationAlternative();
    127   void conditionalCompilationEnd();
    128 
    129   bool isOnNewLine(const FormatToken &FormatTok);
    130 
    131   // FIXME: We are constantly running into bugs where Line.Level is incorrectly
    132   // decremented below 0. Introduce a method to subtract from Line.Level
    133   // and use that everywhere in the Parser.
          // NOTE(review): presumably the unwrapped line currently being assembled --
          // confirm in the .cpp.
    134   std::unique_ptr<UnwrappedLine> Line;
    135 
    136   // Comments are sorted into unwrapped lines by whether they are in the same
    137   // line as the previous token, or not. If not, they belong to the next token.
    138   // Since the next token might already be in a new unwrapped line, we need to
    139   // store the comments belonging to that token.
    140   SmallVector<FormatToken *, 1> CommentsBeforeNextToken;
          // NOTE(review): presumably the token currently under the cursor, advanced
          // by nextToken()/readToken() -- confirm.
    141   FormatToken *FormatTok;
    142   bool MustBreakBeforeNextToken;
    143 
    144   // The parsed lines. Only added to through \c CurrentLines.
    145   SmallVector<UnwrappedLine, 8> Lines;
    146 
    147   // Preprocessor directives are parsed out-of-order from other unwrapped lines.
    148   // Thus, we need to keep a list of preprocessor directives to be reported
    149   // after an unwrapped line that has been started was finished.
    150   SmallVector<UnwrappedLine, 4> PreprocessorDirectives;
    151 
    152   // New unwrapped lines are added via CurrentLines.
    153   // Usually points to \c &Lines. While parsing a preprocessor directive when
    154   // there is an unfinished previous unwrapped line, will point to
    155   // \c &PreprocessorDirectives.
    156   SmallVectorImpl<UnwrappedLine> *CurrentLines;
    157 
    158   // We store for each line whether it must be a declaration depending on
    159   // whether we are in a compound statement or not.
    160   std::vector<bool> DeclarationScopeStack;
    161 
          // Reference members: the parser does not own the style or keyword tables.
    162   const FormatStyle &Style;
    163   const AdditionalKeywords &Keywords;
    164 
          // NOTE(review): raw pointer to a type that is only forward-declared in
          // this header; who creates and destroys the token source is not visible
          // here -- check the .cpp before changing its lifetime.
    165   FormatTokenSource *Tokens;
          // Consumer of the parsed lines; held by reference, not owned.
    166   UnwrappedLineConsumer &Callback;
    167 
    168   // FIXME: This is a temporary measure until we have reworked the ownership
    169   // of the format tokens. The goal is to have the actual tokens created and
    170   // owned outside of and handed into the UnwrappedLineParser.
    171   ArrayRef<FormatToken *> AllTokens;
    172 
    173   // Represents preprocessor branch type, so we can find matching
    174   // #if/#else/#endif directives.
    175   enum PPBranchKind {
    176     PP_Conditional, // Any #if, #ifdef, #ifndef, #elif, block outside #if 0
    177     PP_Unreachable  // #if 0 or a conditional preprocessor block inside #if 0
    178   };
    179 
    180   // Keeps a stack of currently active preprocessor branching directives.
    181   SmallVector<PPBranchKind, 16> PPStack;
    182 
    183   // The \c UnwrappedLineParser re-parses the code for each combination
    184   // of preprocessor branches that can be taken.
    185   // To that end, we take the same branch (#if, #else, or one of the #elif
    186   // branches) for each nesting level of preprocessor branches.
    187   // \c PPBranchLevel stores the current nesting level of preprocessor
    188   // branches during one pass over the code.
    189   int PPBranchLevel;
    190 
    191   // Contains the current branch (#if, #else or one of the #elif branches)
    192   // for each nesting level.
    193   SmallVector<int, 8> PPLevelBranchIndex;
    194 
    195   // Contains the maximum number of branches at each nesting level.
    196   SmallVector<int, 8> PPLevelBranchCount;
    197 
    198   // Contains the number of branches per nesting level we are currently
    199   // in while parsing a preprocessor branch sequence.
    200   // This is used to update PPLevelBranchCount at the end of a branch
    201   // sequence.
    202   std::stack<int> PPChainBranchIndex;
    203 
          // Helper classes that are not declared in this header; friendship gives
          // them access to the private parsing state above.
    204   friend class ScopedLineState;
    205   friend class CompoundStatementIndenter;
    206 };
    207 
    208 struct UnwrappedLineNode {
          // One element of UnwrappedLine::Tokens: a token pointer together with the
          // unwrapped lines nested under that token.

          // Creates a node with no token (Tok is null) and no children.
    209   UnwrappedLineNode() : Tok(nullptr) {}
          // Wraps Tok with no children. Not marked explicit, so a FormatToken* can
          // convert to a node implicitly. NOTE(review): callers may rely on that
          // conversion -- check them before adding 'explicit'.
    210   UnwrappedLineNode(FormatToken *Tok) : Tok(Tok) {}
    211 
          // The token of this node. Raw pointer; the ownership of format tokens is
          // not visible from this header.
    212   FormatToken *Tok;
          // Unwrapped lines attached to this token. NOTE(review): presumably filled
          // when a nested block follows the token (cf.
          // UnwrappedLineParser::parseChildBlock) -- confirm.
    213   SmallVector<UnwrappedLine, 0> Children;
    214 };
    215 
    216 inline UnwrappedLine::UnwrappedLine()
            // A new line starts at indent level 0, outside any preprocessor
            // directive and not required to be a declaration; Tokens is
            // default-constructed. Defined out of line after UnwrappedLineNode,
            // presumably because Tokens needs the complete node type here.
    217     : Level(0), InPPDirective(false), MustBeDeclaration(false) {}
    218 
    219 } // end namespace format
    220 } // end namespace clang
    221 
    222 #endif
    223