Home | History | Annotate | Download | only in Format
      1 //===--- UnwrappedLineParser.h - Format C++ code ----------------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 ///
     10 /// \file
     11 /// \brief This file contains the declaration of the UnwrappedLineParser,
     12 /// which turns a stream of tokens into UnwrappedLines.
     13 ///
     14 //===----------------------------------------------------------------------===//
     15 
     16 #ifndef LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H
     17 #define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H
     18 
     19 #include "FormatToken.h"
     20 #include "clang/Basic/IdentifierTable.h"
     21 #include "clang/Format/Format.h"
     22 #include <list>
     23 #include <stack>
     24 
     25 namespace clang {
     26 namespace format {
     27 
     28 struct UnwrappedLineNode;
     29 
     30 /// \brief An unwrapped line is a sequence of \c Token, that we would like to
     31 /// put on a single line if there was no column limit.
     32 ///
     33 /// This is used as a main interface between the \c UnwrappedLineParser and the
     34 /// \c UnwrappedLineFormatter. The key property is that changing the formatting
     35 /// within an unwrapped line does not affect any other unwrapped lines.
     36 struct UnwrappedLine {
          /// \brief Creates an empty line: \c Level is 0 and both \c InPPDirective
          /// and \c MustBeDeclaration are false (defined inline at the end of this
          /// header).
     37   UnwrappedLine();
     38 
     39   // FIXME: Don't use std::list here.
          // Note: \c UnwrappedLineNode is only forward-declared at this point; it is
          // defined further down in this header because each node in turn stores
          // \c UnwrappedLine children.
     40   /// \brief The \c Tokens comprising this \c UnwrappedLine.
     41   std::list<UnwrappedLineNode> Tokens;
     42 
     43   /// \brief The indent level of the \c UnwrappedLine.
     44   unsigned Level;
     45 
     46   /// \brief Whether this \c UnwrappedLine is part of a preprocessor directive.
     47   bool InPPDirective;
     48 
          /// \brief Whether this \c UnwrappedLine must be a declaration.
          ///
          /// According to the comment on \c UnwrappedLineParser::DeclarationScopeStack
          /// this depends on whether the line is inside a compound statement or not.
     49   bool MustBeDeclaration;
     50 };
     51 
     52 class UnwrappedLineConsumer {
          // Abstract receiver of \c UnwrappedLine objects. \c UnwrappedLineParser
          // stores a reference to one such consumer in its \c Callback member.
     53 public:
          /// Virtual so that implementations can be destroyed through this interface.
     54   virtual ~UnwrappedLineConsumer() = default;
          /// Receives a single \c UnwrappedLine by const reference.
     55   virtual void consumeUnwrappedLine(const UnwrappedLine &Line) = 0;
          /// NOTE(review): presumably marks the end of one parser run; when and how
          /// often it is invoked is decided by the caller and is not visible in this
          /// header -- confirm in the implementation file.
     56   virtual void finishRun() = 0;
     57 };
     58 
     59 class FormatTokenSource;
     60 
     61 class UnwrappedLineParser {
          // Turns a stream of tokens into \c UnwrappedLines (see the file comment at
          // the top of this header). Every member function is only declared here;
          // the definitions are not part of this header.
     62 public:
          /// \brief Creates a parser over \p Tokens.
          ///
          /// The \c Style, \c Keywords and \c Callback members are references and the
          /// token array is an \c ArrayRef, i.e. nothing here is owned by the parser.
          /// NOTE(review): assuming the constructor binds those members to its
          /// arguments, the arguments must outlive the parser -- confirm in the
          /// implementation file.
          ///
          /// Note that the \p Tokens parameter has the type of the \c AllTokens
          /// member; the member named \c Tokens is a \c FormatTokenSource pointer.
     63   UnwrappedLineParser(const FormatStyle &Style,
     64                       const AdditionalKeywords &Keywords,
     65                       ArrayRef<FormatToken *> Tokens,
     66                       UnwrappedLineConsumer &Callback);
     67 
     68   /// Returns true in case of a structural error.
     69   bool parse();
     70 
     71 private:
          // The parse*/tryToParse* members below are named after the construct they
          // handle. Their exact behavior is defined in the implementation file and
          // is deliberately not described here.
     72   void reset();
     73   void parseFile();
     74   void parseLevel(bool HasOpeningBrace);
     75   void parseBlock(bool MustBeDeclaration, bool AddLevel = true,
     76                   bool MunchSemi = true);
     77   void parseChildBlock();
     78   void parsePPDirective();
     79   void parsePPDefine();
     80   void parsePPIf(bool IfDef);
     81   void parsePPElIf();
     82   void parsePPElse();
     83   void parsePPEndIf();
     84   void parsePPUnknown();
     85   void parseStructuralElement();
     86   bool tryToParseBracedList();
     87   bool parseBracedList(bool ContinueOnSemicolons = false);
     88   void parseParens();
     89   void parseSquare();
     90   void parseIfThenElse();
     91   void parseTryCatch();
     92   void parseForOrWhileLoop();
     93   void parseDoWhile();
     94   void parseLabel();
     95   void parseCaseLabel();
     96   void parseSwitch();
     97   void parseNamespace();
     98   void parseNew();
     99   void parseAccessSpecifier();
    100   void parseEnum();
    101   void parseJavaEnumBody();
    102   void parseRecord();
    103   void parseObjCProtocolList();
    104   void parseObjCUntilAtEnd();
    105   void parseObjCInterfaceOrImplementation();
    106   void parseObjCProtocol();
    107   void parseJavaScriptEs6ImportExport();
    108   bool tryToParseLambda();
    109   bool tryToParseLambdaIntroducer();
    110   void tryToParseJSFunction();
    111   void addUnwrappedLine();
    112   bool eof() const;
    113   void nextToken();
    114   void readToken();
    115   void flushComments(bool NewlineBeforeNext);
    116   void pushToken(FormatToken *Tok);
    117   void calculateBraceTypes();
    118 
    119   // Marks a conditional compilation edge (for example, an '#if', '#ifdef',
    120   // '#else' or merge conflict marker). If 'Unreachable' is true, assumes
    121   // this branch either cannot be taken (for example '#if false'), or should
    122   // not be taken in this round.
    123   void conditionalCompilationCondition(bool Unreachable);
    124   void conditionalCompilationStart(bool Unreachable);
    125   void conditionalCompilationAlternative();
    126   void conditionalCompilationEnd();
    127 
          // NOTE(review): the parameter is named like the \c FormatTok member
          // declared below and therefore shadows it inside this function.
    128   bool isOnNewLine(const FormatToken &FormatTok);
    129 
    130   // FIXME: We are constantly running into bugs where Line.Level is incorrectly
    131   // subtracted from beyond 0. Introduce a method to subtract from Line.Level
    132   // and use that everywhere in the Parser.
          // NOTE(review): this header uses std::unique_ptr (here) and std::vector
          // (\c DeclarationScopeStack) but directly includes only <list> and
          // <stack>; <memory> and <vector> are presumably pulled in transitively.
    133   std::unique_ptr<UnwrappedLine> Line;
    134 
    135   // Comments are sorted into unwrapped lines by whether they are in the same
    136   // line as the previous token, or not. If not, they belong to the next token.
    137   // Since the next token might already be in a new unwrapped line, we need to
    138   // store the comments belonging to that token.
    139   SmallVector<FormatToken *, 1> CommentsBeforeNextToken;
          // NOTE(review): presumably the token currently being looked at -- confirm
          // in the implementation file.
    140   FormatToken *FormatTok;
          // NOTE(review): presumably forces a line break in front of the next token
          // -- confirm in the implementation file.
    141   bool MustBreakBeforeNextToken;
    142 
    143   // The parsed lines. Only added to through \c CurrentLines.
    144   SmallVector<UnwrappedLine, 8> Lines;
    145 
    146   // Preprocessor directives are parsed out-of-order from other unwrapped lines.
    147   // Thus, we need to keep a list of preprocessor directives to be reported
    148   // after an unwrapped line that has been started was finished.
    149   SmallVector<UnwrappedLine, 4> PreprocessorDirectives;
    150 
    151   // New unwrapped lines are added via CurrentLines.
    152   // Usually points to \c &Lines. While parsing a preprocessor directive when
    153   // there is an unfinished previous unwrapped line, will point to
    154   // \c &PreprocessorDirectives.
    155   SmallVectorImpl<UnwrappedLine> *CurrentLines;
    156 
    157   // We store for each line whether it must be a declaration depending on
    158   // whether we are in a compound statement or not.
    159   std::vector<bool> DeclarationScopeStack;
    160 
    161   // Will be true if we encounter an error that leads to possibly incorrect
    162   // indentation levels.
    163   bool StructuralError;
    164 
          // Non-owning references to the formatting style and the additional
          // keywords; both have the same types as the constructor parameters of the
          // same name.
    165   const FormatStyle &Style;
    166   const AdditionalKeywords &Keywords;
    167 
          // Pointer to a \c FormatTokenSource, which is only forward-declared in this
          // header; who owns the pointee is not visible here.
    168   FormatTokenSource *Tokens;
          // The consumer handed to the constructor; presumably the receiver of the
          // finished lines -- confirm in the implementation file.
    169   UnwrappedLineConsumer &Callback;
    170 
    171   // FIXME: This is a temporary measure until we have reworked the ownership
    172   // of the format tokens. The goal is to have the actual tokens created and
    173   // owned outside of and handed into the UnwrappedLineParser.
    174   ArrayRef<FormatToken *> AllTokens;
    175 
    176   // Represents preprocessor branch type, so we can find matching
    177   // #if/#else/#endif directives.
    178   enum PPBranchKind {
    179     PP_Conditional, // Any #if, #ifdef, #ifndef, #elif, block outside #if 0
    180     PP_Unreachable  // #if 0 or a conditional preprocessor block inside #if 0
    181   };
    182 
    183   // Keeps a stack of currently active preprocessor branching directives.
    184   SmallVector<PPBranchKind, 16> PPStack;
    185 
    186   // The \c UnwrappedLineParser re-parses the code for each combination
    187   // of preprocessor branches that can be taken.
    188   // To that end, we take the same branch (#if, #else, or one of the #elif
    189   // branches) for each nesting level of preprocessor branches.
    190   // \c PPBranchLevel stores the current nesting level of preprocessor
    191   // branches during one pass over the code.
    192   int PPBranchLevel;
    193 
    194   // Contains the current branch (#if, #else or one of the #elif branches)
    195   // for each nesting level.
    196   SmallVector<int, 8> PPLevelBranchIndex;
    197 
    198   // Contains the maximum number of branches at each nesting level.
    199   SmallVector<int, 8> PPLevelBranchCount;
    200 
    201   // Contains the number of branches per nesting level we are currently
    202   // in while parsing a preprocessor branch sequence.
    203   // This is used to update PPLevelBranchCount at the end of a branch
    204   // sequence.
    205   std::stack<int> PPChainBranchIndex;
    206 
          // Neither helper class is declared in this header; friendship gives them
          // access to the private parser state above.
    207   friend class ScopedLineState;
    208   friend class CompoundStatementIndenter;
    209 };
    210 
    211 struct UnwrappedLineNode {
          // Pairs one token with the \c UnwrappedLine children attached to it; this
          // is the element type of \c UnwrappedLine::Tokens.
          /// Creates a node without a token (\c Tok is null) and without children.
    212   UnwrappedLineNode() : UnwrappedLineNode(nullptr) {}
          /// Creates a node for \p Tok without children. The constructor is not
          /// marked explicit, so a \c FormatToken pointer converts implicitly.
    213   UnwrappedLineNode(FormatToken *Tok) : Tok(Tok) {}
    214 
          /// The token of this node; stored as a raw pointer.
    215   FormatToken *Tok;
          /// The child lines of this node; default-constructed by both constructors.
    216   SmallVector<UnwrappedLine, 0> Children;
    217 };
    218 
    219 inline UnwrappedLine::UnwrappedLine()
            // A fresh line starts at indent level 0, is not part of a preprocessor
            // directive and is not required to be a declaration. \c Tokens is
            // default-constructed. Initializers follow the member declaration order.
    220     : Level(0),
              InPPDirective(false),
              MustBeDeclaration(false) {}
    221 
    222 } // end namespace format
    223 } // end namespace clang
    224 
    225 #endif
    226