Home | History | Annotate | Download | only in Format
      1 //===--- UnwrappedLineParser.h - Format C++ code ----------------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 ///
     10 /// \file
     11 /// \brief This file contains the declaration of the UnwrappedLineParser,
     12 /// which turns a stream of tokens into UnwrappedLines.
     13 ///
     14 //===----------------------------------------------------------------------===//
     15 
     16 #ifndef LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H
     17 #define LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H
     18 
     19 #include "FormatToken.h"
     20 #include "clang/Basic/IdentifierTable.h"
     21 #include "clang/Format/Format.h"
     22 #include <list>
        #include <memory>
        #include <stack>
        #include <vector>
     23 
     24 namespace clang {
     25 namespace format {
     26 
     27 struct UnwrappedLineNode;
     28 
     29 /// \brief An unwrapped line is a sequence of \c Token, that we would like to
     30 /// put on a single line if there was no column limit.
     31 ///
     32 /// This is used as a main interface between the \c UnwrappedLineParser and the
     33 /// \c UnwrappedLineFormatter. The key property is that changing the formatting
     34 /// within an unwrapped line does not affect any other unwrapped lines.
     35 struct UnwrappedLine {
          /// \brief Creates a line with no tokens at \c Level 0, with
          /// \c InPPDirective and \c MustBeDeclaration both false (the definition
          /// is the inline constructor near the end of this header).
     36   UnwrappedLine();
     37 
     38   // FIXME: Don't use std::list here.
     39   /// \brief The \c Tokens comprising this \c UnwrappedLine.
     40   std::list<UnwrappedLineNode> Tokens;
     41 
     42   /// \brief The indent level of the \c UnwrappedLine.
     43   unsigned Level;
     44 
     45   /// \brief Whether this \c UnwrappedLine is part of a preprocessor directive.
     46   bool InPPDirective;
     47 
          /// \brief Whether this \c UnwrappedLine must be a declaration.
          ///
          /// NOTE(review): the comment on \c UnwrappedLineParser::DeclarationScopeStack
          /// says this depends on whether the line is inside a compound statement;
          /// confirm in the implementation file how the flag is actually set.
     48   bool MustBeDeclaration;
     49 };
     50 
     51 class UnwrappedLineConsumer {
        // Abstract receiver of \c UnwrappedLine objects. \c UnwrappedLineParser takes
        // a reference to one of these in its constructor.
     52 public:
          // Virtual so that implementations can be destroyed through a pointer or
          // reference to this interface.
     53   virtual ~UnwrappedLineConsumer() = default;
          /// \brief Receives one \c UnwrappedLine by const reference.
     54   virtual void consumeUnwrappedLine(const UnwrappedLine &Line) = 0;
          /// \brief NOTE(review): presumably signals that one parsing run is
          /// complete -- confirm against the call sites in the implementation file.
     55   virtual void finishRun() = 0;
     56 };
     57 
     58 class FormatTokenSource;
     59 
     60 class UnwrappedLineParser {
        // Turns a stream of tokens into \c UnwrappedLine objects (see the file
        // comment at the top of this header). This header only declares the
        // members; no member function of this class is defined here.
     61 public:
          /// \brief Builds a parser from a format style, an array of token pointers
          /// and a line consumer.
          ///
          /// NOTE(review): the parameters mirror the \c Style, \c AllTokens and
          /// \c Callback members below; the actual binding happens in the
          /// implementation file -- confirm there.
     62   UnwrappedLineParser(const FormatStyle &Style, ArrayRef<FormatToken *> Tokens,
     63                       UnwrappedLineConsumer &Callback);
     64 
     65   /// Returns true in case of a structural error.
     66   bool parse();
     67 
     68 private:
          // Parsing routines. Only their signatures are visible in this header; see
          // the implementation file for what each one consumes and produces.
     69   void reset();
     70   void parseFile();
     71   void parseLevel(bool HasOpeningBrace);
     72   void parseBlock(bool MustBeDeclaration, bool AddLevel = true,
     73                   bool MunchSemi = true);
     74   void parseChildBlock();
          // Preprocessor-directive routines (grouped here by name).
     75   void parsePPDirective();
     76   void parsePPDefine();
     77   void parsePPIf(bool IfDef);
     78   void parsePPElIf();
     79   void parsePPElse();
     80   void parsePPEndIf();
     81   void parsePPUnknown();
     82   void parseStructuralElement();
     83   bool tryToParseBracedList();
     84   bool parseBracedList(bool ContinueOnSemicolons = false);
     85   void parseParens();
     86   void parseSquare();
     87   void parseIfThenElse();
     88   void parseTryCatch();
     89   void parseForOrWhileLoop();
     90   void parseDoWhile();
     91   void parseLabel();
     92   void parseCaseLabel();
     93   void parseSwitch();
     94   void parseNamespace();
     95   void parseAccessSpecifier();
     96   void parseEnum();
     97   void parseRecord();
     98   void parseObjCProtocolList();
     99   void parseObjCUntilAtEnd();
    100   void parseObjCInterfaceOrImplementation();
    101   void parseObjCProtocol();
    102   bool tryToParseLambda();
    103   bool tryToParseLambdaIntroducer();
    104   void tryToParseJSFunction();
    105   void addUnwrappedLine();
    106   bool eof() const;
    107   void nextToken();
    108   void readToken();
    109   void flushComments(bool NewlineBeforeNext);
    110   void pushToken(FormatToken *Tok);
    111   void calculateBraceTypes();
    112 
    113   // Marks a conditional compilation edge (for example, an '#if', '#ifdef',
    114   // '#else' or merge conflict marker). If 'Unreachable' is true, assumes
    115   // this branch either cannot be taken (for example '#if false'), or should
    116   // not be taken in this round.
    117   void conditionalCompilationCondition(bool Unreachable);
    118   void conditionalCompilationStart(bool Unreachable);
    119   void conditionalCompilationAlternative();
    120   void conditionalCompilationEnd();
    121 
    122   bool isOnNewLine(const FormatToken &FormatTok);
    123 
    124   // FIXME: We are constantly running into bugs where Line.Level is incorrectly
    125   // subtracted from beyond 0. Introduce a method to subtract from Line.Level
    126   // and use that everywhere in the Parser.
          // NOTE(review): presumably the unwrapped line currently being assembled --
          // confirm in the implementation file.
    127   std::unique_ptr<UnwrappedLine> Line;
    128 
    129   // Comments are sorted into unwrapped lines by whether they are in the same
    130   // line as the previous token, or not. If not, they belong to the next token.
    131   // Since the next token might already be in a new unwrapped line, we need to
    132   // store the comments belonging to that token.
    133   SmallVector<FormatToken *, 1> CommentsBeforeNextToken;
          // NOTE(review): \c FormatTok is presumably the token currently being looked
          // at; this header does not show who owns it.
    134   FormatToken *FormatTok;
    135   bool MustBreakBeforeNextToken;
    136 
    137   // The parsed lines. Only added to through \c CurrentLines.
    138   SmallVector<UnwrappedLine, 8> Lines;
    139 
    140   // Preprocessor directives are parsed out-of-order from other unwrapped lines.
    141   // Thus, we need to keep a list of preprocessor directives to be reported
    142   // after an unwrapped line that had been started is finished.
    143   SmallVector<UnwrappedLine, 4> PreprocessorDirectives;
    144 
    145   // New unwrapped lines are added via CurrentLines.
    146   // Usually points to \c &Lines. While parsing a preprocessor directive when
    147   // there is an unfinished previous unwrapped line, will point to
    148   // \c &PreprocessorDirectives.
    149   SmallVectorImpl<UnwrappedLine> *CurrentLines;
    150 
    151   // We store for each line whether it must be a declaration depending on
    152   // whether we are in a compound statement or not.
    153   std::vector<bool> DeclarationScopeStack;
    154 
    155   // Will be true if we encounter an error that leads to possibly incorrect
    156   // indentation levels.
    157   bool StructuralError;
    158 
          // \c Style and \c Callback are reference members, so the objects they refer
          // to must stay alive for as long as this parser is used. \c Tokens is a raw
          // pointer; its ownership is not visible from this header.
    159   const FormatStyle &Style;
    160   FormatTokenSource *Tokens;
    161   UnwrappedLineConsumer &Callback;
    162 
    163   // FIXME: This is a temporary measure until we have reworked the ownership
    164   // of the format tokens. The goal is to have the actual tokens created and
    165   // owned outside of and handed into the UnwrappedLineParser.
    166   ArrayRef<FormatToken *> AllTokens;
    167 
    168   // Represents preprocessor branch type, so we can find matching
    169   // #if/#else/#endif directives.
    170   enum PPBranchKind {
    171     PP_Conditional, // Any #if, #ifdef, #ifndef, #elif, block outside #if 0
    172     PP_Unreachable  // #if 0 or a conditional preprocessor block inside #if 0
    173   };
    174 
    175   // Keeps a stack of currently active preprocessor branching directives.
    176   SmallVector<PPBranchKind, 16> PPStack;
    177 
    178   // The \c UnwrappedLineParser re-parses the code for each combination
    179   // of preprocessor branches that can be taken.
    180   // To that end, we take the same branch (#if, #else, or one of the #elif
    181   // branches) for each nesting level of preprocessor branches.
    182   // \c PPBranchLevel stores the current nesting level of preprocessor
    183   // branches during one pass over the code.
    184   int PPBranchLevel;
    185 
    186   // Contains the current branch (#if, #else or one of the #elif branches)
    187   // for each nesting level.
    188   SmallVector<int, 8> PPLevelBranchIndex;
    189 
    190   // Contains the maximum number of branches at each nesting level.
    191   SmallVector<int, 8> PPLevelBranchCount;
    192 
    193   // Contains the number of branches per nesting level we are currently
    194   // in while parsing a preprocessor branch sequence.
    195   // This is used to update PPLevelBranchCount at the end of a branch
    196   // sequence.
    197   std::stack<int> PPChainBranchIndex;
    198 
          // Neither class is defined in this header; friendship gives them access to
          // the private members above.
    199   friend class ScopedLineState;
    200   friend class CompoundStatementIndenter;
    201 };
    202 
    203 struct UnwrappedLineNode {
        // One entry of \c UnwrappedLine::Tokens: a token pointer together with the
        // unwrapped lines stored as its children.
          /// \brief Creates a node with a null token and no children.
    204   UnwrappedLineNode() = default;
          /// \brief Creates a node for \p Token with no children. Left non-explicit
          /// so existing implicit conversions from \c FormatToken* keep compiling.
    205   UnwrappedLineNode(FormatToken *Token) : Tok(Token) {}
    206 
          /// \brief The token of this node; null when default-constructed. This
          /// struct declares no destructor, so it never deletes the token itself.
    207   FormatToken *Tok = nullptr;
          /// \brief Unwrapped lines nested under this node; empty after construction.
    208   SmallVector<UnwrappedLine, 0> Children;
    209 };
    210 
    211 inline UnwrappedLine::UnwrappedLine()
            // A fresh line starts at indent level 0, outside any preprocessor
            // directive, and is not required to be a declaration. The token list
            // is default-constructed. NOTE(review): presumably defined down here so
            // that \c UnwrappedLineNode is a complete type at this point -- confirm.
    212     : Level(0), InPPDirective(false), MustBeDeclaration(false) {}
    213 
    214 } // end namespace format
    215 } // end namespace clang
    216 
    217 #endif // LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H
    218