Home | History | Annotate | Download | only in Format
      1 //===--- UnwrappedLineParser.h - Format C++ code ----------------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 ///
     10 /// \file
     11 /// \brief This file contains the declaration of the UnwrappedLineParser,
     12 /// which turns a stream of tokens into UnwrappedLines.
     13 ///
     14 //===----------------------------------------------------------------------===//
     15 
     16 #ifndef LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H
     17 #define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H
     18 
     19 #include "FormatToken.h"
     20 #include "clang/Basic/IdentifierTable.h"
     21 #include "clang/Format/Format.h"
     22 #include <list>
     23 #include <stack>
     24 
     25 namespace clang {
     26 namespace format {
     27 
     28 struct UnwrappedLineNode;
     29 
     30 /// \brief An unwrapped line is a sequence of \c Token that we would like to
     31 /// put on a single line if there were no column limit.
     32 ///
     33 /// This is used as a main interface between the \c UnwrappedLineParser and the
     34 /// \c UnwrappedLineFormatter. The key property is that changing the formatting
     35 /// within an unwrapped line does not affect any other unwrapped lines.
     36 struct UnwrappedLine {
          // Defined inline further down in this header, after \c UnwrappedLineNode.
          // It sets \c Level to 0 and both boolean flags to false.
     37   UnwrappedLine();
     38 
     39   // FIXME: Don't use std::list here.
     40   /// \brief The \c Tokens comprising this \c UnwrappedLine.
          ///
          /// Each node carries one \c FormatToken pointer plus a (possibly empty)
          /// vector of child \c UnwrappedLines; see \c UnwrappedLineNode.
     41   std::list<UnwrappedLineNode> Tokens;
     42 
     43   /// \brief The indent level of the \c UnwrappedLine.
     44   unsigned Level;
     45 
     46   /// \brief Whether this \c UnwrappedLine is part of a preprocessor directive.
     47   bool InPPDirective;
     48 
          /// \brief Whether this \c UnwrappedLine must be a declaration.
          ///
          /// NOTE(review): presumably filled in from the parser's
          /// \c DeclarationScopeStack ("whether we are in a compound statement or
          /// not"); the code that sets it is not in this header -- confirm there.
     49   bool MustBeDeclaration;
     50 };
     51 
     52 class UnwrappedLineConsumer {
          // Abstract receiver of \c UnwrappedLines. \c UnwrappedLineParser takes one
          // by reference in its constructor and keeps a reference member to it.
     53 public:
          // Virtual so that implementations can be destroyed through this interface.
     54   virtual ~UnwrappedLineConsumer() = default;
          // Receives a single \c UnwrappedLine; the argument is read-only.
     55   virtual void consumeUnwrappedLine(const UnwrappedLine &Line) = 0;
          // NOTE(review): presumably signals that one parse run has delivered all of
          // its lines -- the call site is not in this header, confirm there.
     56   virtual void finishRun() = 0;
     57 };
     58 
     59 class FormatTokenSource;
     60 
     61 class UnwrappedLineParser {
          // Turns a stream of tokens into \c UnwrappedLines (see the \file comment at
          // the top of this header). Only declarations live here; every member
          // function is implemented outside this header.
          //
          // NOTE(review): finished lines are presumably reported to the
          // \c UnwrappedLineConsumer given to the constructor -- the delivery code is
          // not visible from here.
     62 public:
          // The class stores \c Style, \c Keywords and \c Callback as reference
          // members (see below), so the objects bound to them must stay alive for as
          // long as the parser is used. NOTE(review): presumably the constructor
          // binds those members to the parameters of the same name -- confirm in the
          // implementation file.
     63   UnwrappedLineParser(const FormatStyle &Style,
     64                       const AdditionalKeywords &Keywords,
     65                       ArrayRef<FormatToken *> Tokens,
     66                       UnwrappedLineConsumer &Callback);
     67 
          // The only public operation besides construction.
     68   void parse();
     69 
     70 private:
          // Parsing helpers. The names indicate the construct each one handles; their
          // exact contracts are defined by the implementations, not by this header.
     71   void reset();
     72   void parseFile();
     73   void parseLevel(bool HasOpeningBrace);
     74   void parseBlock(bool MustBeDeclaration, bool AddLevel = true,
     75                   bool MunchSemi = true);
     76   void parseChildBlock();
     77   void parsePPDirective();
     78   void parsePPDefine();
     79   void parsePPIf(bool IfDef);
     80   void parsePPElIf();
     81   void parsePPElse();
     82   void parsePPEndIf();
     83   void parsePPUnknown();
     84   void parseStructuralElement();
     85   bool tryToParseBracedList();
     86   bool parseBracedList(bool ContinueOnSemicolons = false);
     87   void parseParens();
     88   void parseSquare();
     89   void parseIfThenElse();
     90   void parseTryCatch();
     91   void parseForOrWhileLoop();
     92   void parseDoWhile();
     93   void parseLabel();
     94   void parseCaseLabel();
     95   void parseSwitch();
     96   void parseNamespace();
     97   void parseNew();
     98   void parseAccessSpecifier();
     99   void parseEnum();
    100   void parseJavaEnumBody();
    101   void parseRecord();
    102   void parseObjCProtocolList();
    103   void parseObjCUntilAtEnd();
    104   void parseObjCInterfaceOrImplementation();
    105   void parseObjCProtocol();
    106   void parseJavaScriptEs6ImportExport();
    107   bool tryToParseLambda();
    108   bool tryToParseLambdaIntroducer();
    109   void tryToParseJSFunction();
    110   void addUnwrappedLine();
    111   bool eof() const;
    112   void nextToken();
    113   void readToken();
    114   void flushComments(bool NewlineBeforeNext);
    115   void pushToken(FormatToken *Tok);
    116   void calculateBraceTypes(bool ExpectClassBody = false);
    117 
    118   // Marks a conditional compilation edge (for example, an '#if', '#ifdef',
    119   // '#else' or merge conflict marker). If 'Unreachable' is true, assumes
    120   // this branch either cannot be taken (for example '#if false'), or should
    121   // not be taken in this round.
    122   void conditionalCompilationCondition(bool Unreachable);
    123   void conditionalCompilationStart(bool Unreachable);
    124   void conditionalCompilationAlternative();
    125   void conditionalCompilationEnd();
    126 
    127   bool isOnNewLine(const FormatToken &FormatTok);
    128 
    129   // FIXME: We are constantly running into bugs where Line.Level is incorrectly
    130   // decremented below 0. Introduce a method to subtract from Line.Level
    131   // and use that everywhere in the Parser.
    132   std::unique_ptr<UnwrappedLine> Line;
    133 
    134   // Comments are sorted into unwrapped lines by whether they are in the same
    135   // line as the previous token, or not. If not, they belong to the next token.
    136   // Since the next token might already be in a new unwrapped line, we need to
    137   // store the comments belonging to that token.
    138   SmallVector<FormatToken *, 1> CommentsBeforeNextToken;
          // NOTE(review): presumably the token currently being processed and a flag
          // forcing a break before the following token -- neither is described in
          // this header, confirm in the implementation file.
    139   FormatToken *FormatTok;
    140   bool MustBreakBeforeNextToken;
    141 
    142   // The parsed lines. Only added to through \c CurrentLines.
    143   SmallVector<UnwrappedLine, 8> Lines;
    144 
    145   // Preprocessor directives are parsed out-of-order from other unwrapped lines.
    146   // Thus, we need to keep a list of preprocessor directives to be reported
    147   // after an unwrapped line that has been started was finished.
    148   SmallVector<UnwrappedLine, 4> PreprocessorDirectives;
    149 
    150   // New unwrapped lines are added via CurrentLines.
    151   // Usually points to \c &Lines. While parsing a preprocessor directive when
    152   // there is an unfinished previous unwrapped line, will point to
    153   // \c &PreprocessorDirectives.
    154   SmallVectorImpl<UnwrappedLine> *CurrentLines;
    155 
    156   // We store for each line whether it must be a declaration depending on
    157   // whether we are in a compound statement or not.
    158   std::vector<bool> DeclarationScopeStack;
    159 
          // Reference members: the referenced objects are not owned by the parser
          // and must outlive it.
    160   const FormatStyle &Style;
    161   const AdditionalKeywords &Keywords;
    162 
          // NOTE(review): \c Tokens is a raw pointer; who owns the pointee cannot be
          // determined from this header. \c Callback is held by reference and must
          // outlive the parser.
    163   FormatTokenSource *Tokens;
    164   UnwrappedLineConsumer &Callback;
    165 
    166   // FIXME: This is a temporary measure until we have reworked the ownership
    167   // of the format tokens. The goal is to have the actual tokens created and
    168   // owned outside of and handed into the UnwrappedLineParser.
    169   ArrayRef<FormatToken *> AllTokens;
    170 
    171   // Represents preprocessor branch type, so we can find matching
    172   // #if/#else/#endif directives.
    173   enum PPBranchKind {
    174     PP_Conditional, // Any #if, #ifdef, #ifndef, #elif, block outside #if 0
    175     PP_Unreachable  // #if 0 or a conditional preprocessor block inside #if 0
    176   };
    177 
    178   // Keeps a stack of currently active preprocessor branching directives.
    179   SmallVector<PPBranchKind, 16> PPStack;
    180 
    181   // The \c UnwrappedLineParser re-parses the code for each combination
    182   // of preprocessor branches that can be taken.
    183   // To that end, we take the same branch (#if, #else, or one of the #elif
    184   // branches) for each nesting level of preprocessor branches.
    185   // \c PPBranchLevel stores the current nesting level of preprocessor
    186   // branches during one pass over the code.
    187   int PPBranchLevel;
    188 
    189   // Contains the current branch (#if, #else or one of the #elif branches)
    190   // for each nesting level.
    191   SmallVector<int, 8> PPLevelBranchIndex;
    192 
    193   // Contains the maximum number of branches at each nesting level.
    194   SmallVector<int, 8> PPLevelBranchCount;
    195 
    196   // Contains the number of branches per nesting level we are currently
    197   // in while parsing a preprocessor branch sequence.
    198   // This is used to update PPLevelBranchCount at the end of a branch
    199   // sequence.
    200   std::stack<int> PPChainBranchIndex;
    201 
          // Helper classes declared elsewhere (not in this header) that are granted
          // access to the private state above.
    202   friend class ScopedLineState;
    203   friend class CompoundStatementIndenter;
    204 };
    205 
    206 struct UnwrappedLineNode {
          // Element type of \c UnwrappedLine::Tokens: one token pointer together with
          // a vector of child \c UnwrappedLines.
          //
          // A default-constructed node has a null \c Tok (via the member initializer
          // below) and no children.
    207   UnwrappedLineNode() = default;
          // Wraps \p Tok; \c Children starts out empty.
    208   UnwrappedLineNode(FormatToken *Tok) : Tok(Tok) {}
    209 
          // NOTE(review): raw pointer -- ownership of the token is not visible here.
    210   FormatToken *Tok = nullptr;
    211   SmallVector<UnwrappedLine, 0> Children;
    212 };
    213 
    214 inline UnwrappedLine::UnwrappedLine()
    215     : Level(0), InPPDirective(false), MustBeDeclaration(false) {}
    216 
    217 } // end namespace format
    218 } // end namespace clang
    219 
    220 #endif
    221