1 //===--- UnwrappedLineParser.h - Format C++ code ----------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file contains the declaration of the UnwrappedLineParser, 12 /// which turns a stream of tokens into UnwrappedLines. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #ifndef LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H 17 #define LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H 18 19 #include "clang/Basic/IdentifierTable.h" 20 #include "clang/Basic/SourceManager.h" 21 #include "clang/Format/Format.h" 22 #include "clang/Lex/Lexer.h" 23 #include <list> 24 25 namespace clang { 26 27 class DiagnosticsEngine; 28 29 namespace format { 30 31 /// \brief A wrapper around a \c Token storing information about the 32 /// whitespace characters preceeding it. 33 struct FormatToken { 34 FormatToken() 35 : NewlinesBefore(0), HasUnescapedNewline(false), WhiteSpaceLength(0), 36 LastNewlineOffset(0), TokenLength(0), IsFirst(false), 37 MustBreakBefore(false) {} 38 39 /// \brief The \c Token. 40 Token Tok; 41 42 /// \brief The number of newlines immediately before the \c Token. 43 /// 44 /// This can be used to determine what the user wrote in the original code 45 /// and thereby e.g. leave an empty line between two function definitions. 46 unsigned NewlinesBefore; 47 48 /// \brief Whether there is at least one unescaped newline before the \c 49 /// Token. 50 bool HasUnescapedNewline; 51 52 /// \brief The location of the start of the whitespace immediately preceeding 53 /// the \c Token. 54 /// 55 /// Used together with \c WhiteSpaceLength to create a \c Replacement. 56 SourceLocation WhiteSpaceStart; 57 58 /// \brief The length in characters of the whitespace immediately preceeding 59 /// the \c Token. 60 unsigned WhiteSpaceLength; 61 62 /// \brief The offset just past the last '\n' in this token's leading 63 /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'. 64 unsigned LastNewlineOffset; 65 66 /// \brief The length of the non-whitespace parts of the token. This is 67 /// necessary because we need to handle escaped newlines that are stored 68 /// with the token. 69 unsigned TokenLength; 70 71 /// \brief Indicates that this is the first token. 72 bool IsFirst; 73 74 /// \brief Whether there must be a line break before this token. 75 /// 76 /// This happens for example when a preprocessor directive ended directly 77 /// before the token. 78 bool MustBreakBefore; 79 }; 80 81 /// \brief An unwrapped line is a sequence of \c Token, that we would like to 82 /// put on a single line if there was no column limit. 83 /// 84 /// This is used as a main interface between the \c UnwrappedLineParser and the 85 /// \c UnwrappedLineFormatter. The key property is that changing the formatting 86 /// within an unwrapped line does not affect any other unwrapped lines. 87 struct UnwrappedLine { 88 UnwrappedLine() : Level(0), InPPDirective(false), MustBeDeclaration(false) { 89 } 90 91 // FIXME: Don't use std::list here. 92 /// \brief The \c Tokens comprising this \c UnwrappedLine. 93 std::list<FormatToken> Tokens; 94 95 /// \brief The indent level of the \c UnwrappedLine. 96 unsigned Level; 97 98 /// \brief Whether this \c UnwrappedLine is part of a preprocessor directive. 99 bool InPPDirective; 100 101 bool MustBeDeclaration; 102 }; 103 104 class UnwrappedLineConsumer { 105 public: 106 virtual ~UnwrappedLineConsumer() { 107 } 108 virtual void consumeUnwrappedLine(const UnwrappedLine &Line) = 0; 109 }; 110 111 class FormatTokenSource { 112 public: 113 virtual ~FormatTokenSource() { 114 } 115 virtual FormatToken getNextToken() = 0; 116 }; 117 118 class UnwrappedLineParser { 119 public: 120 UnwrappedLineParser(clang::DiagnosticsEngine &Diag, const FormatStyle &Style, 121 FormatTokenSource &Tokens, 122 UnwrappedLineConsumer &Callback); 123 124 /// Returns true in case of a structural error. 125 bool parse(); 126 127 private: 128 bool parseFile(); 129 bool parseLevel(bool HasOpeningBrace); 130 bool parseBlock(bool MustBeDeclaration, unsigned AddLevels = 1); 131 void parsePPDirective(); 132 void parsePPDefine(); 133 void parsePPUnknown(); 134 void parseStructuralElement(); 135 void parseBracedList(); 136 void parseReturn(); 137 void parseParens(); 138 void parseIfThenElse(); 139 void parseForOrWhileLoop(); 140 void parseDoWhile(); 141 void parseLabel(); 142 void parseCaseLabel(); 143 void parseSwitch(); 144 void parseNamespace(); 145 void parseAccessSpecifier(); 146 void parseEnum(); 147 void parseRecord(); 148 void parseObjCProtocolList(); 149 void parseObjCUntilAtEnd(); 150 void parseObjCInterfaceOrImplementation(); 151 void parseObjCProtocol(); 152 void addUnwrappedLine(); 153 bool eof() const; 154 void nextToken(); 155 void readToken(); 156 void flushComments(bool NewlineBeforeNext); 157 void pushToken(const FormatToken &Tok); 158 159 // FIXME: We are constantly running into bugs where Line.Level is incorrectly 160 // subtracted from beyond 0. Introduce a method to subtract from Line.Level 161 // and use that everywhere in the Parser. 162 OwningPtr<UnwrappedLine> Line; 163 164 // Comments are sorted into unwrapped lines by whether they are in the same 165 // line as the previous token, or not. If not, they belong to the next token. 166 // Since the next token might already be in a new unwrapped line, we need to 167 // store the comments belonging to that token. 168 SmallVector<FormatToken, 1> CommentsBeforeNextToken; 169 FormatToken FormatTok; 170 bool MustBreakBeforeNextToken; 171 172 // The parsed lines. Only added to through \c CurrentLines. 173 std::vector<UnwrappedLine> Lines; 174 175 // Preprocessor directives are parsed out-of-order from other unwrapped lines. 176 // Thus, we need to keep a list of preprocessor directives to be reported 177 // after an unwarpped line that has been started was finished. 178 std::vector<UnwrappedLine> PreprocessorDirectives; 179 180 // New unwrapped lines are added via CurrentLines. 181 // Usually points to \c &Lines. While parsing a preprocessor directive when 182 // there is an unfinished previous unwrapped line, will point to 183 // \c &PreprocessorDirectives. 184 std::vector<UnwrappedLine> *CurrentLines; 185 186 // We store for each line whether it must be a declaration depending on 187 // whether we are in a compound statement or not. 188 std::vector<bool> DeclarationScopeStack; 189 190 clang::DiagnosticsEngine &Diag; 191 const FormatStyle &Style; 192 FormatTokenSource *Tokens; 193 UnwrappedLineConsumer &Callback; 194 195 friend class ScopedLineState; 196 }; 197 198 } // end namespace format 199 } // end namespace clang 200 201 #endif // LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H 202