1 //===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file contains the declaration of the FormatToken, a wrapper 12 /// around Token with additional information related to formatting. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #ifndef LLVM_CLANG_FORMAT_FORMAT_TOKEN_H 17 #define LLVM_CLANG_FORMAT_FORMAT_TOKEN_H 18 19 #include "clang/Basic/OperatorPrecedence.h" 20 #include "clang/Lex/Lexer.h" 21 22 namespace clang { 23 namespace format { 24 25 enum TokenType { 26 TT_BinaryOperator, 27 TT_BlockComment, 28 TT_CastRParen, 29 TT_ConditionalExpr, 30 TT_CtorInitializerColon, 31 TT_CtorInitializerComma, 32 TT_DesignatedInitializerPeriod, 33 TT_ImplicitStringLiteral, 34 TT_InlineASMColon, 35 TT_InheritanceColon, 36 TT_FunctionTypeLParen, 37 TT_LineComment, 38 TT_ObjCArrayLiteral, 39 TT_ObjCBlockLParen, 40 TT_ObjCDecl, 41 TT_ObjCDictLiteral, 42 TT_ObjCForIn, 43 TT_ObjCMethodExpr, 44 TT_ObjCMethodSpecifier, 45 TT_ObjCProperty, 46 TT_ObjCSelectorName, 47 TT_OverloadedOperator, 48 TT_OverloadedOperatorLParen, 49 TT_PointerOrReference, 50 TT_PureVirtualSpecifier, 51 TT_RangeBasedForLoopColon, 52 TT_StartOfName, 53 TT_TemplateCloser, 54 TT_TemplateOpener, 55 TT_TrailingReturnArrow, 56 TT_TrailingUnaryOperator, 57 TT_UnaryOperator, 58 TT_Unknown 59 }; 60 61 // Represents what type of block a set of braces open. 62 enum BraceBlockKind { 63 BK_Unknown, 64 BK_Block, 65 BK_BracedInit 66 }; 67 68 // The packing kind of a function's parameters. 69 enum ParameterPackingKind { 70 PPK_BinPacked, 71 PPK_OnePerLine, 72 PPK_Inconclusive 73 }; 74 75 /// \brief A wrapper around a \c Token storing information about the 76 /// whitespace characters preceeding it. 77 struct FormatToken { 78 FormatToken() 79 : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0), 80 CodePointCount(0), IsFirst(false), MustBreakBefore(false), 81 IsUnterminatedLiteral(false), BlockKind(BK_Unknown), Type(TT_Unknown), 82 SpacesRequiredBefore(0), CanBreakBefore(false), 83 ClosesTemplateDeclaration(false), ParameterCount(0), 84 PackingKind(PPK_Inconclusive), TotalLength(0), UnbreakableTailLength(0), 85 BindingStrength(0), SplitPenalty(0), LongestObjCSelectorName(0), 86 FakeRParens(0), LastInChainOfCalls(false), 87 PartOfMultiVariableDeclStmt(false), MatchingParen(NULL), Previous(NULL), 88 Next(NULL) {} 89 90 /// \brief The \c Token. 91 Token Tok; 92 93 /// \brief The number of newlines immediately before the \c Token. 94 /// 95 /// This can be used to determine what the user wrote in the original code 96 /// and thereby e.g. leave an empty line between two function definitions. 97 unsigned NewlinesBefore; 98 99 /// \brief Whether there is at least one unescaped newline before the \c 100 /// Token. 101 bool HasUnescapedNewline; 102 103 /// \brief The range of the whitespace immediately preceeding the \c Token. 104 SourceRange WhitespaceRange; 105 106 /// \brief The offset just past the last '\n' in this token's leading 107 /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'. 108 unsigned LastNewlineOffset; 109 110 /// \brief The length of the non-whitespace parts of the token in CodePoints. 111 /// We need this to correctly measure number of columns a token spans. 112 unsigned CodePointCount; 113 114 /// \brief Indicates that this is the first token. 115 bool IsFirst; 116 117 /// \brief Whether there must be a line break before this token. 118 /// 119 /// This happens for example when a preprocessor directive ended directly 120 /// before the token. 121 bool MustBreakBefore; 122 123 /// \brief Returns actual token start location without leading escaped 124 /// newlines and whitespace. 125 /// 126 /// This can be different to Tok.getLocation(), which includes leading escaped 127 /// newlines. 128 SourceLocation getStartOfNonWhitespace() const { 129 return WhitespaceRange.getEnd(); 130 } 131 132 /// \brief The raw text of the token. 133 /// 134 /// Contains the raw token text without leading whitespace and without leading 135 /// escaped newlines. 136 StringRef TokenText; 137 138 /// \brief Set to \c true if this token is an unterminated literal. 139 bool IsUnterminatedLiteral; 140 141 /// \brief Contains the kind of block if this token is a brace. 142 BraceBlockKind BlockKind; 143 144 TokenType Type; 145 146 unsigned SpacesRequiredBefore; 147 bool CanBreakBefore; 148 149 bool ClosesTemplateDeclaration; 150 151 /// \brief Number of parameters, if this is "(", "[" or "<". 152 /// 153 /// This is initialized to 1 as we don't need to distinguish functions with 154 /// 0 parameters from functions with 1 parameter. Thus, we can simply count 155 /// the number of commas. 156 unsigned ParameterCount; 157 158 /// \brief If this is an opening parenthesis, how are the parameters packed? 159 ParameterPackingKind PackingKind; 160 161 /// \brief The total length of the line up to and including this token. 162 unsigned TotalLength; 163 164 /// \brief The length of following tokens until the next natural split point, 165 /// or the next token that can be broken. 166 unsigned UnbreakableTailLength; 167 168 // FIXME: Come up with a 'cleaner' concept. 169 /// \brief The binding strength of a token. This is a combined value of 170 /// operator precedence, parenthesis nesting, etc. 171 unsigned BindingStrength; 172 173 /// \brief Penalty for inserting a line break before this token. 174 unsigned SplitPenalty; 175 176 /// \brief If this is the first ObjC selector name in an ObjC method 177 /// definition or call, this contains the length of the longest name. 178 unsigned LongestObjCSelectorName; 179 180 /// \brief Stores the number of required fake parentheses and the 181 /// corresponding operator precedence. 182 /// 183 /// If multiple fake parentheses start at a token, this vector stores them in 184 /// reverse order, i.e. inner fake parenthesis first. 185 SmallVector<prec::Level, 4> FakeLParens; 186 /// \brief Insert this many fake ) after this token for correct indentation. 187 unsigned FakeRParens; 188 189 /// \brief Is this the last "." or "->" in a builder-type call? 190 bool LastInChainOfCalls; 191 192 /// \brief Is this token part of a \c DeclStmt defining multiple variables? 193 /// 194 /// Only set if \c Type == \c TT_StartOfName. 195 bool PartOfMultiVariableDeclStmt; 196 197 bool is(tok::TokenKind Kind) const { return Tok.is(Kind); } 198 199 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const { 200 return is(K1) || is(K2); 201 } 202 203 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3) const { 204 return is(K1) || is(K2) || is(K3); 205 } 206 207 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3, 208 tok::TokenKind K4, tok::TokenKind K5 = tok::NUM_TOKENS, 209 tok::TokenKind K6 = tok::NUM_TOKENS, 210 tok::TokenKind K7 = tok::NUM_TOKENS, 211 tok::TokenKind K8 = tok::NUM_TOKENS, 212 tok::TokenKind K9 = tok::NUM_TOKENS, 213 tok::TokenKind K10 = tok::NUM_TOKENS, 214 tok::TokenKind K11 = tok::NUM_TOKENS, 215 tok::TokenKind K12 = tok::NUM_TOKENS) const { 216 return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) || 217 is(K8) || is(K9) || is(K10) || is(K11) || is(K12); 218 } 219 220 bool isNot(tok::TokenKind Kind) const { return Tok.isNot(Kind); } 221 222 bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { 223 return Tok.isObjCAtKeyword(Kind); 224 } 225 226 bool isAccessSpecifier(bool ColonRequired = true) const { 227 return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) && 228 (!ColonRequired || (Next && Next->is(tok::colon))); 229 } 230 231 bool isObjCAccessSpecifier() const { 232 return is(tok::at) && Next && (Next->isObjCAtKeyword(tok::objc_public) || 233 Next->isObjCAtKeyword(tok::objc_protected) || 234 Next->isObjCAtKeyword(tok::objc_package) || 235 Next->isObjCAtKeyword(tok::objc_private)); 236 } 237 238 /// \brief Returns whether \p Tok is ([{ or a template opening <. 239 bool opensScope() const { 240 return isOneOf(tok::l_paren, tok::l_brace, tok::l_square) || 241 Type == TT_TemplateOpener; 242 } 243 /// \brief Returns whether \p Tok is )]} or a template closing >. 244 bool closesScope() const { 245 return isOneOf(tok::r_paren, tok::r_brace, tok::r_square) || 246 Type == TT_TemplateCloser; 247 } 248 249 bool isUnaryOperator() const { 250 switch (Tok.getKind()) { 251 case tok::plus: 252 case tok::plusplus: 253 case tok::minus: 254 case tok::minusminus: 255 case tok::exclaim: 256 case tok::tilde: 257 case tok::kw_sizeof: 258 case tok::kw_alignof: 259 return true; 260 default: 261 return false; 262 } 263 } 264 bool isBinaryOperator() const { 265 // Comma is a binary operator, but does not behave as such wrt. formatting. 266 return getPrecedence() > prec::Comma; 267 } 268 bool isTrailingComment() const { 269 return is(tok::comment) && (!Next || Next->NewlinesBefore > 0); 270 } 271 272 prec::Level getPrecedence() const { 273 return getBinOpPrecedence(Tok.getKind(), true, true); 274 } 275 276 /// \brief Returns the previous token ignoring comments. 277 FormatToken *getPreviousNonComment() const { 278 FormatToken *Tok = Previous; 279 while (Tok != NULL && Tok->is(tok::comment)) 280 Tok = Tok->Previous; 281 return Tok; 282 } 283 284 /// \brief Returns the next token ignoring comments. 285 const FormatToken *getNextNonComment() const { 286 const FormatToken *Tok = Next; 287 while (Tok != NULL && Tok->is(tok::comment)) 288 Tok = Tok->Next; 289 return Tok; 290 } 291 292 FormatToken *MatchingParen; 293 294 FormatToken *Previous; 295 FormatToken *Next; 296 297 private: 298 // Disallow copying. 299 FormatToken(const FormatToken &) LLVM_DELETED_FUNCTION; 300 void operator=(const FormatToken &) LLVM_DELETED_FUNCTION; 301 }; 302 303 } // namespace format 304 } // namespace clang 305 306 #endif // LLVM_CLANG_FORMAT_FORMAT_TOKEN_H 307