1 //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file implements a token annotator, i.e. creates 12 /// \c AnnotatedTokens out of \c FormatTokens with required extra information. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #ifndef LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H 17 #define LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H 18 19 #include "UnwrappedLineParser.h" 20 #include "clang/Basic/OperatorPrecedence.h" 21 #include "clang/Format/Format.h" 22 #include <string> 23 24 namespace clang { 25 class Lexer; 26 class SourceManager; 27 28 namespace format { 29 30 enum TokenType { 31 TT_BinaryOperator, 32 TT_BlockComment, 33 TT_CastRParen, 34 TT_ConditionalExpr, 35 TT_CtorInitializerColon, 36 TT_ImplicitStringLiteral, 37 TT_InlineASMColon, 38 TT_InheritanceColon, 39 TT_LineComment, 40 TT_ObjCArrayLiteral, 41 TT_ObjCBlockLParen, 42 TT_ObjCDecl, 43 TT_ObjCForIn, 44 TT_ObjCMethodExpr, 45 TT_ObjCMethodSpecifier, 46 TT_ObjCProperty, 47 TT_ObjCSelectorName, 48 TT_OverloadedOperatorLParen, 49 TT_PointerOrReference, 50 TT_PureVirtualSpecifier, 51 TT_RangeBasedForLoopColon, 52 TT_StartOfName, 53 TT_TemplateCloser, 54 TT_TemplateOpener, 55 TT_TrailingUnaryOperator, 56 TT_UnaryOperator, 57 TT_Unknown 58 }; 59 60 enum LineType { 61 LT_Invalid, 62 LT_Other, 63 LT_BuilderTypeCall, 64 LT_PreprocessorDirective, 65 LT_VirtualFunctionDecl, 66 LT_ObjCDecl, // An @interface, @implementation, or @protocol line. 67 LT_ObjCMethodDecl, 68 LT_ObjCProperty // An @property line. 69 }; 70 71 class AnnotatedToken { 72 public: 73 explicit AnnotatedToken(const FormatToken &FormatTok) 74 : FormatTok(FormatTok), Type(TT_Unknown), SpacesRequiredBefore(0), 75 CanBreakBefore(false), MustBreakBefore(false), 76 ClosesTemplateDeclaration(false), MatchingParen(NULL), 77 ParameterCount(0), BindingStrength(0), SplitPenalty(0), 78 LongestObjCSelectorName(0), Parent(NULL), FakeLParens(0), 79 FakeRParens(0), LastInChainOfCalls(false) { 80 } 81 82 bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); } 83 84 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const { 85 return is(K1) || is(K2); 86 } 87 88 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3) const { 89 return is(K1) || is(K2) || is(K3); 90 } 91 92 bool isOneOf( 93 tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3, 94 tok::TokenKind K4, tok::TokenKind K5 = tok::NUM_TOKENS, 95 tok::TokenKind K6 = tok::NUM_TOKENS, tok::TokenKind K7 = tok::NUM_TOKENS, 96 tok::TokenKind K8 = tok::NUM_TOKENS, tok::TokenKind K9 = tok::NUM_TOKENS, 97 tok::TokenKind K10 = tok::NUM_TOKENS, 98 tok::TokenKind K11 = tok::NUM_TOKENS, 99 tok::TokenKind K12 = tok::NUM_TOKENS) const { 100 return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) || 101 is(K8) || is(K9) || is(K10) || is(K11) || is(K12); 102 } 103 104 bool isNot(tok::TokenKind Kind) const { return FormatTok.Tok.isNot(Kind); } 105 106 bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { 107 return FormatTok.Tok.isObjCAtKeyword(Kind); 108 } 109 110 FormatToken FormatTok; 111 112 TokenType Type; 113 114 unsigned SpacesRequiredBefore; 115 bool CanBreakBefore; 116 bool MustBreakBefore; 117 118 bool ClosesTemplateDeclaration; 119 120 AnnotatedToken *MatchingParen; 121 122 /// \brief Number of parameters, if this is "(", "[" or "<". 123 /// 124 /// This is initialized to 1 as we don't need to distinguish functions with 125 /// 0 parameters from functions with 1 parameter. Thus, we can simply count 126 /// the number of commas. 127 unsigned ParameterCount; 128 129 /// \brief The total length of the line up to and including this token. 130 unsigned TotalLength; 131 132 // FIXME: Come up with a 'cleaner' concept. 133 /// \brief The binding strength of a token. This is a combined value of 134 /// operator precedence, parenthesis nesting, etc. 135 unsigned BindingStrength; 136 137 /// \brief Penalty for inserting a line break before this token. 138 unsigned SplitPenalty; 139 140 /// \brief If this is the first ObjC selector name in an ObjC method 141 /// definition or call, this contains the length of the longest name. 142 unsigned LongestObjCSelectorName; 143 144 std::vector<AnnotatedToken> Children; 145 AnnotatedToken *Parent; 146 147 /// \brief Insert this many fake ( before this token for correct indentation. 148 unsigned FakeLParens; 149 /// \brief Insert this many fake ) after this token for correct indentation. 150 unsigned FakeRParens; 151 152 /// \brief Is this the last "." or "->" in a builder-type call? 153 bool LastInChainOfCalls; 154 155 const AnnotatedToken *getPreviousNoneComment() const { 156 AnnotatedToken *Tok = Parent; 157 while (Tok != NULL && Tok->is(tok::comment)) 158 Tok = Tok->Parent; 159 return Tok; 160 } 161 }; 162 163 class AnnotatedLine { 164 public: 165 AnnotatedLine(const UnwrappedLine &Line) 166 : First(Line.Tokens.front()), Level(Line.Level), 167 InPPDirective(Line.InPPDirective), 168 MustBeDeclaration(Line.MustBeDeclaration), 169 MightBeFunctionDecl(false) { 170 assert(!Line.Tokens.empty()); 171 AnnotatedToken *Current = &First; 172 for (std::list<FormatToken>::const_iterator I = ++Line.Tokens.begin(), 173 E = Line.Tokens.end(); 174 I != E; ++I) { 175 Current->Children.push_back(AnnotatedToken(*I)); 176 Current->Children[0].Parent = Current; 177 Current = &Current->Children[0]; 178 } 179 Last = Current; 180 } 181 AnnotatedLine(const AnnotatedLine &Other) 182 : First(Other.First), Type(Other.Type), Level(Other.Level), 183 InPPDirective(Other.InPPDirective), 184 MustBeDeclaration(Other.MustBeDeclaration), 185 MightBeFunctionDecl(Other.MightBeFunctionDecl) { 186 Last = &First; 187 while (!Last->Children.empty()) { 188 Last->Children[0].Parent = Last; 189 Last = &Last->Children[0]; 190 } 191 } 192 193 AnnotatedToken First; 194 AnnotatedToken *Last; 195 196 LineType Type; 197 unsigned Level; 198 bool InPPDirective; 199 bool MustBeDeclaration; 200 bool MightBeFunctionDecl; 201 }; 202 203 inline prec::Level getPrecedence(const AnnotatedToken &Tok) { 204 return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true); 205 } 206 207 /// \brief Determines extra information about the tokens comprising an 208 /// \c UnwrappedLine. 209 class TokenAnnotator { 210 public: 211 TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex, 212 IdentifierInfo &Ident_in) 213 : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Ident_in(Ident_in) { 214 } 215 216 void annotate(AnnotatedLine &Line); 217 void calculateFormattingInformation(AnnotatedLine &Line); 218 219 private: 220 /// \brief Calculate the penalty for splitting before \c Tok. 221 unsigned splitPenalty(const AnnotatedLine &Line, const AnnotatedToken &Tok); 222 223 bool spaceRequiredBetween(const AnnotatedLine &Line, 224 const AnnotatedToken &Left, 225 const AnnotatedToken &Right); 226 227 bool spaceRequiredBefore(const AnnotatedLine &Line, 228 const AnnotatedToken &Tok); 229 230 bool canBreakBefore(const AnnotatedLine &Line, const AnnotatedToken &Right); 231 232 const FormatStyle &Style; 233 SourceManager &SourceMgr; 234 Lexer &Lex; 235 236 // Contextual keywords: 237 IdentifierInfo &Ident_in; 238 }; 239 240 } // end namespace format 241 } // end namespace clang 242 243 #endif // LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H 244