1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file implements a token annotator, i.e. creates 12 /// \c AnnotatedTokens out of \c FormatTokens with required extra information. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "TokenAnnotator.h" 17 #include "clang/Basic/SourceManager.h" 18 #include "clang/Lex/Lexer.h" 19 20 namespace clang { 21 namespace format { 22 23 static bool isUnaryOperator(const AnnotatedToken &Tok) { 24 switch (Tok.FormatTok.Tok.getKind()) { 25 case tok::plus: 26 case tok::plusplus: 27 case tok::minus: 28 case tok::minusminus: 29 case tok::exclaim: 30 case tok::tilde: 31 case tok::kw_sizeof: 32 case tok::kw_alignof: 33 return true; 34 default: 35 return false; 36 } 37 } 38 39 static bool isBinaryOperator(const AnnotatedToken &Tok) { 40 // Comma is a binary operator, but does not behave as such wrt. formatting. 41 return getPrecedence(Tok) > prec::Comma; 42 } 43 44 // Returns the previous token ignoring comments. 45 static AnnotatedToken *getPreviousToken(AnnotatedToken &Tok) { 46 AnnotatedToken *PrevToken = Tok.Parent; 47 while (PrevToken != NULL && PrevToken->is(tok::comment)) 48 PrevToken = PrevToken->Parent; 49 return PrevToken; 50 } 51 static const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) { 52 return getPreviousToken(const_cast<AnnotatedToken &>(Tok)); 53 } 54 55 static bool isTrailingComment(AnnotatedToken *Tok) { 56 return Tok != NULL && Tok->is(tok::comment) && 57 (Tok->Children.empty() || 58 Tok->Children[0].FormatTok.NewlinesBefore > 0); 59 } 60 61 // Returns the next token ignoring comments. 62 static const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) { 63 if (Tok.Children.empty()) 64 return NULL; 65 const AnnotatedToken *NextToken = &Tok.Children[0]; 66 while (NextToken->is(tok::comment)) { 67 if (NextToken->Children.empty()) 68 return NULL; 69 NextToken = &NextToken->Children[0]; 70 } 71 return NextToken; 72 } 73 74 static bool closesScope(const AnnotatedToken &Tok) { 75 return Tok.isOneOf(tok::r_paren, tok::r_brace, tok::r_square) || 76 Tok.Type == TT_TemplateCloser; 77 } 78 79 static bool opensScope(const AnnotatedToken &Tok) { 80 return Tok.isOneOf(tok::l_paren, tok::l_brace, tok::l_square) || 81 Tok.Type == TT_TemplateOpener; 82 } 83 84 /// \brief A parser that gathers additional information about tokens. 85 /// 86 /// The \c TokenAnnotator tries to match parenthesis and square brakets and 87 /// store a parenthesis levels. It also tries to resolve matching "<" and ">" 88 /// into template parameter lists. 89 class AnnotatingParser { 90 public: 91 AnnotatingParser(SourceManager &SourceMgr, Lexer &Lex, AnnotatedLine &Line, 92 IdentifierInfo &Ident_in) 93 : SourceMgr(SourceMgr), Lex(Lex), Line(Line), CurrentToken(&Line.First), 94 KeywordVirtualFound(false), Ident_in(Ident_in) { 95 Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/ false)); 96 } 97 98 private: 99 bool parseAngle() { 100 if (CurrentToken == NULL) 101 return false; 102 ScopedContextCreator ContextCreator(*this, tok::less, 10); 103 AnnotatedToken *Left = CurrentToken->Parent; 104 Contexts.back().IsExpression = false; 105 while (CurrentToken != NULL) { 106 if (CurrentToken->is(tok::greater)) { 107 Left->MatchingParen = CurrentToken; 108 CurrentToken->MatchingParen = Left; 109 CurrentToken->Type = TT_TemplateCloser; 110 next(); 111 return true; 112 } 113 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace, 114 tok::pipepipe, tok::ampamp, tok::question, 115 tok::colon)) 116 return false; 117 updateParameterCount(Left, CurrentToken); 118 if (!consumeToken()) 119 return false; 120 } 121 return false; 122 } 123 124 bool parseParens(bool LookForDecls = false) { 125 if (CurrentToken == NULL) 126 return false; 127 ScopedContextCreator ContextCreator(*this, tok::l_paren, 1); 128 129 // FIXME: This is a bit of a hack. Do better. 130 Contexts.back().ColonIsForRangeExpr = 131 Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr; 132 133 bool StartsObjCMethodExpr = false; 134 AnnotatedToken *Left = CurrentToken->Parent; 135 if (CurrentToken->is(tok::caret)) { 136 // ^( starts a block. 137 Left->Type = TT_ObjCBlockLParen; 138 } else if (AnnotatedToken *MaybeSel = Left->Parent) { 139 // @selector( starts a selector. 140 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent && 141 MaybeSel->Parent->is(tok::at)) { 142 StartsObjCMethodExpr = true; 143 } 144 } 145 146 if (StartsObjCMethodExpr) { 147 Contexts.back().ColonIsObjCMethodExpr = true; 148 Left->Type = TT_ObjCMethodExpr; 149 } 150 151 while (CurrentToken != NULL) { 152 // LookForDecls is set when "if (" has been seen. Check for 153 // 'identifier' '*' 'identifier' followed by not '=' -- this 154 // '*' has to be a binary operator but determineStarAmpUsage() will 155 // categorize it as an unary operator, so set the right type here. 156 if (LookForDecls && !CurrentToken->Children.empty()) { 157 AnnotatedToken &Prev = *CurrentToken->Parent; 158 AnnotatedToken &Next = CurrentToken->Children[0]; 159 if (Prev.Parent->is(tok::identifier) && 160 Prev.isOneOf(tok::star, tok::amp, tok::ampamp) && 161 CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) { 162 Prev.Type = TT_BinaryOperator; 163 LookForDecls = false; 164 } 165 } 166 167 if (CurrentToken->is(tok::r_paren)) { 168 Left->MatchingParen = CurrentToken; 169 CurrentToken->MatchingParen = Left; 170 171 if (StartsObjCMethodExpr) { 172 CurrentToken->Type = TT_ObjCMethodExpr; 173 if (Contexts.back().FirstObjCSelectorName != NULL) { 174 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 175 Contexts.back().LongestObjCSelectorName; 176 } 177 } 178 179 next(); 180 return true; 181 } 182 if (CurrentToken->isOneOf(tok::r_square, tok::r_brace)) 183 return false; 184 updateParameterCount(Left, CurrentToken); 185 if (!consumeToken()) 186 return false; 187 } 188 return false; 189 } 190 191 bool parseSquare() { 192 if (!CurrentToken) 193 return false; 194 195 // A '[' could be an index subscript (after an indentifier or after 196 // ')' or ']'), it could be the start of an Objective-C method 197 // expression, or it could the the start of an Objective-C array literal. 198 AnnotatedToken *Left = CurrentToken->Parent; 199 AnnotatedToken *Parent = getPreviousToken(*Left); 200 bool StartsObjCMethodExpr = 201 Contexts.back().CanBeExpression && 202 (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren, 203 tok::kw_return, tok::kw_throw) || 204 isUnaryOperator(*Parent) || Parent->Type == TT_ObjCForIn || 205 Parent->Type == TT_CastRParen || 206 getBinOpPrecedence(Parent->FormatTok.Tok.getKind(), true, true) > 207 prec::Unknown); 208 ScopedContextCreator ContextCreator(*this, tok::l_square, 10); 209 Contexts.back().IsExpression = true; 210 bool StartsObjCArrayLiteral = Parent && Parent->is(tok::at); 211 212 if (StartsObjCMethodExpr) { 213 Contexts.back().ColonIsObjCMethodExpr = true; 214 Left->Type = TT_ObjCMethodExpr; 215 } else if (StartsObjCArrayLiteral) { 216 Left->Type = TT_ObjCArrayLiteral; 217 } 218 219 while (CurrentToken != NULL) { 220 if (CurrentToken->is(tok::r_square)) { 221 if (!CurrentToken->Children.empty() && 222 CurrentToken->Children[0].is(tok::l_paren)) { 223 // An ObjC method call is rarely followed by an open parenthesis. 224 // FIXME: Do we incorrectly label ":" with this? 225 StartsObjCMethodExpr = false; 226 Left->Type = TT_Unknown; 227 } 228 if (StartsObjCMethodExpr) { 229 CurrentToken->Type = TT_ObjCMethodExpr; 230 // determineStarAmpUsage() thinks that '*' '[' is allocating an 231 // array of pointers, but if '[' starts a selector then '*' is a 232 // binary operator. 233 if (Parent != NULL && Parent->Type == TT_PointerOrReference) 234 Parent->Type = TT_BinaryOperator; 235 } else if (StartsObjCArrayLiteral) { 236 CurrentToken->Type = TT_ObjCArrayLiteral; 237 } 238 Left->MatchingParen = CurrentToken; 239 CurrentToken->MatchingParen = Left; 240 if (Contexts.back().FirstObjCSelectorName != NULL) 241 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 242 Contexts.back().LongestObjCSelectorName; 243 next(); 244 return true; 245 } 246 if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace)) 247 return false; 248 updateParameterCount(Left, CurrentToken); 249 if (!consumeToken()) 250 return false; 251 } 252 return false; 253 } 254 255 bool parseBrace() { 256 // Lines are fine to end with '{'. 257 if (CurrentToken == NULL) 258 return true; 259 ScopedContextCreator ContextCreator(*this, tok::l_brace, 1); 260 AnnotatedToken *Left = CurrentToken->Parent; 261 while (CurrentToken != NULL) { 262 if (CurrentToken->is(tok::r_brace)) { 263 Left->MatchingParen = CurrentToken; 264 CurrentToken->MatchingParen = Left; 265 next(); 266 return true; 267 } 268 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square)) 269 return false; 270 updateParameterCount(Left, CurrentToken); 271 if (!consumeToken()) 272 return false; 273 } 274 return true; 275 } 276 277 void updateParameterCount(AnnotatedToken *Left, AnnotatedToken *Current) { 278 if (Current->is(tok::comma)) 279 ++Left->ParameterCount; 280 else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) 281 Left->ParameterCount = 1; 282 } 283 284 bool parseConditional() { 285 while (CurrentToken != NULL) { 286 if (CurrentToken->is(tok::colon)) { 287 CurrentToken->Type = TT_ConditionalExpr; 288 next(); 289 return true; 290 } 291 if (!consumeToken()) 292 return false; 293 } 294 return false; 295 } 296 297 bool parseTemplateDeclaration() { 298 if (CurrentToken != NULL && CurrentToken->is(tok::less)) { 299 CurrentToken->Type = TT_TemplateOpener; 300 next(); 301 if (!parseAngle()) 302 return false; 303 if (CurrentToken != NULL) 304 CurrentToken->Parent->ClosesTemplateDeclaration = true; 305 return true; 306 } 307 return false; 308 } 309 310 bool consumeToken() { 311 AnnotatedToken *Tok = CurrentToken; 312 next(); 313 switch (Tok->FormatTok.Tok.getKind()) { 314 case tok::plus: 315 case tok::minus: 316 // At the start of the line, +/- specific ObjectiveC method 317 // declarations. 318 if (Tok->Parent == NULL) 319 Tok->Type = TT_ObjCMethodSpecifier; 320 break; 321 case tok::colon: 322 if (Tok->Parent == NULL) 323 return false; 324 // Colons from ?: are handled in parseConditional(). 325 if (Tok->Parent->is(tok::r_paren) && Contexts.size() == 1) { 326 Tok->Type = TT_CtorInitializerColon; 327 } else if (Contexts.back().ColonIsObjCMethodExpr || 328 Line.First.Type == TT_ObjCMethodSpecifier) { 329 Tok->Type = TT_ObjCMethodExpr; 330 Tok->Parent->Type = TT_ObjCSelectorName; 331 if (Tok->Parent->FormatTok.TokenLength > 332 Contexts.back().LongestObjCSelectorName) 333 Contexts.back().LongestObjCSelectorName = 334 Tok->Parent->FormatTok.TokenLength; 335 if (Contexts.back().FirstObjCSelectorName == NULL) 336 Contexts.back().FirstObjCSelectorName = Tok->Parent; 337 } else if (Contexts.back().ColonIsForRangeExpr) { 338 Tok->Type = TT_RangeBasedForLoopColon; 339 } else if (Contexts.size() == 1) { 340 Tok->Type = TT_InheritanceColon; 341 } else if (Contexts.back().ContextKind == tok::l_paren) { 342 Tok->Type = TT_InlineASMColon; 343 } 344 break; 345 case tok::kw_if: 346 case tok::kw_while: 347 if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) { 348 next(); 349 if (!parseParens(/*LookForDecls=*/ true)) 350 return false; 351 } 352 break; 353 case tok::kw_for: 354 Contexts.back().ColonIsForRangeExpr = true; 355 next(); 356 if (!parseParens()) 357 return false; 358 break; 359 case tok::l_paren: 360 if (!parseParens()) 361 return false; 362 if (Line.MustBeDeclaration) 363 Line.MightBeFunctionDecl = true; 364 break; 365 case tok::l_square: 366 if (!parseSquare()) 367 return false; 368 break; 369 case tok::l_brace: 370 if (!parseBrace()) 371 return false; 372 break; 373 case tok::less: 374 if (parseAngle()) 375 Tok->Type = TT_TemplateOpener; 376 else { 377 Tok->Type = TT_BinaryOperator; 378 CurrentToken = Tok; 379 next(); 380 } 381 break; 382 case tok::r_paren: 383 case tok::r_square: 384 return false; 385 case tok::r_brace: 386 // Lines can start with '}'. 387 if (Tok->Parent != NULL) 388 return false; 389 break; 390 case tok::greater: 391 Tok->Type = TT_BinaryOperator; 392 break; 393 case tok::kw_operator: 394 while (CurrentToken && CurrentToken->isNot(tok::l_paren)) { 395 if (CurrentToken->isOneOf(tok::star, tok::amp)) 396 CurrentToken->Type = TT_PointerOrReference; 397 consumeToken(); 398 } 399 if (CurrentToken) 400 CurrentToken->Type = TT_OverloadedOperatorLParen; 401 break; 402 case tok::question: 403 parseConditional(); 404 break; 405 case tok::kw_template: 406 parseTemplateDeclaration(); 407 break; 408 case tok::identifier: 409 if (Line.First.is(tok::kw_for) && 410 Tok->FormatTok.Tok.getIdentifierInfo() == &Ident_in) 411 Tok->Type = TT_ObjCForIn; 412 break; 413 default: 414 break; 415 } 416 return true; 417 } 418 419 void parseIncludeDirective() { 420 next(); 421 if (CurrentToken != NULL && CurrentToken->is(tok::less)) { 422 next(); 423 while (CurrentToken != NULL) { 424 if (CurrentToken->isNot(tok::comment) || 425 !CurrentToken->Children.empty()) 426 CurrentToken->Type = TT_ImplicitStringLiteral; 427 next(); 428 } 429 } else { 430 while (CurrentToken != NULL) { 431 if (CurrentToken->is(tok::string_literal)) 432 // Mark these string literals as "implicit" literals, too, so that 433 // they are not split or line-wrapped. 434 CurrentToken->Type = TT_ImplicitStringLiteral; 435 next(); 436 } 437 } 438 } 439 440 void parseWarningOrError() { 441 next(); 442 // We still want to format the whitespace left of the first token of the 443 // warning or error. 444 next(); 445 while (CurrentToken != NULL) { 446 CurrentToken->Type = TT_ImplicitStringLiteral; 447 next(); 448 } 449 } 450 451 void parsePreprocessorDirective() { 452 next(); 453 if (CurrentToken == NULL) 454 return; 455 // Hashes in the middle of a line can lead to any strange token 456 // sequence. 457 if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL) 458 return; 459 switch (CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) { 460 case tok::pp_include: 461 case tok::pp_import: 462 parseIncludeDirective(); 463 break; 464 case tok::pp_error: 465 case tok::pp_warning: 466 parseWarningOrError(); 467 break; 468 default: 469 break; 470 } 471 while (CurrentToken != NULL) 472 next(); 473 } 474 475 public: 476 LineType parseLine() { 477 int PeriodsAndArrows = 0; 478 AnnotatedToken *LastPeriodOrArrow = NULL; 479 bool CanBeBuilderTypeStmt = true; 480 if (CurrentToken->is(tok::hash)) { 481 parsePreprocessorDirective(); 482 return LT_PreprocessorDirective; 483 } 484 while (CurrentToken != NULL) { 485 if (CurrentToken->is(tok::kw_virtual)) 486 KeywordVirtualFound = true; 487 if (CurrentToken->isOneOf(tok::period, tok::arrow)) { 488 ++PeriodsAndArrows; 489 LastPeriodOrArrow = CurrentToken; 490 } 491 AnnotatedToken *TheToken = CurrentToken; 492 if (!consumeToken()) 493 return LT_Invalid; 494 if (getPrecedence(*TheToken) > prec::Assignment && 495 TheToken->Type == TT_BinaryOperator) 496 CanBeBuilderTypeStmt = false; 497 } 498 if (KeywordVirtualFound) 499 return LT_VirtualFunctionDecl; 500 501 // Assume a builder-type call if there are 2 or more "." and "->". 502 if (PeriodsAndArrows >= 2 && CanBeBuilderTypeStmt) { 503 LastPeriodOrArrow->LastInChainOfCalls = true; 504 return LT_BuilderTypeCall; 505 } 506 507 if (Line.First.Type == TT_ObjCMethodSpecifier) { 508 if (Contexts.back().FirstObjCSelectorName != NULL) 509 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 510 Contexts.back().LongestObjCSelectorName; 511 return LT_ObjCMethodDecl; 512 } 513 514 return LT_Other; 515 } 516 517 private: 518 void next() { 519 if (CurrentToken != NULL) { 520 determineTokenType(*CurrentToken); 521 CurrentToken->BindingStrength = Contexts.back().BindingStrength; 522 } 523 524 if (CurrentToken != NULL && !CurrentToken->Children.empty()) 525 CurrentToken = &CurrentToken->Children[0]; 526 else 527 CurrentToken = NULL; 528 529 // Reset token type in case we have already looked at it and then recovered 530 // from an error (e.g. failure to find the matching >). 531 if (CurrentToken != NULL) 532 CurrentToken->Type = TT_Unknown; 533 } 534 535 /// \brief A struct to hold information valid in a specific context, e.g. 536 /// a pair of parenthesis. 537 struct Context { 538 Context(tok::TokenKind ContextKind, unsigned BindingStrength, 539 bool IsExpression) 540 : ContextKind(ContextKind), BindingStrength(BindingStrength), 541 LongestObjCSelectorName(0), ColonIsForRangeExpr(false), 542 ColonIsObjCMethodExpr(false), FirstObjCSelectorName(NULL), 543 IsExpression(IsExpression), CanBeExpression(true) {} 544 545 tok::TokenKind ContextKind; 546 unsigned BindingStrength; 547 unsigned LongestObjCSelectorName; 548 bool ColonIsForRangeExpr; 549 bool ColonIsObjCMethodExpr; 550 AnnotatedToken *FirstObjCSelectorName; 551 bool IsExpression; 552 bool CanBeExpression; 553 }; 554 555 /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime 556 /// of each instance. 557 struct ScopedContextCreator { 558 AnnotatingParser &P; 559 560 ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind, 561 unsigned Increase) 562 : P(P) { 563 P.Contexts.push_back( 564 Context(ContextKind, P.Contexts.back().BindingStrength + Increase, 565 P.Contexts.back().IsExpression)); 566 } 567 568 ~ScopedContextCreator() { P.Contexts.pop_back(); } 569 }; 570 571 void determineTokenType(AnnotatedToken &Current) { 572 if (getPrecedence(Current) == prec::Assignment) { 573 Contexts.back().IsExpression = true; 574 for (AnnotatedToken *Previous = Current.Parent; 575 Previous && Previous->isNot(tok::comma); 576 Previous = Previous->Parent) { 577 if (Previous->is(tok::r_square)) 578 Previous = Previous->MatchingParen; 579 if (Previous->Type == TT_BinaryOperator && 580 Previous->isOneOf(tok::star, tok::amp)) { 581 Previous->Type = TT_PointerOrReference; 582 } 583 } 584 } else if (Current.isOneOf(tok::kw_return, tok::kw_throw) || 585 (Current.is(tok::l_paren) && !Line.MustBeDeclaration && 586 (!Current.Parent || Current.Parent->isNot(tok::kw_for)))) { 587 Contexts.back().IsExpression = true; 588 } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) { 589 for (AnnotatedToken *Previous = Current.Parent; 590 Previous && Previous->isOneOf(tok::star, tok::amp); 591 Previous = Previous->Parent) 592 Previous->Type = TT_PointerOrReference; 593 } else if (Current.Parent && 594 Current.Parent->Type == TT_CtorInitializerColon) { 595 Contexts.back().IsExpression = true; 596 } else if (Current.is(tok::kw_new)) { 597 Contexts.back().CanBeExpression = false; 598 } 599 600 if (Current.Type == TT_Unknown) { 601 if (Current.Parent && Current.is(tok::identifier) && 602 ((Current.Parent->is(tok::identifier) && 603 Current.Parent->FormatTok.Tok.getIdentifierInfo() 604 ->getPPKeywordID() == tok::pp_not_keyword) || 605 Current.Parent->Type == TT_PointerOrReference || 606 Current.Parent->Type == TT_TemplateCloser)) { 607 Current.Type = TT_StartOfName; 608 } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) { 609 Current.Type = 610 determineStarAmpUsage(Current, Contexts.back().IsExpression); 611 } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) { 612 Current.Type = determinePlusMinusCaretUsage(Current); 613 } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) { 614 Current.Type = determineIncrementUsage(Current); 615 } else if (Current.is(tok::exclaim)) { 616 Current.Type = TT_UnaryOperator; 617 } else if (isBinaryOperator(Current)) { 618 Current.Type = TT_BinaryOperator; 619 } else if (Current.is(tok::comment)) { 620 std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr, 621 Lex.getLangOpts())); 622 if (StringRef(Data).startswith("//")) 623 Current.Type = TT_LineComment; 624 else 625 Current.Type = TT_BlockComment; 626 } else if (Current.is(tok::r_paren)) { 627 bool ParensNotExpr = !Current.Parent || 628 Current.Parent->Type == TT_PointerOrReference || 629 Current.Parent->Type == TT_TemplateCloser; 630 bool ParensCouldEndDecl = 631 !Current.Children.empty() && 632 Current.Children[0].isOneOf(tok::equal, tok::semi, tok::l_brace); 633 bool IsSizeOfOrAlignOf = 634 Current.MatchingParen && Current.MatchingParen->Parent && 635 Current.MatchingParen->Parent->isOneOf(tok::kw_sizeof, 636 tok::kw_alignof); 637 if (ParensNotExpr && !ParensCouldEndDecl && !IsSizeOfOrAlignOf && 638 Contexts.back().IsExpression) 639 // FIXME: We need to get smarter and understand more cases of casts. 640 Current.Type = TT_CastRParen; 641 } else if (Current.is(tok::at) && Current.Children.size()) { 642 switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) { 643 case tok::objc_interface: 644 case tok::objc_implementation: 645 case tok::objc_protocol: 646 Current.Type = TT_ObjCDecl; 647 break; 648 case tok::objc_property: 649 Current.Type = TT_ObjCProperty; 650 break; 651 default: 652 break; 653 } 654 } 655 } 656 } 657 658 /// \brief Return the type of the given token assuming it is * or &. 659 TokenType 660 determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression) { 661 const AnnotatedToken *PrevToken = getPreviousToken(Tok); 662 if (PrevToken == NULL) 663 return TT_UnaryOperator; 664 665 const AnnotatedToken *NextToken = getNextToken(Tok); 666 if (NextToken == NULL) 667 return TT_Unknown; 668 669 if (PrevToken->is(tok::l_paren) && !IsExpression) 670 return TT_PointerOrReference; 671 672 if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace, 673 tok::comma, tok::semi, tok::kw_return, tok::colon, 674 tok::equal) || 675 PrevToken->Type == TT_BinaryOperator || 676 PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen) 677 return TT_UnaryOperator; 678 679 if (NextToken->is(tok::l_square)) 680 return TT_PointerOrReference; 681 682 if (PrevToken->FormatTok.Tok.isLiteral() || 683 PrevToken->isOneOf(tok::r_paren, tok::r_square) || 684 NextToken->FormatTok.Tok.isLiteral() || isUnaryOperator(*NextToken) || 685 NextToken->isOneOf(tok::l_paren, tok::l_square)) 686 return TT_BinaryOperator; 687 688 // It is very unlikely that we are going to find a pointer or reference type 689 // definition on the RHS of an assignment. 690 if (IsExpression) 691 return TT_BinaryOperator; 692 693 return TT_PointerOrReference; 694 } 695 696 TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) { 697 const AnnotatedToken *PrevToken = getPreviousToken(Tok); 698 if (PrevToken == NULL) 699 return TT_UnaryOperator; 700 701 // Use heuristics to recognize unary operators. 702 if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square, 703 tok::question, tok::colon, tok::kw_return, 704 tok::kw_case, tok::at, tok::l_brace)) 705 return TT_UnaryOperator; 706 707 // There can't be two consecutive binary operators. 708 if (PrevToken->Type == TT_BinaryOperator) 709 return TT_UnaryOperator; 710 711 // Fall back to marking the token as binary operator. 712 return TT_BinaryOperator; 713 } 714 715 /// \brief Determine whether ++/-- are pre- or post-increments/-decrements. 716 TokenType determineIncrementUsage(const AnnotatedToken &Tok) { 717 const AnnotatedToken *PrevToken = getPreviousToken(Tok); 718 if (PrevToken == NULL) 719 return TT_UnaryOperator; 720 if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier)) 721 return TT_TrailingUnaryOperator; 722 723 return TT_UnaryOperator; 724 } 725 726 SmallVector<Context, 8> Contexts; 727 728 SourceManager &SourceMgr; 729 Lexer &Lex; 730 AnnotatedLine &Line; 731 AnnotatedToken *CurrentToken; 732 bool KeywordVirtualFound; 733 IdentifierInfo &Ident_in; 734 }; 735 736 /// \brief Parses binary expressions by inserting fake parenthesis based on 737 /// operator precedence. 738 class ExpressionParser { 739 public: 740 ExpressionParser(AnnotatedLine &Line) : Current(&Line.First) {} 741 742 /// \brief Parse expressions with the given operatore precedence. 743 void parse(int Precedence = 0) { 744 if (Precedence > prec::PointerToMember || Current == NULL) 745 return; 746 747 // Skip over "return" until we can properly parse it. 748 if (Current->is(tok::kw_return)) 749 next(); 750 751 // Eagerly consume trailing comments. 752 while (isTrailingComment(Current)) { 753 next(); 754 } 755 756 AnnotatedToken *Start = Current; 757 bool OperatorFound = false; 758 759 while (Current) { 760 // Consume operators with higher precedence. 761 parse(prec::Level(Precedence + 1)); 762 763 int CurrentPrecedence = 0; 764 if (Current) { 765 if (Current->Type == TT_ConditionalExpr) 766 CurrentPrecedence = 1 + (int) prec::Conditional; 767 else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon || 768 Current->Type == TT_CtorInitializerColon) 769 CurrentPrecedence = 1; 770 else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma)) 771 CurrentPrecedence = 1 + (int) getPrecedence(*Current); 772 } 773 774 // At the end of the line or when an operator with higher precedence is 775 // found, insert fake parenthesis and return. 776 if (Current == NULL || closesScope(*Current) || 777 (CurrentPrecedence != 0 && CurrentPrecedence < Precedence)) { 778 if (OperatorFound) { 779 ++Start->FakeLParens; 780 if (Current) 781 ++Current->Parent->FakeRParens; 782 } 783 return; 784 } 785 786 // Consume scopes: (), [], <> and {} 787 if (opensScope(*Current)) { 788 AnnotatedToken *Left = Current; 789 while (Current && !closesScope(*Current)) { 790 next(); 791 parse(); 792 } 793 // Remove fake parens that just duplicate the real parens. 794 if (Current && Left->Children[0].FakeLParens > 0 && 795 Current->Parent->FakeRParens > 0) { 796 --Left->Children[0].FakeLParens; 797 --Current->Parent->FakeRParens; 798 } 799 next(); 800 } else { 801 // Operator found. 802 if (CurrentPrecedence == Precedence) 803 OperatorFound = true; 804 805 next(); 806 } 807 } 808 } 809 810 private: 811 void next() { 812 if (Current != NULL) 813 Current = Current->Children.empty() ? NULL : &Current->Children[0]; 814 } 815 816 AnnotatedToken *Current; 817 }; 818 819 void TokenAnnotator::annotate(AnnotatedLine &Line) { 820 AnnotatingParser Parser(SourceMgr, Lex, Line, Ident_in); 821 Line.Type = Parser.parseLine(); 822 if (Line.Type == LT_Invalid) 823 return; 824 825 ExpressionParser ExprParser(Line); 826 ExprParser.parse(); 827 828 if (Line.First.Type == TT_ObjCMethodSpecifier) 829 Line.Type = LT_ObjCMethodDecl; 830 else if (Line.First.Type == TT_ObjCDecl) 831 Line.Type = LT_ObjCDecl; 832 else if (Line.First.Type == TT_ObjCProperty) 833 Line.Type = LT_ObjCProperty; 834 835 Line.First.SpacesRequiredBefore = 1; 836 Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore; 837 Line.First.CanBreakBefore = Line.First.MustBreakBefore; 838 839 Line.First.TotalLength = Line.First.FormatTok.TokenLength; 840 } 841 842 void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { 843 if (Line.First.Children.empty()) 844 return; 845 AnnotatedToken *Current = &Line.First.Children[0]; 846 while (Current != NULL) { 847 if (Current->Type == TT_LineComment) 848 Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments; 849 else 850 Current->SpacesRequiredBefore = 851 spaceRequiredBefore(Line, *Current) ? 1 : 0; 852 853 if (Current->FormatTok.MustBreakBefore) { 854 Current->MustBreakBefore = true; 855 } else if (Current->Type == TT_LineComment) { 856 Current->MustBreakBefore = Current->FormatTok.NewlinesBefore > 0; 857 } else if (isTrailingComment(Current->Parent) || 858 (Current->is(tok::string_literal) && 859 Current->Parent->is(tok::string_literal))) { 860 Current->MustBreakBefore = true; 861 } else if (Current->is(tok::lessless) && !Current->Children.empty() && 862 Current->Parent->is(tok::string_literal) && 863 Current->Children[0].is(tok::string_literal)) { 864 Current->MustBreakBefore = true; 865 } else { 866 Current->MustBreakBefore = false; 867 } 868 Current->CanBreakBefore = 869 Current->MustBreakBefore || canBreakBefore(Line, *Current); 870 if (Current->MustBreakBefore) 871 Current->TotalLength = Current->Parent->TotalLength + Style.ColumnLimit; 872 else 873 Current->TotalLength = 874 Current->Parent->TotalLength + Current->FormatTok.TokenLength + 875 Current->SpacesRequiredBefore; 876 // FIXME: Only calculate this if CanBreakBefore is true once static 877 // initializers etc. are sorted out. 878 // FIXME: Move magic numbers to a better place. 879 Current->SplitPenalty = 880 20 * Current->BindingStrength + splitPenalty(Line, *Current); 881 882 Current = Current->Children.empty() ? NULL : &Current->Children[0]; 883 } 884 } 885 886 unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, 887 const AnnotatedToken &Tok) { 888 const AnnotatedToken &Left = *Tok.Parent; 889 const AnnotatedToken &Right = Tok; 890 891 if (Right.Type == TT_StartOfName) { 892 if (Line.First.is(tok::kw_for)) 893 return 3; 894 else if (Line.MightBeFunctionDecl && Right.BindingStrength == 1) 895 // FIXME: Clean up hack of using BindingStrength to find top-level names. 896 return Style.PenaltyReturnTypeOnItsOwnLine; 897 else 898 return 100; 899 } 900 if (Left.is(tok::equal) && Right.is(tok::l_brace)) 901 return 150; 902 if (Left.is(tok::coloncolon)) 903 return 500; 904 905 if (Left.Type == TT_RangeBasedForLoopColon || 906 Left.Type == TT_InheritanceColon) 907 return 2; 908 909 if (Right.isOneOf(tok::arrow, tok::period)) { 910 if (Line.Type == LT_BuilderTypeCall) 911 return prec::PointerToMember; 912 if (Left.isOneOf(tok::r_paren, tok::r_square) && Left.MatchingParen && 913 Left.MatchingParen->ParameterCount > 0) 914 return 20; // Should be smaller than breaking at a nested comma. 915 return 150; 916 } 917 918 // In for-loops, prefer breaking at ',' and ';'. 919 if (Line.First.is(tok::kw_for) && Left.is(tok::equal)) 920 return 4; 921 922 if (Left.is(tok::semi)) 923 return 0; 924 if (Left.is(tok::comma)) 925 return 1; 926 927 // In Objective-C method expressions, prefer breaking before "param:" over 928 // breaking after it. 929 if (Right.Type == TT_ObjCSelectorName) 930 return 0; 931 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr) 932 return 20; 933 934 if (opensScope(Left)) 935 return 20; 936 937 if (Right.is(tok::lessless)) { 938 if (Left.is(tok::string_literal)) { 939 StringRef Content = StringRef(Left.FormatTok.Tok.getLiteralData(), 940 Left.FormatTok.TokenLength); 941 Content = Content.drop_back(1).drop_front(1).trim(); 942 if (Content.size() > 1 && 943 (Content.back() == ':' || Content.back() == '=')) 944 return 100; 945 } 946 return prec::Shift; 947 } 948 if (Left.Type == TT_ConditionalExpr) 949 return prec::Conditional; 950 prec::Level Level = getPrecedence(Left); 951 952 if (Level != prec::Unknown) 953 return Level; 954 955 return 3; 956 } 957 958 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, 959 const AnnotatedToken &Left, 960 const AnnotatedToken &Right) { 961 if (Right.is(tok::hashhash)) 962 return Left.is(tok::hash); 963 if (Left.isOneOf(tok::hashhash, tok::hash)) 964 return Right.is(tok::hash); 965 if (Right.isOneOf(tok::r_paren, tok::semi, tok::comma)) 966 return false; 967 if (Right.is(tok::less) && 968 (Left.is(tok::kw_template) || 969 (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList))) 970 return true; 971 if (Left.is(tok::arrow) || Right.is(tok::arrow)) 972 return false; 973 if (Left.isOneOf(tok::exclaim, tok::tilde)) 974 return false; 975 if (Left.is(tok::at) && 976 Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant, 977 tok::numeric_constant, tok::l_paren, tok::l_brace, 978 tok::kw_true, tok::kw_false)) 979 return false; 980 if (Left.is(tok::coloncolon)) 981 return false; 982 if (Right.is(tok::coloncolon)) 983 return !Left.isOneOf(tok::identifier, tok::greater, tok::l_paren); 984 if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) 985 return false; 986 if (Right.Type == TT_PointerOrReference) 987 return Left.FormatTok.Tok.isLiteral() || 988 ((Left.Type != TT_PointerOrReference) && Left.isNot(tok::l_paren) && 989 !Style.PointerBindsToType); 990 if (Left.Type == TT_PointerOrReference) 991 return Right.FormatTok.Tok.isLiteral() || 992 ((Right.Type != TT_PointerOrReference) && Style.PointerBindsToType); 993 if (Right.is(tok::star) && Left.is(tok::l_paren)) 994 return false; 995 if (Left.is(tok::l_square)) 996 return Left.Type == TT_ObjCArrayLiteral && Right.isNot(tok::r_square); 997 if (Right.is(tok::r_square)) 998 return Right.Type == TT_ObjCArrayLiteral; 999 if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr) 1000 return false; 1001 if (Left.is(tok::period) || Right.is(tok::period)) 1002 return false; 1003 if (Left.is(tok::colon)) 1004 return Left.Type != TT_ObjCMethodExpr; 1005 if (Right.is(tok::colon)) 1006 return Right.Type != TT_ObjCMethodExpr; 1007 if (Left.is(tok::l_paren)) 1008 return false; 1009 if (Right.is(tok::l_paren)) { 1010 return Line.Type == LT_ObjCDecl || 1011 Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch, 1012 tok::kw_return, tok::kw_catch, tok::kw_new, 1013 tok::kw_delete); 1014 } 1015 if (Left.is(tok::at) && 1016 Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword) 1017 return false; 1018 if (Left.is(tok::l_brace) && Right.is(tok::r_brace)) 1019 return false; 1020 return true; 1021 } 1022 1023 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, 1024 const AnnotatedToken &Tok) { 1025 if (Tok.FormatTok.Tok.getIdentifierInfo() && 1026 Tok.Parent->FormatTok.Tok.getIdentifierInfo()) 1027 return true; // Never ever merge two identifiers. 1028 if (Line.Type == LT_ObjCMethodDecl) { 1029 if (Tok.Parent->Type == TT_ObjCMethodSpecifier) 1030 return true; 1031 if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier)) 1032 // Don't space between ')' and <id> 1033 return false; 1034 } 1035 if (Line.Type == LT_ObjCProperty && 1036 (Tok.is(tok::equal) || Tok.Parent->is(tok::equal))) 1037 return false; 1038 1039 if (Tok.Parent->is(tok::comma)) 1040 return true; 1041 if (Tok.is(tok::comma)) 1042 return false; 1043 if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen) 1044 return true; 1045 if (Tok.Parent->FormatTok.Tok.is(tok::kw_operator)) 1046 return false; 1047 if (Tok.Type == TT_OverloadedOperatorLParen) 1048 return false; 1049 if (Tok.is(tok::colon)) 1050 return !Line.First.isOneOf(tok::kw_case, tok::kw_default) && 1051 !Tok.Children.empty() && Tok.Type != TT_ObjCMethodExpr; 1052 if (Tok.is(tok::l_paren) && !Tok.Children.empty() && 1053 Tok.Children[0].Type == TT_PointerOrReference && 1054 !Tok.Children[0].Children.empty() && 1055 Tok.Children[0].Children[0].isNot(tok::r_paren)) 1056 return true; 1057 if (Tok.Parent->Type == TT_UnaryOperator || Tok.Parent->Type == TT_CastRParen) 1058 return false; 1059 if (Tok.Type == TT_UnaryOperator) 1060 return !Tok.Parent->isOneOf(tok::l_paren, tok::l_square, tok::at) && 1061 (Tok.Parent->isNot(tok::colon) || 1062 Tok.Parent->Type != TT_ObjCMethodExpr); 1063 if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) { 1064 return Tok.Type == TT_TemplateCloser && 1065 Tok.Parent->Type == TT_TemplateCloser && 1066 Style.Standard != FormatStyle::LS_Cpp11; 1067 } 1068 if (Tok.is(tok::arrowstar) || Tok.Parent->is(tok::arrowstar)) 1069 return false; 1070 if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator) 1071 return true; 1072 if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren)) 1073 return false; 1074 if (Tok.is(tok::less) && Line.First.is(tok::hash)) 1075 return true; 1076 if (Tok.Type == TT_TrailingUnaryOperator) 1077 return false; 1078 return spaceRequiredBetween(Line, *Tok.Parent, Tok); 1079 } 1080 1081 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, 1082 const AnnotatedToken &Right) { 1083 const AnnotatedToken &Left = *Right.Parent; 1084 if (Right.Type == TT_StartOfName) 1085 return true; 1086 if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr) 1087 return false; 1088 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr) 1089 return true; 1090 if (Right.Type == TT_ObjCSelectorName) 1091 return true; 1092 if (Left.ClosesTemplateDeclaration) 1093 return true; 1094 if (Right.Type == TT_ConditionalExpr || Right.is(tok::question)) 1095 return true; 1096 if (Right.Type == TT_RangeBasedForLoopColon || 1097 Right.Type == TT_InheritanceColon) 1098 return false; 1099 if (Left.Type == TT_RangeBasedForLoopColon || 1100 Left.Type == TT_InheritanceColon) 1101 return true; 1102 if (Right.Type == TT_RangeBasedForLoopColon) 1103 return false; 1104 if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser || 1105 Left.Type == TT_UnaryOperator || Left.Type == TT_ConditionalExpr || 1106 Left.isOneOf(tok::question, tok::kw_operator)) 1107 return false; 1108 if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl) 1109 return false; 1110 if (Left.is(tok::l_paren) && Right.is(tok::l_paren) && Left.Parent && 1111 Left.Parent->is(tok::kw___attribute)) 1112 return false; 1113 1114 if (Right.Type == TT_LineComment) 1115 // We rely on MustBreakBefore being set correctly here as we should not 1116 // change the "binding" behavior of a comment. 1117 return false; 1118 1119 // Allow breaking after a trailing 'const', e.g. after a method declaration, 1120 // unless it is follow by ';', '{' or '='. 1121 if (Left.is(tok::kw_const) && Left.Parent != NULL && 1122 Left.Parent->is(tok::r_paren)) 1123 return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal); 1124 1125 if (Right.is(tok::kw___attribute)) 1126 return true; 1127 1128 // We only break before r_brace if there was a corresponding break before 1129 // the l_brace, which is tracked by BreakBeforeClosingBrace. 1130 if (Right.isOneOf(tok::r_brace, tok::r_paren, tok::greater)) 1131 return false; 1132 if (Left.is(tok::identifier) && Right.is(tok::string_literal)) 1133 return true; 1134 return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) || 1135 Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace) || 1136 Right.isOneOf(tok::lessless, tok::arrow, tok::period, tok::colon) || 1137 (Left.is(tok::r_paren) && Left.Type != TT_CastRParen && 1138 Right.isOneOf(tok::identifier, tok::kw___attribute)) || 1139 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) || 1140 (Left.is(tok::l_square) && !Right.is(tok::r_square)); 1141 } 1142 1143 } // namespace format 1144 } // namespace clang 1145