1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file contains the implementation of the UnwrappedLineParser, 12 /// which turns a stream of tokens into UnwrappedLines. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #define DEBUG_TYPE "format-parser" 17 18 #include "UnwrappedLineParser.h" 19 #include "llvm/Support/Debug.h" 20 21 namespace clang { 22 namespace format { 23 24 class FormatTokenSource { 25 public: 26 virtual ~FormatTokenSource() {} 27 virtual FormatToken *getNextToken() = 0; 28 29 virtual unsigned getPosition() = 0; 30 virtual FormatToken *setPosition(unsigned Position) = 0; 31 }; 32 33 namespace { 34 35 class ScopedDeclarationState { 36 public: 37 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 38 bool MustBeDeclaration) 39 : Line(Line), Stack(Stack) { 40 Line.MustBeDeclaration = MustBeDeclaration; 41 Stack.push_back(MustBeDeclaration); 42 } 43 ~ScopedDeclarationState() { 44 Stack.pop_back(); 45 if (!Stack.empty()) 46 Line.MustBeDeclaration = Stack.back(); 47 else 48 Line.MustBeDeclaration = true; 49 } 50 51 private: 52 UnwrappedLine &Line; 53 std::vector<bool> &Stack; 54 }; 55 56 class ScopedMacroState : public FormatTokenSource { 57 public: 58 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 59 FormatToken *&ResetToken, bool &StructuralError) 60 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 61 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 62 StructuralError(StructuralError), 63 PreviousStructuralError(StructuralError), Token(NULL) { 64 TokenSource = this; 65 Line.Level = 0; 66 Line.InPPDirective = true; 67 } 68 69 ~ScopedMacroState() { 70 TokenSource = PreviousTokenSource; 71 ResetToken = Token; 72 Line.InPPDirective = false; 73 Line.Level = PreviousLineLevel; 74 StructuralError = PreviousStructuralError; 75 } 76 77 virtual FormatToken *getNextToken() { 78 // The \c UnwrappedLineParser guards against this by never calling 79 // \c getNextToken() after it has encountered the first eof token. 80 assert(!eof()); 81 Token = PreviousTokenSource->getNextToken(); 82 if (eof()) 83 return getFakeEOF(); 84 return Token; 85 } 86 87 virtual unsigned getPosition() { return PreviousTokenSource->getPosition(); } 88 89 virtual FormatToken *setPosition(unsigned Position) { 90 Token = PreviousTokenSource->setPosition(Position); 91 return Token; 92 } 93 94 private: 95 bool eof() { return Token && Token->HasUnescapedNewline; } 96 97 FormatToken *getFakeEOF() { 98 static bool EOFInitialized = false; 99 static FormatToken FormatTok; 100 if (!EOFInitialized) { 101 FormatTok.Tok.startToken(); 102 FormatTok.Tok.setKind(tok::eof); 103 EOFInitialized = true; 104 } 105 return &FormatTok; 106 } 107 108 UnwrappedLine &Line; 109 FormatTokenSource *&TokenSource; 110 FormatToken *&ResetToken; 111 unsigned PreviousLineLevel; 112 FormatTokenSource *PreviousTokenSource; 113 bool &StructuralError; 114 bool PreviousStructuralError; 115 116 FormatToken *Token; 117 }; 118 119 } // end anonymous namespace 120 121 class ScopedLineState { 122 public: 123 ScopedLineState(UnwrappedLineParser &Parser, 124 bool SwitchToPreprocessorLines = false) 125 : Parser(Parser), SwitchToPreprocessorLines(SwitchToPreprocessorLines) { 126 if (SwitchToPreprocessorLines) 127 Parser.CurrentLines = &Parser.PreprocessorDirectives; 128 PreBlockLine = Parser.Line.take(); 129 Parser.Line.reset(new UnwrappedLine()); 130 Parser.Line->Level = PreBlockLine->Level; 131 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 132 } 133 134 ~ScopedLineState() { 135 if (!Parser.Line->Tokens.empty()) { 136 Parser.addUnwrappedLine(); 137 } 138 assert(Parser.Line->Tokens.empty()); 139 Parser.Line.reset(PreBlockLine); 140 Parser.MustBreakBeforeNextToken = true; 141 if (SwitchToPreprocessorLines) 142 Parser.CurrentLines = &Parser.Lines; 143 } 144 145 private: 146 UnwrappedLineParser &Parser; 147 const bool SwitchToPreprocessorLines; 148 149 UnwrappedLine *PreBlockLine; 150 }; 151 152 namespace { 153 154 class IndexedTokenSource : public FormatTokenSource { 155 public: 156 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 157 : Tokens(Tokens), Position(-1) {} 158 159 virtual FormatToken *getNextToken() { 160 ++Position; 161 return Tokens[Position]; 162 } 163 164 virtual unsigned getPosition() { 165 assert(Position >= 0); 166 return Position; 167 } 168 169 virtual FormatToken *setPosition(unsigned P) { 170 Position = P; 171 return Tokens[Position]; 172 } 173 174 private: 175 ArrayRef<FormatToken *> Tokens; 176 int Position; 177 }; 178 179 } // end anonymous namespace 180 181 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 182 ArrayRef<FormatToken *> Tokens, 183 UnwrappedLineConsumer &Callback) 184 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 185 CurrentLines(&Lines), StructuralError(false), Style(Style), Tokens(NULL), 186 Callback(Callback), AllTokens(Tokens) {} 187 188 bool UnwrappedLineParser::parse() { 189 DEBUG(llvm::dbgs() << "----\n"); 190 IndexedTokenSource TokenSource(AllTokens); 191 Tokens = &TokenSource; 192 readToken(); 193 parseFile(); 194 for (std::vector<UnwrappedLine>::iterator I = Lines.begin(), E = Lines.end(); 195 I != E; ++I) { 196 Callback.consumeUnwrappedLine(*I); 197 } 198 199 // Create line with eof token. 200 pushToken(FormatTok); 201 Callback.consumeUnwrappedLine(*Line); 202 return StructuralError; 203 } 204 205 void UnwrappedLineParser::parseFile() { 206 ScopedDeclarationState DeclarationState( 207 *Line, DeclarationScopeStack, 208 /*MustBeDeclaration=*/ !Line->InPPDirective); 209 parseLevel(/*HasOpeningBrace=*/false); 210 // Make sure to format the remaining tokens. 211 flushComments(true); 212 addUnwrappedLine(); 213 } 214 215 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 216 bool SwitchLabelEncountered = false; 217 do { 218 switch (FormatTok->Tok.getKind()) { 219 case tok::comment: 220 nextToken(); 221 addUnwrappedLine(); 222 break; 223 case tok::l_brace: 224 // FIXME: Add parameter whether this can happen - if this happens, we must 225 // be in a non-declaration context. 226 parseBlock(/*MustBeDeclaration=*/false); 227 addUnwrappedLine(); 228 break; 229 case tok::r_brace: 230 if (HasOpeningBrace) 231 return; 232 StructuralError = true; 233 nextToken(); 234 addUnwrappedLine(); 235 break; 236 case tok::kw_default: 237 case tok::kw_case: 238 if (!SwitchLabelEncountered) 239 Line->Level += Style.IndentCaseLabels; 240 SwitchLabelEncountered = true; 241 parseStructuralElement(); 242 break; 243 default: 244 parseStructuralElement(); 245 break; 246 } 247 } while (!eof()); 248 } 249 250 void UnwrappedLineParser::calculateBraceTypes() { 251 // We'll parse forward through the tokens until we hit 252 // a closing brace or eof - note that getNextToken() will 253 // parse macros, so this will magically work inside macro 254 // definitions, too. 255 unsigned StoredPosition = Tokens->getPosition(); 256 unsigned Position = StoredPosition; 257 FormatToken *Tok = FormatTok; 258 // Keep a stack of positions of lbrace tokens. We will 259 // update information about whether an lbrace starts a 260 // braced init list or a different block during the loop. 261 SmallVector<FormatToken *, 8> LBraceStack; 262 assert(Tok->Tok.is(tok::l_brace)); 263 do { 264 // Get next none-comment token. 265 FormatToken *NextTok; 266 unsigned ReadTokens = 0; 267 do { 268 NextTok = Tokens->getNextToken(); 269 ++ReadTokens; 270 } while (NextTok->is(tok::comment)); 271 272 switch (Tok->Tok.getKind()) { 273 case tok::l_brace: 274 LBraceStack.push_back(Tok); 275 break; 276 case tok::r_brace: 277 if (!LBraceStack.empty()) { 278 if (LBraceStack.back()->BlockKind == BK_Unknown) { 279 // If there is a comma, semicolon or right paren after the closing 280 // brace, we assume this is a braced initializer list. 281 282 // FIXME: Note that this currently works only because we do not 283 // use the brace information while inside a braced init list. 284 // Thus, if the parent is a braced init list, we consider all 285 // brace blocks inside it braced init list. That works good enough 286 // for now, but we will need to fix it to correctly handle lambdas. 287 if (NextTok->isOneOf(tok::comma, tok::semi, tok::r_paren, 288 tok::l_brace, tok::colon)) { 289 Tok->BlockKind = BK_BracedInit; 290 LBraceStack.back()->BlockKind = BK_BracedInit; 291 } else { 292 Tok->BlockKind = BK_Block; 293 LBraceStack.back()->BlockKind = BK_Block; 294 } 295 } 296 LBraceStack.pop_back(); 297 } 298 break; 299 case tok::semi: 300 case tok::kw_if: 301 case tok::kw_while: 302 case tok::kw_for: 303 case tok::kw_switch: 304 case tok::kw_try: 305 if (!LBraceStack.empty()) 306 LBraceStack.back()->BlockKind = BK_Block; 307 break; 308 default: 309 break; 310 } 311 Tok = NextTok; 312 Position += ReadTokens; 313 } while (Tok->Tok.isNot(tok::eof)); 314 // Assume other blocks for all unclosed opening braces. 315 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 316 if (LBraceStack[i]->BlockKind == BK_Unknown) 317 LBraceStack[i]->BlockKind = BK_Block; 318 } 319 FormatTok = Tokens->setPosition(StoredPosition); 320 } 321 322 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel) { 323 assert(FormatTok->Tok.is(tok::l_brace) && "'{' expected"); 324 unsigned InitialLevel = Line->Level; 325 nextToken(); 326 327 addUnwrappedLine(); 328 329 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 330 MustBeDeclaration); 331 if (AddLevel) 332 ++Line->Level; 333 parseLevel(/*HasOpeningBrace=*/true); 334 335 if (!FormatTok->Tok.is(tok::r_brace)) { 336 Line->Level = InitialLevel; 337 StructuralError = true; 338 return; 339 } 340 341 nextToken(); // Munch the closing brace. 342 Line->Level = InitialLevel; 343 } 344 345 void UnwrappedLineParser::parsePPDirective() { 346 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 347 ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError); 348 nextToken(); 349 350 if (FormatTok->Tok.getIdentifierInfo() == NULL) { 351 parsePPUnknown(); 352 return; 353 } 354 355 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 356 case tok::pp_define: 357 parsePPDefine(); 358 return; 359 case tok::pp_if: 360 parsePPIf(); 361 break; 362 case tok::pp_ifdef: 363 case tok::pp_ifndef: 364 parsePPIfdef(); 365 break; 366 case tok::pp_else: 367 parsePPElse(); 368 break; 369 case tok::pp_elif: 370 parsePPElIf(); 371 break; 372 case tok::pp_endif: 373 parsePPEndIf(); 374 break; 375 default: 376 parsePPUnknown(); 377 break; 378 } 379 } 380 381 void UnwrappedLineParser::pushPPConditional() { 382 if (!PPStack.empty() && PPStack.back() == PP_Unreachable) 383 PPStack.push_back(PP_Unreachable); 384 else 385 PPStack.push_back(PP_Conditional); 386 } 387 388 void UnwrappedLineParser::parsePPIf() { 389 nextToken(); 390 if ((FormatTok->Tok.isLiteral() && 391 StringRef(FormatTok->Tok.getLiteralData(), FormatTok->Tok.getLength()) == 392 "0") || 393 FormatTok->Tok.is(tok::kw_false)) { 394 PPStack.push_back(PP_Unreachable); 395 } else { 396 pushPPConditional(); 397 } 398 parsePPUnknown(); 399 } 400 401 void UnwrappedLineParser::parsePPIfdef() { 402 pushPPConditional(); 403 parsePPUnknown(); 404 } 405 406 void UnwrappedLineParser::parsePPElse() { 407 if (!PPStack.empty()) 408 PPStack.pop_back(); 409 pushPPConditional(); 410 parsePPUnknown(); 411 } 412 413 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 414 415 void UnwrappedLineParser::parsePPEndIf() { 416 if (!PPStack.empty()) 417 PPStack.pop_back(); 418 parsePPUnknown(); 419 } 420 421 void UnwrappedLineParser::parsePPDefine() { 422 nextToken(); 423 424 if (FormatTok->Tok.getKind() != tok::identifier) { 425 parsePPUnknown(); 426 return; 427 } 428 nextToken(); 429 if (FormatTok->Tok.getKind() == tok::l_paren && 430 FormatTok->WhitespaceRange.getBegin() == 431 FormatTok->WhitespaceRange.getEnd()) { 432 parseParens(); 433 } 434 addUnwrappedLine(); 435 Line->Level = 1; 436 437 // Errors during a preprocessor directive can only affect the layout of the 438 // preprocessor directive, and thus we ignore them. An alternative approach 439 // would be to use the same approach we use on the file level (no 440 // re-indentation if there was a structural error) within the macro 441 // definition. 442 parseFile(); 443 } 444 445 void UnwrappedLineParser::parsePPUnknown() { 446 do { 447 nextToken(); 448 } while (!eof()); 449 addUnwrappedLine(); 450 } 451 452 // Here we blacklist certain tokens that are not usually the first token in an 453 // unwrapped line. This is used in attempt to distinguish macro calls without 454 // trailing semicolons from other constructs split to several lines. 455 bool tokenCanStartNewLine(clang::Token Tok) { 456 // Semicolon can be a null-statement, l_square can be a start of a macro or 457 // a C++11 attribute, but this doesn't seem to be common. 458 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 459 Tok.isNot(tok::l_square) && 460 // Tokens that can only be used as binary operators and a part of 461 // overloaded operator names. 462 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 463 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 464 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 465 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 466 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 467 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 468 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 469 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 470 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 471 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 472 Tok.isNot(tok::lesslessequal) && 473 // Colon is used in labels, base class lists, initializer lists, 474 // range-based for loops, ternary operator, but should never be the 475 // first token in an unwrapped line. 476 Tok.isNot(tok::colon); 477 } 478 479 void UnwrappedLineParser::parseStructuralElement() { 480 assert(!FormatTok->Tok.is(tok::l_brace)); 481 switch (FormatTok->Tok.getKind()) { 482 case tok::at: 483 nextToken(); 484 if (FormatTok->Tok.is(tok::l_brace)) { 485 parseBracedList(); 486 break; 487 } 488 switch (FormatTok->Tok.getObjCKeywordID()) { 489 case tok::objc_public: 490 case tok::objc_protected: 491 case tok::objc_package: 492 case tok::objc_private: 493 return parseAccessSpecifier(); 494 case tok::objc_interface: 495 case tok::objc_implementation: 496 return parseObjCInterfaceOrImplementation(); 497 case tok::objc_protocol: 498 return parseObjCProtocol(); 499 case tok::objc_end: 500 return; // Handled by the caller. 501 case tok::objc_optional: 502 case tok::objc_required: 503 nextToken(); 504 addUnwrappedLine(); 505 return; 506 default: 507 break; 508 } 509 break; 510 case tok::kw_namespace: 511 parseNamespace(); 512 return; 513 case tok::kw_inline: 514 nextToken(); 515 if (FormatTok->Tok.is(tok::kw_namespace)) { 516 parseNamespace(); 517 return; 518 } 519 break; 520 case tok::kw_public: 521 case tok::kw_protected: 522 case tok::kw_private: 523 parseAccessSpecifier(); 524 return; 525 case tok::kw_if: 526 parseIfThenElse(); 527 return; 528 case tok::kw_for: 529 case tok::kw_while: 530 parseForOrWhileLoop(); 531 return; 532 case tok::kw_do: 533 parseDoWhile(); 534 return; 535 case tok::kw_switch: 536 parseSwitch(); 537 return; 538 case tok::kw_default: 539 nextToken(); 540 parseLabel(); 541 return; 542 case tok::kw_case: 543 parseCaseLabel(); 544 return; 545 case tok::kw_return: 546 parseReturn(); 547 return; 548 case tok::kw_extern: 549 nextToken(); 550 if (FormatTok->Tok.is(tok::string_literal)) { 551 nextToken(); 552 if (FormatTok->Tok.is(tok::l_brace)) { 553 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false); 554 addUnwrappedLine(); 555 return; 556 } 557 } 558 // In all other cases, parse the declaration. 559 break; 560 default: 561 break; 562 } 563 do { 564 switch (FormatTok->Tok.getKind()) { 565 case tok::at: 566 nextToken(); 567 if (FormatTok->Tok.is(tok::l_brace)) 568 parseBracedList(); 569 break; 570 case tok::kw_enum: 571 parseEnum(); 572 break; 573 case tok::kw_struct: 574 case tok::kw_union: 575 case tok::kw_class: 576 parseRecord(); 577 // A record declaration or definition is always the start of a structural 578 // element. 579 break; 580 case tok::semi: 581 nextToken(); 582 addUnwrappedLine(); 583 return; 584 case tok::r_brace: 585 addUnwrappedLine(); 586 return; 587 case tok::l_paren: 588 parseParens(); 589 break; 590 case tok::l_brace: 591 if (!tryToParseBracedList()) { 592 // A block outside of parentheses must be the last part of a 593 // structural element. 594 // FIXME: Figure out cases where this is not true, and add projections 595 // for them (the one we know is missing are lambdas). 596 if (Style.BreakBeforeBraces == FormatStyle::BS_Linux || 597 Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup || 598 Style.BreakBeforeBraces == FormatStyle::BS_Allman) 599 addUnwrappedLine(); 600 parseBlock(/*MustBeDeclaration=*/false); 601 addUnwrappedLine(); 602 return; 603 } 604 // Otherwise this was a braced init list, and the structural 605 // element continues. 606 break; 607 case tok::identifier: { 608 StringRef Text = FormatTok->TokenText; 609 nextToken(); 610 if (Line->Tokens.size() == 1) { 611 if (FormatTok->Tok.is(tok::colon)) { 612 parseLabel(); 613 return; 614 } 615 // Recognize function-like macro usages without trailing semicolon. 616 if (FormatTok->Tok.is(tok::l_paren)) { 617 parseParens(); 618 if (FormatTok->HasUnescapedNewline && 619 tokenCanStartNewLine(FormatTok->Tok)) { 620 addUnwrappedLine(); 621 return; 622 } 623 } else if (FormatTok->HasUnescapedNewline && Text.size() >= 5 && 624 Text == Text.upper()) { 625 // Recognize free-standing macros like Q_OBJECT. 626 addUnwrappedLine(); 627 return; 628 } 629 } 630 break; 631 } 632 case tok::equal: 633 nextToken(); 634 if (FormatTok->Tok.is(tok::l_brace)) { 635 parseBracedList(); 636 } 637 break; 638 default: 639 nextToken(); 640 break; 641 } 642 } while (!eof()); 643 } 644 645 bool UnwrappedLineParser::tryToParseBracedList() { 646 if (FormatTok->BlockKind == BK_Unknown) 647 calculateBraceTypes(); 648 assert(FormatTok->BlockKind != BK_Unknown); 649 if (FormatTok->BlockKind == BK_Block) 650 return false; 651 parseBracedList(); 652 return true; 653 } 654 655 void UnwrappedLineParser::parseBracedList() { 656 nextToken(); 657 658 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 659 // replace this by using parseAssigmentExpression() inside. 660 do { 661 // FIXME: When we start to support lambdas, we'll want to parse them away 662 // here, otherwise our bail-out scenarios below break. The better solution 663 // might be to just implement a more or less complete expression parser. 664 switch (FormatTok->Tok.getKind()) { 665 case tok::l_brace: 666 parseBracedList(); 667 break; 668 case tok::r_brace: 669 nextToken(); 670 return; 671 case tok::semi: 672 // Probably a missing closing brace. Bail out. 673 return; 674 case tok::comma: 675 nextToken(); 676 break; 677 default: 678 nextToken(); 679 break; 680 } 681 } while (!eof()); 682 } 683 684 void UnwrappedLineParser::parseReturn() { 685 nextToken(); 686 687 do { 688 switch (FormatTok->Tok.getKind()) { 689 case tok::l_brace: 690 parseBracedList(); 691 if (FormatTok->Tok.isNot(tok::semi)) { 692 // Assume missing ';'. 693 addUnwrappedLine(); 694 return; 695 } 696 break; 697 case tok::l_paren: 698 parseParens(); 699 break; 700 case tok::r_brace: 701 // Assume missing ';'. 702 addUnwrappedLine(); 703 return; 704 case tok::semi: 705 nextToken(); 706 addUnwrappedLine(); 707 return; 708 default: 709 nextToken(); 710 break; 711 } 712 } while (!eof()); 713 } 714 715 void UnwrappedLineParser::parseParens() { 716 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 717 nextToken(); 718 do { 719 switch (FormatTok->Tok.getKind()) { 720 case tok::l_paren: 721 parseParens(); 722 break; 723 case tok::r_paren: 724 nextToken(); 725 return; 726 case tok::r_brace: 727 // A "}" inside parenthesis is an error if there wasn't a matching "{". 728 return; 729 case tok::l_brace: { 730 if (!tryToParseBracedList()) { 731 nextToken(); 732 { 733 ScopedLineState LineState(*this); 734 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 735 /*MustBeDeclaration=*/false); 736 Line->Level += 1; 737 parseLevel(/*HasOpeningBrace=*/true); 738 Line->Level -= 1; 739 } 740 nextToken(); 741 } 742 break; 743 } 744 case tok::at: 745 nextToken(); 746 if (FormatTok->Tok.is(tok::l_brace)) 747 parseBracedList(); 748 break; 749 default: 750 nextToken(); 751 break; 752 } 753 } while (!eof()); 754 } 755 756 void UnwrappedLineParser::parseIfThenElse() { 757 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 758 nextToken(); 759 if (FormatTok->Tok.is(tok::l_paren)) 760 parseParens(); 761 bool NeedsUnwrappedLine = false; 762 if (FormatTok->Tok.is(tok::l_brace)) { 763 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) 764 addUnwrappedLine(); 765 parseBlock(/*MustBeDeclaration=*/false); 766 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) 767 addUnwrappedLine(); 768 else 769 NeedsUnwrappedLine = true; 770 } else { 771 addUnwrappedLine(); 772 ++Line->Level; 773 parseStructuralElement(); 774 --Line->Level; 775 } 776 if (FormatTok->Tok.is(tok::kw_else)) { 777 nextToken(); 778 if (FormatTok->Tok.is(tok::l_brace)) { 779 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) 780 addUnwrappedLine(); 781 parseBlock(/*MustBeDeclaration=*/false); 782 addUnwrappedLine(); 783 } else if (FormatTok->Tok.is(tok::kw_if)) { 784 parseIfThenElse(); 785 } else { 786 addUnwrappedLine(); 787 ++Line->Level; 788 parseStructuralElement(); 789 --Line->Level; 790 } 791 } else if (NeedsUnwrappedLine) { 792 addUnwrappedLine(); 793 } 794 } 795 796 void UnwrappedLineParser::parseNamespace() { 797 assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); 798 nextToken(); 799 if (FormatTok->Tok.is(tok::identifier)) 800 nextToken(); 801 if (FormatTok->Tok.is(tok::l_brace)) { 802 if (Style.BreakBeforeBraces == FormatStyle::BS_Linux || 803 Style.BreakBeforeBraces == FormatStyle::BS_Allman) 804 addUnwrappedLine(); 805 806 bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || 807 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 808 DeclarationScopeStack.size() > 1); 809 parseBlock(/*MustBeDeclaration=*/true, AddLevel); 810 // Munch the semicolon after a namespace. This is more common than one would 811 // think. Puttin the semicolon into its own line is very ugly. 812 if (FormatTok->Tok.is(tok::semi)) 813 nextToken(); 814 addUnwrappedLine(); 815 } 816 // FIXME: Add error handling. 817 } 818 819 void UnwrappedLineParser::parseForOrWhileLoop() { 820 assert((FormatTok->Tok.is(tok::kw_for) || FormatTok->Tok.is(tok::kw_while)) && 821 "'for' or 'while' expected"); 822 nextToken(); 823 if (FormatTok->Tok.is(tok::l_paren)) 824 parseParens(); 825 if (FormatTok->Tok.is(tok::l_brace)) { 826 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) 827 addUnwrappedLine(); 828 parseBlock(/*MustBeDeclaration=*/false); 829 addUnwrappedLine(); 830 } else { 831 addUnwrappedLine(); 832 ++Line->Level; 833 parseStructuralElement(); 834 --Line->Level; 835 } 836 } 837 838 void UnwrappedLineParser::parseDoWhile() { 839 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 840 nextToken(); 841 if (FormatTok->Tok.is(tok::l_brace)) { 842 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) 843 addUnwrappedLine(); 844 parseBlock(/*MustBeDeclaration=*/false); 845 } else { 846 addUnwrappedLine(); 847 ++Line->Level; 848 parseStructuralElement(); 849 --Line->Level; 850 } 851 852 // FIXME: Add error handling. 853 if (!FormatTok->Tok.is(tok::kw_while)) { 854 addUnwrappedLine(); 855 return; 856 } 857 858 nextToken(); 859 parseStructuralElement(); 860 } 861 862 void UnwrappedLineParser::parseLabel() { 863 if (FormatTok->Tok.isNot(tok::colon)) 864 return; 865 nextToken(); 866 unsigned OldLineLevel = Line->Level; 867 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 868 --Line->Level; 869 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { 870 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) 871 addUnwrappedLine(); 872 parseBlock(/*MustBeDeclaration=*/false); 873 if (FormatTok->Tok.is(tok::kw_break)) { 874 // "break;" after "}" on its own line only for BS_Allman 875 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) 876 addUnwrappedLine(); 877 parseStructuralElement(); 878 } 879 } 880 addUnwrappedLine(); 881 Line->Level = OldLineLevel; 882 } 883 884 void UnwrappedLineParser::parseCaseLabel() { 885 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 886 // FIXME: fix handling of complex expressions here. 887 do { 888 nextToken(); 889 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 890 parseLabel(); 891 } 892 893 void UnwrappedLineParser::parseSwitch() { 894 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 895 nextToken(); 896 if (FormatTok->Tok.is(tok::l_paren)) 897 parseParens(); 898 if (FormatTok->Tok.is(tok::l_brace)) { 899 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) 900 addUnwrappedLine(); 901 parseBlock(/*MustBeDeclaration=*/false); 902 addUnwrappedLine(); 903 } else { 904 addUnwrappedLine(); 905 ++Line->Level; 906 parseStructuralElement(); 907 --Line->Level; 908 } 909 } 910 911 void UnwrappedLineParser::parseAccessSpecifier() { 912 nextToken(); 913 // Otherwise, we don't know what it is, and we'd better keep the next token. 914 if (FormatTok->Tok.is(tok::colon)) 915 nextToken(); 916 addUnwrappedLine(); 917 } 918 919 void UnwrappedLineParser::parseEnum() { 920 nextToken(); 921 if (FormatTok->Tok.is(tok::identifier) || 922 FormatTok->Tok.is(tok::kw___attribute) || 923 FormatTok->Tok.is(tok::kw___declspec)) { 924 nextToken(); 925 // We can have macros or attributes in between 'enum' and the enum name. 926 if (FormatTok->Tok.is(tok::l_paren)) { 927 parseParens(); 928 } 929 if (FormatTok->Tok.is(tok::identifier)) 930 nextToken(); 931 } 932 if (FormatTok->Tok.is(tok::l_brace)) { 933 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) 934 addUnwrappedLine(); 935 nextToken(); 936 addUnwrappedLine(); 937 ++Line->Level; 938 do { 939 switch (FormatTok->Tok.getKind()) { 940 case tok::l_paren: 941 parseParens(); 942 break; 943 case tok::r_brace: 944 addUnwrappedLine(); 945 nextToken(); 946 --Line->Level; 947 return; 948 case tok::comma: 949 nextToken(); 950 addUnwrappedLine(); 951 break; 952 default: 953 nextToken(); 954 break; 955 } 956 } while (!eof()); 957 } 958 // We fall through to parsing a structural element afterwards, so that in 959 // enum A {} n, m; 960 // "} n, m;" will end up in one unwrapped line. 961 } 962 963 void UnwrappedLineParser::parseRecord() { 964 nextToken(); 965 if (FormatTok->Tok.is(tok::identifier) || 966 FormatTok->Tok.is(tok::kw___attribute) || 967 FormatTok->Tok.is(tok::kw___declspec)) { 968 nextToken(); 969 // We can have macros or attributes in between 'class' and the class name. 970 if (FormatTok->Tok.is(tok::l_paren)) { 971 parseParens(); 972 } 973 // The actual identifier can be a nested name specifier, and in macros 974 // it is often token-pasted. 975 while (FormatTok->Tok.is(tok::identifier) || 976 FormatTok->Tok.is(tok::coloncolon) || 977 FormatTok->Tok.is(tok::hashhash)) 978 nextToken(); 979 980 // Note that parsing away template declarations here leads to incorrectly 981 // accepting function declarations as record declarations. 982 // In general, we cannot solve this problem. Consider: 983 // class A<int> B() {} 984 // which can be a function definition or a class definition when B() is a 985 // macro. If we find enough real-world cases where this is a problem, we 986 // can parse for the 'template' keyword in the beginning of the statement, 987 // and thus rule out the record production in case there is no template 988 // (this would still leave us with an ambiguity between template function 989 // and class declarations). 990 if (FormatTok->Tok.is(tok::colon) || FormatTok->Tok.is(tok::less)) { 991 while (!eof() && FormatTok->Tok.isNot(tok::l_brace)) { 992 if (FormatTok->Tok.is(tok::semi)) 993 return; 994 nextToken(); 995 } 996 } 997 } 998 if (FormatTok->Tok.is(tok::l_brace)) { 999 if (Style.BreakBeforeBraces == FormatStyle::BS_Linux || 1000 Style.BreakBeforeBraces == FormatStyle::BS_Allman) 1001 addUnwrappedLine(); 1002 1003 parseBlock(/*MustBeDeclaration=*/true); 1004 } 1005 // We fall through to parsing a structural element afterwards, so 1006 // class A {} n, m; 1007 // will end up in one unwrapped line. 1008 } 1009 1010 void UnwrappedLineParser::parseObjCProtocolList() { 1011 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 1012 do 1013 nextToken(); 1014 while (!eof() && FormatTok->Tok.isNot(tok::greater)); 1015 nextToken(); // Skip '>'. 1016 } 1017 1018 void UnwrappedLineParser::parseObjCUntilAtEnd() { 1019 do { 1020 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 1021 nextToken(); 1022 addUnwrappedLine(); 1023 break; 1024 } 1025 parseStructuralElement(); 1026 } while (!eof()); 1027 } 1028 1029 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 1030 nextToken(); 1031 nextToken(); // interface name 1032 1033 // @interface can be followed by either a base class, or a category. 1034 if (FormatTok->Tok.is(tok::colon)) { 1035 nextToken(); 1036 nextToken(); // base class name 1037 } else if (FormatTok->Tok.is(tok::l_paren)) 1038 // Skip category, if present. 1039 parseParens(); 1040 1041 if (FormatTok->Tok.is(tok::less)) 1042 parseObjCProtocolList(); 1043 1044 // If instance variables are present, keep the '{' on the first line too. 1045 if (FormatTok->Tok.is(tok::l_brace)) 1046 parseBlock(/*MustBeDeclaration=*/true); 1047 1048 // With instance variables, this puts '}' on its own line. Without instance 1049 // variables, this ends the @interface line. 1050 addUnwrappedLine(); 1051 1052 parseObjCUntilAtEnd(); 1053 } 1054 1055 void UnwrappedLineParser::parseObjCProtocol() { 1056 nextToken(); 1057 nextToken(); // protocol name 1058 1059 if (FormatTok->Tok.is(tok::less)) 1060 parseObjCProtocolList(); 1061 1062 // Check for protocol declaration. 1063 if (FormatTok->Tok.is(tok::semi)) { 1064 nextToken(); 1065 return addUnwrappedLine(); 1066 } 1067 1068 addUnwrappedLine(); 1069 parseObjCUntilAtEnd(); 1070 } 1071 1072 void UnwrappedLineParser::addUnwrappedLine() { 1073 if (Line->Tokens.empty()) 1074 return; 1075 DEBUG({ 1076 llvm::dbgs() << "Line(" << Line->Level << ")" 1077 << (Line->InPPDirective ? " MACRO" : "") << ": "; 1078 for (std::list<FormatToken *>::iterator I = Line->Tokens.begin(), 1079 E = Line->Tokens.end(); 1080 I != E; ++I) { 1081 llvm::dbgs() << (*I)->Tok.getName() << " "; 1082 } 1083 llvm::dbgs() << "\n"; 1084 }); 1085 CurrentLines->push_back(*Line); 1086 Line->Tokens.clear(); 1087 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 1088 for (std::vector<UnwrappedLine>::iterator 1089 I = PreprocessorDirectives.begin(), 1090 E = PreprocessorDirectives.end(); 1091 I != E; ++I) { 1092 CurrentLines->push_back(*I); 1093 } 1094 PreprocessorDirectives.clear(); 1095 } 1096 } 1097 1098 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 1099 1100 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 1101 bool JustComments = Line->Tokens.empty(); 1102 for (SmallVectorImpl<FormatToken *>::const_iterator 1103 I = CommentsBeforeNextToken.begin(), 1104 E = CommentsBeforeNextToken.end(); 1105 I != E; ++I) { 1106 if ((*I)->NewlinesBefore && JustComments) { 1107 addUnwrappedLine(); 1108 } 1109 pushToken(*I); 1110 } 1111 if (NewlineBeforeNext && JustComments) { 1112 addUnwrappedLine(); 1113 } 1114 CommentsBeforeNextToken.clear(); 1115 } 1116 1117 void UnwrappedLineParser::nextToken() { 1118 if (eof()) 1119 return; 1120 flushComments(FormatTok->NewlinesBefore > 0); 1121 pushToken(FormatTok); 1122 readToken(); 1123 } 1124 1125 void UnwrappedLineParser::readToken() { 1126 bool CommentsInCurrentLine = true; 1127 do { 1128 FormatTok = Tokens->getNextToken(); 1129 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 1130 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 1131 // If there is an unfinished unwrapped line, we flush the preprocessor 1132 // directives only after that unwrapped line was finished later. 1133 bool SwitchToPreprocessorLines = 1134 !Line->Tokens.empty() && CurrentLines == &Lines; 1135 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 1136 // Comments stored before the preprocessor directive need to be output 1137 // before the preprocessor directive, at the same level as the 1138 // preprocessor directive, as we consider them to apply to the directive. 1139 flushComments(FormatTok->NewlinesBefore > 0); 1140 parsePPDirective(); 1141 } 1142 1143 if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) && 1144 !Line->InPPDirective) { 1145 continue; 1146 } 1147 1148 if (!FormatTok->Tok.is(tok::comment)) 1149 return; 1150 if (FormatTok->NewlinesBefore > 0 || FormatTok->IsFirst) { 1151 CommentsInCurrentLine = false; 1152 } 1153 if (CommentsInCurrentLine) { 1154 pushToken(FormatTok); 1155 } else { 1156 CommentsBeforeNextToken.push_back(FormatTok); 1157 } 1158 } while (!eof()); 1159 } 1160 1161 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 1162 Line->Tokens.push_back(Tok); 1163 if (MustBreakBeforeNextToken) { 1164 Line->Tokens.back()->MustBreakBefore = true; 1165 MustBreakBeforeNextToken = false; 1166 } 1167 } 1168 1169 } // end namespace format 1170 } // end namespace clang 1171