1 //===--- Format.cpp - Format C++ code -------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file implements functions declared in Format.h. This will be 12 /// split into separate files as we go. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #define DEBUG_TYPE "format-formatter" 17 18 #include "TokenAnnotator.h" 19 #include "UnwrappedLineParser.h" 20 #include "clang/Basic/Diagnostic.h" 21 #include "clang/Basic/OperatorPrecedence.h" 22 #include "clang/Basic/SourceManager.h" 23 #include "clang/Format/Format.h" 24 #include "clang/Frontend/TextDiagnosticPrinter.h" 25 #include "clang/Lex/Lexer.h" 26 #include "llvm/Support/Allocator.h" 27 #include "llvm/Support/Debug.h" 28 #include <queue> 29 #include <string> 30 31 namespace clang { 32 namespace format { 33 34 FormatStyle getLLVMStyle() { 35 FormatStyle LLVMStyle; 36 LLVMStyle.ColumnLimit = 80; 37 LLVMStyle.MaxEmptyLinesToKeep = 1; 38 LLVMStyle.PointerBindsToType = false; 39 LLVMStyle.DerivePointerBinding = false; 40 LLVMStyle.AccessModifierOffset = -2; 41 LLVMStyle.Standard = FormatStyle::LS_Cpp03; 42 LLVMStyle.IndentCaseLabels = false; 43 LLVMStyle.SpacesBeforeTrailingComments = 1; 44 LLVMStyle.BinPackParameters = true; 45 LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true; 46 LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false; 47 LLVMStyle.AllowShortIfStatementsOnASingleLine = false; 48 LLVMStyle.ObjCSpaceBeforeProtocolList = true; 49 LLVMStyle.PenaltyExcessCharacter = 1000000; 50 LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 5; 51 return LLVMStyle; 52 } 53 54 FormatStyle getGoogleStyle() { 55 FormatStyle GoogleStyle; 56 GoogleStyle.ColumnLimit = 80; 57 GoogleStyle.MaxEmptyLinesToKeep = 1; 58 GoogleStyle.PointerBindsToType = true; 59 GoogleStyle.DerivePointerBinding = true; 60 GoogleStyle.AccessModifierOffset = -1; 61 GoogleStyle.Standard = FormatStyle::LS_Auto; 62 GoogleStyle.IndentCaseLabels = true; 63 GoogleStyle.SpacesBeforeTrailingComments = 2; 64 GoogleStyle.BinPackParameters = true; 65 GoogleStyle.AllowAllParametersOfDeclarationOnNextLine = true; 66 GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; 67 GoogleStyle.AllowShortIfStatementsOnASingleLine = false; 68 GoogleStyle.ObjCSpaceBeforeProtocolList = false; 69 GoogleStyle.PenaltyExcessCharacter = 1000000; 70 GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 100; 71 return GoogleStyle; 72 } 73 74 FormatStyle getChromiumStyle() { 75 FormatStyle ChromiumStyle = getGoogleStyle(); 76 ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false; 77 ChromiumStyle.BinPackParameters = false; 78 ChromiumStyle.Standard = FormatStyle::LS_Cpp03; 79 ChromiumStyle.DerivePointerBinding = false; 80 return ChromiumStyle; 81 } 82 83 static bool isTrailingComment(const AnnotatedToken &Tok) { 84 return Tok.is(tok::comment) && 85 (Tok.Children.empty() || Tok.Children[0].MustBreakBefore); 86 } 87 88 // Returns the length of everything up to the first possible line break after 89 // the ), ], } or > matching \c Tok. 90 static unsigned getLengthToMatchingParen(const AnnotatedToken &Tok) { 91 if (Tok.MatchingParen == NULL) 92 return 0; 93 AnnotatedToken *End = Tok.MatchingParen; 94 while (!End->Children.empty() && !End->Children[0].CanBreakBefore) { 95 End = &End->Children[0]; 96 } 97 return End->TotalLength - Tok.TotalLength + 1; 98 } 99 100 /// \brief Manages the whitespaces around tokens and their replacements. 101 /// 102 /// This includes special handling for certain constructs, e.g. the alignment of 103 /// trailing line comments. 104 class WhitespaceManager { 105 public: 106 WhitespaceManager(SourceManager &SourceMgr) : SourceMgr(SourceMgr) {} 107 108 /// \brief Replaces the whitespace in front of \p Tok. Only call once for 109 /// each \c AnnotatedToken. 110 void replaceWhitespace(const AnnotatedToken &Tok, unsigned NewLines, 111 unsigned Spaces, unsigned WhitespaceStartColumn, 112 const FormatStyle &Style) { 113 // 2+ newlines mean an empty line separating logic scopes. 114 if (NewLines >= 2) 115 alignComments(); 116 117 // Align line comments if they are trailing or if they continue other 118 // trailing comments. 119 if (isTrailingComment(Tok)) { 120 // Remove the comment's trailing whitespace. 121 if (Tok.FormatTok.Tok.getLength() != Tok.FormatTok.TokenLength) 122 Replaces.insert(tooling::Replacement( 123 SourceMgr, Tok.FormatTok.Tok.getLocation().getLocWithOffset( 124 Tok.FormatTok.TokenLength), 125 Tok.FormatTok.Tok.getLength() - Tok.FormatTok.TokenLength, "")); 126 127 // Align comment with other comments. 128 if (Tok.Parent != NULL || !Comments.empty()) { 129 if (Style.ColumnLimit >= 130 Spaces + WhitespaceStartColumn + Tok.FormatTok.TokenLength) { 131 StoredComment Comment; 132 Comment.Tok = Tok.FormatTok; 133 Comment.Spaces = Spaces; 134 Comment.NewLines = NewLines; 135 Comment.MinColumn = 136 NewLines > 0 ? Spaces : WhitespaceStartColumn + Spaces; 137 Comment.MaxColumn = Style.ColumnLimit - Tok.FormatTok.TokenLength; 138 Comments.push_back(Comment); 139 return; 140 } 141 } 142 } 143 144 // If this line does not have a trailing comment, align the stored comments. 145 if (Tok.Children.empty() && !isTrailingComment(Tok)) 146 alignComments(); 147 148 if (Tok.Type == TT_BlockComment) 149 indentBlockComment(Tok.FormatTok, Spaces); 150 151 storeReplacement(Tok.FormatTok, getNewLineText(NewLines, Spaces)); 152 } 153 154 /// \brief Like \c replaceWhitespace, but additionally adds right-aligned 155 /// backslashes to escape newlines inside a preprocessor directive. 156 /// 157 /// This function and \c replaceWhitespace have the same behavior if 158 /// \c Newlines == 0. 159 void replacePPWhitespace(const AnnotatedToken &Tok, unsigned NewLines, 160 unsigned Spaces, unsigned WhitespaceStartColumn, 161 const FormatStyle &Style) { 162 storeReplacement( 163 Tok.FormatTok, 164 getNewLineText(NewLines, Spaces, WhitespaceStartColumn, Style)); 165 } 166 167 /// \brief Inserts a line break into the middle of a token. 168 /// 169 /// Will break at \p Offset inside \p Tok, putting \p Prefix before the line 170 /// break and \p Postfix before the rest of the token starts in the next line. 171 /// 172 /// \p InPPDirective, \p Spaces, \p WhitespaceStartColumn and \p Style are 173 /// used to generate the correct line break. 174 void breakToken(const AnnotatedToken &Tok, unsigned Offset, StringRef Prefix, 175 StringRef Postfix, bool InPPDirective, unsigned Spaces, 176 unsigned WhitespaceStartColumn, const FormatStyle &Style) { 177 std::string NewLineText; 178 if (!InPPDirective) 179 NewLineText = getNewLineText(1, Spaces); 180 else 181 NewLineText = getNewLineText(1, Spaces, WhitespaceStartColumn, Style); 182 std::string ReplacementText = (Prefix + NewLineText + Postfix).str(); 183 SourceLocation InsertAt = Tok.FormatTok.WhiteSpaceStart 184 .getLocWithOffset(Tok.FormatTok.WhiteSpaceLength + Offset); 185 Replaces.insert( 186 tooling::Replacement(SourceMgr, InsertAt, 0, ReplacementText)); 187 } 188 189 /// \brief Returns all the \c Replacements created during formatting. 190 const tooling::Replacements &generateReplacements() { 191 alignComments(); 192 return Replaces; 193 } 194 195 private: 196 void indentBlockComment(const FormatToken &Tok, int Indent) { 197 SourceLocation TokenLoc = Tok.Tok.getLocation(); 198 int IndentDelta = Indent - SourceMgr.getSpellingColumnNumber(TokenLoc) + 1; 199 const char *Start = SourceMgr.getCharacterData(TokenLoc); 200 const char *Current = Start; 201 const char *TokEnd = Current + Tok.TokenLength; 202 llvm::SmallVector<SourceLocation, 16> LineStarts; 203 while (Current < TokEnd) { 204 if (*Current == '\n') { 205 ++Current; 206 LineStarts.push_back(TokenLoc.getLocWithOffset(Current - Start)); 207 // If we need to outdent the line, check that it's indented enough. 208 for (int i = 0; i < -IndentDelta; ++i, ++Current) 209 if (Current >= TokEnd || *Current != ' ') 210 return; 211 } else { 212 ++Current; 213 } 214 } 215 216 for (size_t i = 0; i < LineStarts.size(); ++i) { 217 if (IndentDelta > 0) 218 Replaces.insert(tooling::Replacement(SourceMgr, LineStarts[i], 0, 219 std::string(IndentDelta, ' '))); 220 else if (IndentDelta < 0) 221 Replaces.insert( 222 tooling::Replacement(SourceMgr, LineStarts[i], -IndentDelta, "")); 223 } 224 } 225 226 std::string getNewLineText(unsigned NewLines, unsigned Spaces) { 227 return std::string(NewLines, '\n') + std::string(Spaces, ' '); 228 } 229 230 std::string 231 getNewLineText(unsigned NewLines, unsigned Spaces, 232 unsigned WhitespaceStartColumn, const FormatStyle &Style) { 233 std::string NewLineText; 234 if (NewLines > 0) { 235 unsigned Offset = 236 std::min<int>(Style.ColumnLimit - 1, WhitespaceStartColumn); 237 for (unsigned i = 0; i < NewLines; ++i) { 238 NewLineText += std::string(Style.ColumnLimit - Offset - 1, ' '); 239 NewLineText += "\\\n"; 240 Offset = 0; 241 } 242 } 243 return NewLineText + std::string(Spaces, ' '); 244 } 245 246 /// \brief Structure to store a comment for later layout and alignment. 247 struct StoredComment { 248 FormatToken Tok; 249 unsigned MinColumn; 250 unsigned MaxColumn; 251 unsigned NewLines; 252 unsigned Spaces; 253 }; 254 SmallVector<StoredComment, 16> Comments; 255 typedef SmallVector<StoredComment, 16>::iterator comment_iterator; 256 257 /// \brief Try to align all stashed comments. 258 void alignComments() { 259 unsigned MinColumn = 0; 260 unsigned MaxColumn = UINT_MAX; 261 comment_iterator Start = Comments.begin(); 262 for (comment_iterator I = Start, E = Comments.end(); I != E; ++I) { 263 if (I->MinColumn > MaxColumn || I->MaxColumn < MinColumn) { 264 alignComments(Start, I, MinColumn); 265 MinColumn = I->MinColumn; 266 MaxColumn = I->MaxColumn; 267 Start = I; 268 } else { 269 MinColumn = std::max(MinColumn, I->MinColumn); 270 MaxColumn = std::min(MaxColumn, I->MaxColumn); 271 } 272 } 273 alignComments(Start, Comments.end(), MinColumn); 274 Comments.clear(); 275 } 276 277 /// \brief Put all the comments between \p I and \p E into \p Column. 278 void alignComments(comment_iterator I, comment_iterator E, unsigned Column) { 279 while (I != E) { 280 unsigned Spaces = I->Spaces + Column - I->MinColumn; 281 storeReplacement(I->Tok, std::string(I->NewLines, '\n') + 282 std::string(Spaces, ' ')); 283 ++I; 284 } 285 } 286 287 /// \brief Stores \p Text as the replacement for the whitespace in front of 288 /// \p Tok. 289 void storeReplacement(const FormatToken &Tok, const std::string Text) { 290 // Don't create a replacement, if it does not change anything. 291 if (StringRef(SourceMgr.getCharacterData(Tok.WhiteSpaceStart), 292 Tok.WhiteSpaceLength) == Text) 293 return; 294 295 Replaces.insert(tooling::Replacement(SourceMgr, Tok.WhiteSpaceStart, 296 Tok.WhiteSpaceLength, Text)); 297 } 298 299 SourceManager &SourceMgr; 300 tooling::Replacements Replaces; 301 }; 302 303 class UnwrappedLineFormatter { 304 public: 305 UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr, 306 const AnnotatedLine &Line, unsigned FirstIndent, 307 const AnnotatedToken &RootToken, 308 WhitespaceManager &Whitespaces, bool StructuralError) 309 : Style(Style), SourceMgr(SourceMgr), Line(Line), 310 FirstIndent(FirstIndent), RootToken(RootToken), 311 Whitespaces(Whitespaces), Count(0) {} 312 313 /// \brief Formats an \c UnwrappedLine. 314 /// 315 /// \returns The column after the last token in the last line of the 316 /// \c UnwrappedLine. 317 unsigned format(const AnnotatedLine *NextLine) { 318 // Initialize state dependent on indent. 319 LineState State; 320 State.Column = FirstIndent; 321 State.NextToken = &RootToken; 322 State.Stack.push_back( 323 ParenState(FirstIndent + 4, FirstIndent, !Style.BinPackParameters, 324 /*HasMultiParameterLine=*/ false)); 325 State.VariablePos = 0; 326 State.LineContainsContinuedForLoopSection = false; 327 State.ParenLevel = 0; 328 State.StartOfStringLiteral = 0; 329 State.StartOfLineLevel = State.ParenLevel; 330 331 DEBUG({ 332 DebugTokenState(*State.NextToken); 333 }); 334 335 // The first token has already been indented and thus consumed. 336 moveStateToNextToken(State, /*DryRun=*/ false); 337 338 // If everything fits on a single line, just put it there. 339 unsigned ColumnLimit = Style.ColumnLimit; 340 if (NextLine && NextLine->InPPDirective && 341 !NextLine->First.FormatTok.HasUnescapedNewline) 342 ColumnLimit = getColumnLimit(); 343 if (Line.Last->TotalLength <= ColumnLimit - FirstIndent) { 344 while (State.NextToken != NULL) { 345 addTokenToState(false, false, State); 346 } 347 return State.Column; 348 } 349 350 // If the ObjC method declaration does not fit on a line, we should format 351 // it with one arg per line. 352 if (Line.Type == LT_ObjCMethodDecl) 353 State.Stack.back().BreakBeforeParameter = true; 354 355 // Find best solution in solution space. 356 return analyzeSolutionSpace(State); 357 } 358 359 private: 360 void DebugTokenState(const AnnotatedToken &AnnotatedTok) { 361 const Token &Tok = AnnotatedTok.FormatTok.Tok; 362 llvm::errs() << StringRef(SourceMgr.getCharacterData(Tok.getLocation()), 363 Tok.getLength()); 364 llvm::errs(); 365 } 366 367 struct ParenState { 368 ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking, 369 bool HasMultiParameterLine) 370 : Indent(Indent), LastSpace(LastSpace), FirstLessLess(0), 371 BreakBeforeClosingBrace(false), QuestionColumn(0), 372 AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), 373 HasMultiParameterLine(HasMultiParameterLine), ColonPos(0), 374 StartOfFunctionCall(0) {} 375 376 /// \brief The position to which a specific parenthesis level needs to be 377 /// indented. 378 unsigned Indent; 379 380 /// \brief The position of the last space on each level. 381 /// 382 /// Used e.g. to break like: 383 /// functionCall(Parameter, otherCall( 384 /// OtherParameter)); 385 unsigned LastSpace; 386 387 /// \brief The position the first "<<" operator encountered on each level. 388 /// 389 /// Used to align "<<" operators. 0 if no such operator has been encountered 390 /// on a level. 391 unsigned FirstLessLess; 392 393 /// \brief Whether a newline needs to be inserted before the block's closing 394 /// brace. 395 /// 396 /// We only want to insert a newline before the closing brace if there also 397 /// was a newline after the beginning left brace. 398 bool BreakBeforeClosingBrace; 399 400 /// \brief The column of a \c ? in a conditional expression; 401 unsigned QuestionColumn; 402 403 /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple 404 /// lines, in this context. 405 bool AvoidBinPacking; 406 407 /// \brief Break after the next comma (or all the commas in this context if 408 /// \c AvoidBinPacking is \c true). 409 bool BreakBeforeParameter; 410 411 /// \brief This context already has a line with more than one parameter. 412 bool HasMultiParameterLine; 413 414 /// \brief The position of the colon in an ObjC method declaration/call. 415 unsigned ColonPos; 416 417 /// \brief The start of the most recent function in a builder-type call. 418 unsigned StartOfFunctionCall; 419 420 bool operator<(const ParenState &Other) const { 421 if (Indent != Other.Indent) 422 return Indent < Other.Indent; 423 if (LastSpace != Other.LastSpace) 424 return LastSpace < Other.LastSpace; 425 if (FirstLessLess != Other.FirstLessLess) 426 return FirstLessLess < Other.FirstLessLess; 427 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) 428 return BreakBeforeClosingBrace; 429 if (QuestionColumn != Other.QuestionColumn) 430 return QuestionColumn < Other.QuestionColumn; 431 if (AvoidBinPacking != Other.AvoidBinPacking) 432 return AvoidBinPacking; 433 if (BreakBeforeParameter != Other.BreakBeforeParameter) 434 return BreakBeforeParameter; 435 if (HasMultiParameterLine != Other.HasMultiParameterLine) 436 return HasMultiParameterLine; 437 if (ColonPos != Other.ColonPos) 438 return ColonPos < Other.ColonPos; 439 if (StartOfFunctionCall != Other.StartOfFunctionCall) 440 return StartOfFunctionCall < Other.StartOfFunctionCall; 441 return false; 442 } 443 }; 444 445 /// \brief The current state when indenting a unwrapped line. 446 /// 447 /// As the indenting tries different combinations this is copied by value. 448 struct LineState { 449 /// \brief The number of used columns in the current line. 450 unsigned Column; 451 452 /// \brief The token that needs to be next formatted. 453 const AnnotatedToken *NextToken; 454 455 /// \brief The column of the first variable name in a variable declaration. 456 /// 457 /// Used to align further variables if necessary. 458 unsigned VariablePos; 459 460 /// \brief \c true if this line contains a continued for-loop section. 461 bool LineContainsContinuedForLoopSection; 462 463 /// \brief The level of nesting inside (), [], <> and {}. 464 unsigned ParenLevel; 465 466 /// \brief The \c ParenLevel at the start of this line. 467 unsigned StartOfLineLevel; 468 469 /// \brief The start column of the string literal, if we're in a string 470 /// literal sequence, 0 otherwise. 471 unsigned StartOfStringLiteral; 472 473 /// \brief A stack keeping track of properties applying to parenthesis 474 /// levels. 475 std::vector<ParenState> Stack; 476 477 /// \brief Comparison operator to be able to used \c LineState in \c map. 478 bool operator<(const LineState &Other) const { 479 if (NextToken != Other.NextToken) 480 return NextToken < Other.NextToken; 481 if (Column != Other.Column) 482 return Column < Other.Column; 483 if (VariablePos != Other.VariablePos) 484 return VariablePos < Other.VariablePos; 485 if (LineContainsContinuedForLoopSection != 486 Other.LineContainsContinuedForLoopSection) 487 return LineContainsContinuedForLoopSection; 488 if (ParenLevel != Other.ParenLevel) 489 return ParenLevel < Other.ParenLevel; 490 if (StartOfLineLevel != Other.StartOfLineLevel) 491 return StartOfLineLevel < Other.StartOfLineLevel; 492 if (StartOfStringLiteral != Other.StartOfStringLiteral) 493 return StartOfStringLiteral < Other.StartOfStringLiteral; 494 return Stack < Other.Stack; 495 } 496 }; 497 498 /// \brief Appends the next token to \p State and updates information 499 /// necessary for indentation. 500 /// 501 /// Puts the token on the current line if \p Newline is \c true and adds a 502 /// line break and necessary indentation otherwise. 503 /// 504 /// If \p DryRun is \c false, also creates and stores the required 505 /// \c Replacement. 506 unsigned addTokenToState(bool Newline, bool DryRun, LineState &State) { 507 const AnnotatedToken &Current = *State.NextToken; 508 const AnnotatedToken &Previous = *State.NextToken->Parent; 509 assert(State.Stack.size()); 510 511 if (Current.Type == TT_ImplicitStringLiteral) { 512 State.Column += State.NextToken->FormatTok.WhiteSpaceLength + 513 State.NextToken->FormatTok.TokenLength; 514 if (State.NextToken->Children.empty()) 515 State.NextToken = NULL; 516 else 517 State.NextToken = &State.NextToken->Children[0]; 518 return 0; 519 } 520 521 if (Newline) { 522 unsigned WhitespaceStartColumn = State.Column; 523 if (Current.is(tok::r_brace)) { 524 State.Column = Line.Level * 2; 525 } else if (Current.is(tok::string_literal) && 526 State.StartOfStringLiteral != 0) { 527 State.Column = State.StartOfStringLiteral; 528 State.Stack.back().BreakBeforeParameter = true; 529 } else if (Current.is(tok::lessless) && 530 State.Stack.back().FirstLessLess != 0) { 531 State.Column = State.Stack.back().FirstLessLess; 532 } else if (State.ParenLevel != 0 && 533 (Previous.isOneOf(tok::equal, tok::coloncolon) || 534 Current.isOneOf(tok::period, tok::arrow, tok::question))) { 535 // Indent and extra 4 spaces after if we know the current expression is 536 // continued. Don't do that on the top level, as we already indent 4 537 // there. 538 State.Column = std::max(State.Stack.back().LastSpace, 539 State.Stack.back().Indent) + 4; 540 } else if (Current.Type == TT_ConditionalExpr) { 541 State.Column = State.Stack.back().QuestionColumn; 542 } else if (Previous.is(tok::comma) && State.VariablePos != 0 && 543 ((RootToken.is(tok::kw_for) && State.ParenLevel == 1) || 544 State.ParenLevel == 0)) { 545 State.Column = State.VariablePos; 546 } else if (Previous.ClosesTemplateDeclaration || 547 (Current.Type == TT_StartOfName && State.ParenLevel == 0)) { 548 State.Column = State.Stack.back().Indent - 4; 549 } else if (Current.Type == TT_ObjCSelectorName) { 550 if (State.Stack.back().ColonPos > Current.FormatTok.TokenLength) { 551 State.Column = 552 State.Stack.back().ColonPos - Current.FormatTok.TokenLength; 553 } else { 554 State.Column = State.Stack.back().Indent; 555 State.Stack.back().ColonPos = 556 State.Column + Current.FormatTok.TokenLength; 557 } 558 } else if (Previous.Type == TT_ObjCMethodExpr || 559 Current.Type == TT_StartOfName) { 560 State.Column = State.Stack.back().Indent + 4; 561 } else { 562 State.Column = State.Stack.back().Indent; 563 } 564 565 if (Current.is(tok::question)) 566 State.Stack.back().BreakBeforeParameter = true; 567 if (Previous.isOneOf(tok::comma, tok::semi) && 568 !State.Stack.back().AvoidBinPacking) 569 State.Stack.back().BreakBeforeParameter = false; 570 571 if (!DryRun) { 572 unsigned NewLines = 1; 573 if (Current.Type == TT_LineComment) 574 NewLines = 575 std::max(NewLines, std::min(Current.FormatTok.NewlinesBefore, 576 Style.MaxEmptyLinesToKeep + 1)); 577 if (!Line.InPPDirective) 578 Whitespaces.replaceWhitespace(Current, NewLines, State.Column, 579 WhitespaceStartColumn, Style); 580 else 581 Whitespaces.replacePPWhitespace(Current, NewLines, State.Column, 582 WhitespaceStartColumn, Style); 583 } 584 585 State.Stack.back().LastSpace = State.Column; 586 State.StartOfLineLevel = State.ParenLevel; 587 588 // Any break on this level means that the parent level has been broken 589 // and we need to avoid bin packing there. 590 for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) { 591 State.Stack[i].BreakBeforeParameter = true; 592 } 593 if (Current.isOneOf(tok::period, tok::arrow)) 594 State.Stack.back().BreakBeforeParameter = true; 595 596 // If we break after {, we should also break before the corresponding }. 597 if (Previous.is(tok::l_brace)) 598 State.Stack.back().BreakBeforeClosingBrace = true; 599 600 if (State.Stack.back().AvoidBinPacking) { 601 // If we are breaking after '(', '{', '<', this is not bin packing 602 // unless AllowAllParametersOfDeclarationOnNextLine is false. 603 if ((Previous.isNot(tok::l_paren) && Previous.isNot(tok::l_brace)) || 604 (!Style.AllowAllParametersOfDeclarationOnNextLine && 605 Line.MustBeDeclaration)) 606 State.Stack.back().BreakBeforeParameter = true; 607 } 608 } else { 609 // FIXME: Put VariablePos into ParenState and remove second part of if(). 610 if (Current.is(tok::equal) && 611 (RootToken.is(tok::kw_for) || State.ParenLevel == 0)) 612 State.VariablePos = State.Column - Previous.FormatTok.TokenLength; 613 614 unsigned Spaces = State.NextToken->SpacesRequiredBefore; 615 616 if (!DryRun) 617 Whitespaces.replaceWhitespace(Current, 0, Spaces, State.Column, Style); 618 619 if (Current.Type == TT_ObjCSelectorName && 620 State.Stack.back().ColonPos == 0) { 621 if (State.Stack.back().Indent + Current.LongestObjCSelectorName > 622 State.Column + Spaces + Current.FormatTok.TokenLength) 623 State.Stack.back().ColonPos = 624 State.Stack.back().Indent + Current.LongestObjCSelectorName; 625 else 626 State.Stack.back().ColonPos = 627 State.Column + Spaces + Current.FormatTok.TokenLength; 628 } 629 630 if (Current.Type != TT_LineComment && 631 (Previous.isOneOf(tok::l_paren, tok::l_brace) || 632 State.NextToken->Parent->Type == TT_TemplateOpener)) 633 State.Stack.back().Indent = State.Column + Spaces; 634 if (Previous.is(tok::comma) && !isTrailingComment(Current)) 635 State.Stack.back().HasMultiParameterLine = true; 636 637 State.Column += Spaces; 638 if (Current.is(tok::l_paren) && Previous.is(tok::kw_if)) 639 // Treat the condition inside an if as if it was a second function 640 // parameter, i.e. let nested calls have an indent of 4. 641 State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(". 642 else if (Previous.is(tok::comma) && State.ParenLevel != 0) 643 // Top-level spaces are exempt as that mostly leads to better results. 644 State.Stack.back().LastSpace = State.Column; 645 else if ((Previous.Type == TT_BinaryOperator || 646 Previous.Type == TT_ConditionalExpr || 647 Previous.Type == TT_CtorInitializerColon) && 648 getPrecedence(Previous) != prec::Assignment) 649 State.Stack.back().LastSpace = State.Column; 650 else if (Previous.Type == TT_InheritanceColon) 651 State.Stack.back().Indent = State.Column; 652 else if (Previous.ParameterCount > 1 && 653 (Previous.isOneOf(tok::l_paren, tok::l_square, tok::l_brace) || 654 Previous.Type == TT_TemplateOpener)) 655 // If this function has multiple parameters, indent nested calls from 656 // the start of the first parameter. 657 State.Stack.back().LastSpace = State.Column; 658 } 659 660 return moveStateToNextToken(State, DryRun); 661 } 662 663 /// \brief Mark the next token as consumed in \p State and modify its stacks 664 /// accordingly. 665 unsigned moveStateToNextToken(LineState &State, bool DryRun) { 666 const AnnotatedToken &Current = *State.NextToken; 667 assert(State.Stack.size()); 668 669 if (Current.Type == TT_InheritanceColon) 670 State.Stack.back().AvoidBinPacking = true; 671 if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0) 672 State.Stack.back().FirstLessLess = State.Column; 673 if (Current.is(tok::question)) 674 State.Stack.back().QuestionColumn = State.Column; 675 if (Current.isOneOf(tok::period, tok::arrow) && 676 Line.Type == LT_BuilderTypeCall && State.ParenLevel == 0) 677 State.Stack.back().StartOfFunctionCall = 678 Current.LastInChainOfCalls ? 0 : State.Column; 679 if (Current.Type == TT_CtorInitializerColon) { 680 if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine) 681 State.Stack.back().AvoidBinPacking = true; 682 State.Stack.back().BreakBeforeParameter = false; 683 } 684 685 // Insert scopes created by fake parenthesis. 686 for (unsigned i = 0, e = Current.FakeLParens; i != e; ++i) { 687 ParenState NewParenState = State.Stack.back(); 688 NewParenState.Indent = std::max(State.Column, State.Stack.back().Indent); 689 NewParenState.BreakBeforeParameter = false; 690 State.Stack.push_back(NewParenState); 691 } 692 693 // If we encounter an opening (, [, { or <, we add a level to our stacks to 694 // prepare for the following tokens. 695 if (Current.isOneOf(tok::l_paren, tok::l_square, tok::l_brace) || 696 State.NextToken->Type == TT_TemplateOpener) { 697 unsigned NewIndent; 698 bool AvoidBinPacking; 699 if (Current.is(tok::l_brace)) { 700 NewIndent = 2 + State.Stack.back().LastSpace; 701 AvoidBinPacking = false; 702 } else { 703 NewIndent = 4 + std::max(State.Stack.back().LastSpace, 704 State.Stack.back().StartOfFunctionCall); 705 AvoidBinPacking = 706 !Style.BinPackParameters || State.Stack.back().AvoidBinPacking; 707 } 708 State.Stack.push_back( 709 ParenState(NewIndent, State.Stack.back().LastSpace, AvoidBinPacking, 710 State.Stack.back().HasMultiParameterLine)); 711 ++State.ParenLevel; 712 } 713 714 // If this '[' opens an ObjC call, determine whether all parameters fit into 715 // one line and put one per line if they don't. 716 if (Current.is(tok::l_square) && Current.Type == TT_ObjCMethodExpr && 717 Current.MatchingParen != NULL) { 718 if (getLengthToMatchingParen(Current) + State.Column > getColumnLimit()) 719 State.Stack.back().BreakBeforeParameter = true; 720 } 721 722 // If we encounter a closing ), ], } or >, we can remove a level from our 723 // stacks. 724 if (Current.isOneOf(tok::r_paren, tok::r_square) || 725 (Current.is(tok::r_brace) && State.NextToken != &RootToken) || 726 State.NextToken->Type == TT_TemplateCloser) { 727 State.Stack.pop_back(); 728 --State.ParenLevel; 729 } 730 731 // Remove scopes created by fake parenthesis. 732 for (unsigned i = 0, e = Current.FakeRParens; i != e; ++i) { 733 State.Stack.pop_back(); 734 } 735 736 if (Current.is(tok::string_literal)) { 737 State.StartOfStringLiteral = State.Column; 738 } else if (Current.isNot(tok::comment)) { 739 State.StartOfStringLiteral = 0; 740 } 741 742 State.Column += Current.FormatTok.TokenLength; 743 744 if (State.NextToken->Children.empty()) 745 State.NextToken = NULL; 746 else 747 State.NextToken = &State.NextToken->Children[0]; 748 749 return breakProtrudingToken(Current, State, DryRun); 750 } 751 752 /// \brief If the current token sticks out over the end of the line, break 753 /// it if possible. 754 unsigned breakProtrudingToken(const AnnotatedToken &Current, LineState &State, 755 bool DryRun) { 756 if (Current.isNot(tok::string_literal)) 757 return 0; 758 // Only break up default narrow strings. 759 if (StringRef(Current.FormatTok.Tok.getLiteralData()).find('"') != 0) 760 return 0; 761 762 unsigned Penalty = 0; 763 unsigned TailOffset = 0; 764 unsigned TailLength = Current.FormatTok.TokenLength; 765 unsigned StartColumn = State.Column - Current.FormatTok.TokenLength; 766 unsigned OffsetFromStart = 0; 767 while (StartColumn + TailLength > getColumnLimit()) { 768 StringRef Text = StringRef( 769 Current.FormatTok.Tok.getLiteralData() + TailOffset, TailLength); 770 if (StartColumn + OffsetFromStart + 1 > getColumnLimit()) 771 break; 772 StringRef::size_type SplitPoint = getSplitPoint( 773 Text, getColumnLimit() - StartColumn - OffsetFromStart - 1); 774 if (SplitPoint == StringRef::npos) 775 break; 776 assert(SplitPoint != 0); 777 // +2, because 'Text' starts after the opening quotes, and does not 778 // include the closing quote we need to insert. 779 unsigned WhitespaceStartColumn = 780 StartColumn + OffsetFromStart + SplitPoint + 2; 781 State.Stack.back().LastSpace = StartColumn; 782 if (!DryRun) { 783 Whitespaces.breakToken(Current, TailOffset + SplitPoint + 1, "\"", "\"", 784 Line.InPPDirective, StartColumn, 785 WhitespaceStartColumn, Style); 786 } 787 TailOffset += SplitPoint + 1; 788 TailLength -= SplitPoint + 1; 789 OffsetFromStart = 1; 790 Penalty += Style.PenaltyExcessCharacter; 791 for (unsigned i = 0, e = State.Stack.size(); i != e; ++i) 792 State.Stack[i].BreakBeforeParameter = true; 793 } 794 State.Column = StartColumn + TailLength; 795 return Penalty; 796 } 797 798 StringRef::size_type 799 getSplitPoint(StringRef Text, StringRef::size_type Offset) { 800 StringRef::size_type SpaceOffset = Text.rfind(' ', Offset); 801 if (SpaceOffset != StringRef::npos && SpaceOffset != 0) 802 return SpaceOffset; 803 StringRef::size_type SlashOffset = Text.rfind('/', Offset); 804 if (SlashOffset != StringRef::npos && SlashOffset != 0) 805 return SlashOffset; 806 StringRef::size_type Split = getStartOfCharacter(Text, Offset); 807 if (Split != StringRef::npos && Split > 1) 808 // Do not split at 0. 809 return Split - 1; 810 return StringRef::npos; 811 } 812 813 StringRef::size_type 814 getStartOfCharacter(StringRef Text, StringRef::size_type Offset) { 815 StringRef::size_type NextEscape = Text.find('\\'); 816 while (NextEscape != StringRef::npos && NextEscape < Offset) { 817 StringRef::size_type SequenceLength = 818 getEscapeSequenceLength(Text.substr(NextEscape)); 819 if (Offset < NextEscape + SequenceLength) 820 return NextEscape; 821 NextEscape = Text.find('\\', NextEscape + SequenceLength); 822 } 823 return Offset; 824 } 825 826 unsigned getEscapeSequenceLength(StringRef Text) { 827 assert(Text[0] == '\\'); 828 if (Text.size() < 2) 829 return 1; 830 831 switch (Text[1]) { 832 case 'u': 833 return 6; 834 case 'U': 835 return 10; 836 case 'x': 837 return getHexLength(Text); 838 default: 839 if (Text[1] >= '0' && Text[1] <= '7') 840 return getOctalLength(Text); 841 return 2; 842 } 843 } 844 845 unsigned getHexLength(StringRef Text) { 846 unsigned I = 2; // Point after '\x'. 847 while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') || 848 (Text[I] >= 'a' && Text[I] <= 'f') || 849 (Text[I] >= 'A' && Text[I] <= 'F'))) { 850 ++I; 851 } 852 return I; 853 } 854 855 unsigned getOctalLength(StringRef Text) { 856 unsigned I = 1; 857 while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) { 858 ++I; 859 } 860 return I; 861 } 862 863 unsigned getColumnLimit() { 864 return Style.ColumnLimit - (Line.InPPDirective ? 2 : 0); 865 } 866 867 /// \brief An edge in the solution space from \c Previous->State to \c State, 868 /// inserting a newline dependent on the \c NewLine. 869 struct StateNode { 870 StateNode(const LineState &State, bool NewLine, StateNode *Previous) 871 : State(State), NewLine(NewLine), Previous(Previous) {} 872 LineState State; 873 bool NewLine; 874 StateNode *Previous; 875 }; 876 877 /// \brief A pair of <penalty, count> that is used to prioritize the BFS on. 878 /// 879 /// In case of equal penalties, we want to prefer states that were inserted 880 /// first. During state generation we make sure that we insert states first 881 /// that break the line as late as possible. 882 typedef std::pair<unsigned, unsigned> OrderedPenalty; 883 884 /// \brief An item in the prioritized BFS search queue. The \c StateNode's 885 /// \c State has the given \c OrderedPenalty. 886 typedef std::pair<OrderedPenalty, StateNode *> QueueItem; 887 888 /// \brief The BFS queue type. 889 typedef std::priority_queue<QueueItem, std::vector<QueueItem>, 890 std::greater<QueueItem> > QueueType; 891 892 /// \brief Analyze the entire solution space starting from \p InitialState. 893 /// 894 /// This implements a variant of Dijkstra's algorithm on the graph that spans 895 /// the solution space (\c LineStates are the nodes). The algorithm tries to 896 /// find the shortest path (the one with lowest penalty) from \p InitialState 897 /// to a state where all tokens are placed. 898 unsigned analyzeSolutionSpace(LineState &InitialState) { 899 std::set<LineState> Seen; 900 901 // Insert start element into queue. 902 StateNode *Node = 903 new (Allocator.Allocate()) StateNode(InitialState, false, NULL); 904 Queue.push(QueueItem(OrderedPenalty(0, Count), Node)); 905 ++Count; 906 907 // While not empty, take first element and follow edges. 908 while (!Queue.empty()) { 909 unsigned Penalty = Queue.top().first.first; 910 StateNode *Node = Queue.top().second; 911 if (Node->State.NextToken == NULL) { 912 DEBUG(llvm::errs() << "\n---\nPenalty for line: " << Penalty << "\n"); 913 break; 914 } 915 Queue.pop(); 916 917 if (!Seen.insert(Node->State).second) 918 // State already examined with lower penalty. 919 continue; 920 921 addNextStateToQueue(Penalty, Node, /*NewLine=*/ false); 922 addNextStateToQueue(Penalty, Node, /*NewLine=*/ true); 923 } 924 925 if (Queue.empty()) 926 // We were unable to find a solution, do nothing. 927 // FIXME: Add diagnostic? 928 return 0; 929 930 // Reconstruct the solution. 931 reconstructPath(InitialState, Queue.top().second); 932 DEBUG(llvm::errs() << "---\n"); 933 934 // Return the column after the last token of the solution. 935 return Queue.top().second->State.Column; 936 } 937 938 void reconstructPath(LineState &State, StateNode *Current) { 939 // FIXME: This recursive implementation limits the possible number 940 // of tokens per line if compiled into a binary with small stack space. 941 // To become more independent of stack frame limitations we would need 942 // to also change the TokenAnnotator. 943 if (Current->Previous == NULL) 944 return; 945 reconstructPath(State, Current->Previous); 946 DEBUG({ 947 if (Current->NewLine) { 948 llvm::errs() 949 << "Penalty for splitting before " 950 << Current->Previous->State.NextToken->FormatTok.Tok.getName() 951 << ": " << Current->Previous->State.NextToken->SplitPenalty << "\n"; 952 } 953 }); 954 addTokenToState(Current->NewLine, false, State); 955 } 956 957 /// \brief Add the following state to the analysis queue \c Queue. 958 /// 959 /// Assume the current state is \p PreviousNode and has been reached with a 960 /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true. 961 void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode, 962 bool NewLine) { 963 if (NewLine && !canBreak(PreviousNode->State)) 964 return; 965 if (!NewLine && mustBreak(PreviousNode->State)) 966 return; 967 if (NewLine) 968 Penalty += PreviousNode->State.NextToken->SplitPenalty; 969 970 StateNode *Node = new (Allocator.Allocate()) 971 StateNode(PreviousNode->State, NewLine, PreviousNode); 972 Penalty += addTokenToState(NewLine, true, Node->State); 973 if (Node->State.Column > getColumnLimit()) { 974 unsigned ExcessCharacters = Node->State.Column - getColumnLimit(); 975 Penalty += Style.PenaltyExcessCharacter * ExcessCharacters; 976 } 977 978 Queue.push(QueueItem(OrderedPenalty(Penalty, Count), Node)); 979 ++Count; 980 } 981 982 /// \brief Returns \c true, if a line break after \p State is allowed. 983 bool canBreak(const LineState &State) { 984 if (!State.NextToken->CanBreakBefore && 985 !(State.NextToken->is(tok::r_brace) && 986 State.Stack.back().BreakBeforeClosingBrace)) 987 return false; 988 // Trying to insert a parameter on a new line if there are already more than 989 // one parameter on the current line is bin packing. 990 if (State.Stack.back().HasMultiParameterLine && 991 State.Stack.back().AvoidBinPacking) 992 return false; 993 return true; 994 } 995 996 /// \brief Returns \c true, if a line break after \p State is mandatory. 997 bool mustBreak(const LineState &State) { 998 if (State.NextToken->MustBreakBefore) 999 return true; 1000 if (State.NextToken->is(tok::r_brace) && 1001 State.Stack.back().BreakBeforeClosingBrace) 1002 return true; 1003 if (State.NextToken->Parent->is(tok::semi) && 1004 State.LineContainsContinuedForLoopSection) 1005 return true; 1006 if ((State.NextToken->Parent->isOneOf(tok::comma, tok::semi) || 1007 State.NextToken->is(tok::question) || 1008 State.NextToken->Type == TT_ConditionalExpr) && 1009 State.Stack.back().BreakBeforeParameter && 1010 !isTrailingComment(*State.NextToken) && 1011 State.NextToken->isNot(tok::r_paren) && 1012 State.NextToken->isNot(tok::r_brace)) 1013 return true; 1014 // FIXME: Comparing LongestObjCSelectorName to 0 is a hacky way of finding 1015 // out whether it is the first parameter. Clean this up. 1016 if (State.NextToken->Type == TT_ObjCSelectorName && 1017 State.NextToken->LongestObjCSelectorName == 0 && 1018 State.Stack.back().BreakBeforeParameter) 1019 return true; 1020 if ((State.NextToken->Type == TT_CtorInitializerColon || 1021 (State.NextToken->Parent->ClosesTemplateDeclaration && 1022 State.ParenLevel == 0))) 1023 return true; 1024 if (State.NextToken->Type == TT_InlineASMColon) 1025 return true; 1026 // This prevents breaks like: 1027 // ... 1028 // SomeParameter, OtherParameter).DoSomething( 1029 // ... 1030 // As they hide "DoSomething" and generally bad for readability. 1031 if (State.NextToken->isOneOf(tok::period, tok::arrow) && 1032 getRemainingLength(State) + State.Column > getColumnLimit() && 1033 State.ParenLevel < State.StartOfLineLevel) 1034 return true; 1035 return false; 1036 } 1037 1038 // Returns the total number of columns required for the remaining tokens. 1039 unsigned getRemainingLength(const LineState &State) { 1040 if (State.NextToken && State.NextToken->Parent) 1041 return Line.Last->TotalLength - State.NextToken->Parent->TotalLength; 1042 return 0; 1043 } 1044 1045 FormatStyle Style; 1046 SourceManager &SourceMgr; 1047 const AnnotatedLine &Line; 1048 const unsigned FirstIndent; 1049 const AnnotatedToken &RootToken; 1050 WhitespaceManager &Whitespaces; 1051 1052 llvm::SpecificBumpPtrAllocator<StateNode> Allocator; 1053 QueueType Queue; 1054 // Increasing count of \c StateNode items we have created. This is used 1055 // to create a deterministic order independent of the container. 1056 unsigned Count; 1057 }; 1058 1059 class LexerBasedFormatTokenSource : public FormatTokenSource { 1060 public: 1061 LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr) 1062 : GreaterStashed(false), Lex(Lex), SourceMgr(SourceMgr), 1063 IdentTable(Lex.getLangOpts()) { 1064 Lex.SetKeepWhitespaceMode(true); 1065 } 1066 1067 virtual FormatToken getNextToken() { 1068 if (GreaterStashed) { 1069 FormatTok.NewlinesBefore = 0; 1070 FormatTok.WhiteSpaceStart = 1071 FormatTok.Tok.getLocation().getLocWithOffset(1); 1072 FormatTok.WhiteSpaceLength = 0; 1073 GreaterStashed = false; 1074 return FormatTok; 1075 } 1076 1077 FormatTok = FormatToken(); 1078 Lex.LexFromRawLexer(FormatTok.Tok); 1079 StringRef Text = rawTokenText(FormatTok.Tok); 1080 FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation(); 1081 if (SourceMgr.getFileOffset(FormatTok.WhiteSpaceStart) == 0) 1082 FormatTok.IsFirst = true; 1083 1084 // Consume and record whitespace until we find a significant token. 1085 while (FormatTok.Tok.is(tok::unknown)) { 1086 unsigned Newlines = Text.count('\n'); 1087 if (Newlines > 0) 1088 FormatTok.LastNewlineOffset = 1089 FormatTok.WhiteSpaceLength + Text.rfind('\n') + 1; 1090 unsigned EscapedNewlines = Text.count("\\\n"); 1091 FormatTok.NewlinesBefore += Newlines; 1092 FormatTok.HasUnescapedNewline |= EscapedNewlines != Newlines; 1093 FormatTok.WhiteSpaceLength += FormatTok.Tok.getLength(); 1094 1095 if (FormatTok.Tok.is(tok::eof)) 1096 return FormatTok; 1097 Lex.LexFromRawLexer(FormatTok.Tok); 1098 Text = rawTokenText(FormatTok.Tok); 1099 } 1100 1101 // Now FormatTok is the next non-whitespace token. 1102 FormatTok.TokenLength = Text.size(); 1103 1104 // In case the token starts with escaped newlines, we want to 1105 // take them into account as whitespace - this pattern is quite frequent 1106 // in macro definitions. 1107 // FIXME: What do we want to do with other escaped spaces, and escaped 1108 // spaces or newlines in the middle of tokens? 1109 // FIXME: Add a more explicit test. 1110 unsigned i = 0; 1111 while (i + 1 < Text.size() && Text[i] == '\\' && Text[i + 1] == '\n') { 1112 // FIXME: ++FormatTok.NewlinesBefore is missing... 1113 FormatTok.WhiteSpaceLength += 2; 1114 FormatTok.TokenLength -= 2; 1115 i += 2; 1116 } 1117 1118 if (FormatTok.Tok.is(tok::raw_identifier)) { 1119 IdentifierInfo &Info = IdentTable.get(Text); 1120 FormatTok.Tok.setIdentifierInfo(&Info); 1121 FormatTok.Tok.setKind(Info.getTokenID()); 1122 } 1123 1124 if (FormatTok.Tok.is(tok::greatergreater)) { 1125 FormatTok.Tok.setKind(tok::greater); 1126 FormatTok.TokenLength = 1; 1127 GreaterStashed = true; 1128 } 1129 1130 // If we reformat comments, we remove trailing whitespace. Update the length 1131 // accordingly. 1132 if (FormatTok.Tok.is(tok::comment)) 1133 FormatTok.TokenLength = Text.rtrim().size(); 1134 1135 return FormatTok; 1136 } 1137 1138 IdentifierTable &getIdentTable() { return IdentTable; } 1139 1140 private: 1141 FormatToken FormatTok; 1142 bool GreaterStashed; 1143 Lexer &Lex; 1144 SourceManager &SourceMgr; 1145 IdentifierTable IdentTable; 1146 1147 /// Returns the text of \c FormatTok. 1148 StringRef rawTokenText(Token &Tok) { 1149 return StringRef(SourceMgr.getCharacterData(Tok.getLocation()), 1150 Tok.getLength()); 1151 } 1152 }; 1153 1154 class Formatter : public UnwrappedLineConsumer { 1155 public: 1156 Formatter(DiagnosticsEngine &Diag, const FormatStyle &Style, Lexer &Lex, 1157 SourceManager &SourceMgr, 1158 const std::vector<CharSourceRange> &Ranges) 1159 : Diag(Diag), Style(Style), Lex(Lex), SourceMgr(SourceMgr), 1160 Whitespaces(SourceMgr), Ranges(Ranges) {} 1161 1162 virtual ~Formatter() {} 1163 1164 tooling::Replacements format() { 1165 LexerBasedFormatTokenSource Tokens(Lex, SourceMgr); 1166 UnwrappedLineParser Parser(Diag, Style, Tokens, *this); 1167 StructuralError = Parser.parse(); 1168 unsigned PreviousEndOfLineColumn = 0; 1169 TokenAnnotator Annotator(Style, SourceMgr, Lex, 1170 Tokens.getIdentTable().get("in")); 1171 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1172 Annotator.annotate(AnnotatedLines[i]); 1173 } 1174 deriveLocalStyle(); 1175 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1176 Annotator.calculateFormattingInformation(AnnotatedLines[i]); 1177 1178 // Adapt level to the next line if this is a comment. 1179 // FIXME: Can/should this be done in the UnwrappedLineParser? 1180 if (i + 1 != e && AnnotatedLines[i].First.is(tok::comment) && 1181 AnnotatedLines[i].First.Children.empty() && 1182 AnnotatedLines[i + 1].First.isNot(tok::r_brace)) 1183 AnnotatedLines[i].Level = AnnotatedLines[i + 1].Level; 1184 } 1185 std::vector<int> IndentForLevel; 1186 bool PreviousLineWasTouched = false; 1187 for (std::vector<AnnotatedLine>::iterator I = AnnotatedLines.begin(), 1188 E = AnnotatedLines.end(); 1189 I != E; ++I) { 1190 const AnnotatedLine &TheLine = *I; 1191 const FormatToken &FirstTok = TheLine.First.FormatTok; 1192 int Offset = getIndentOffset(TheLine.First); 1193 while (IndentForLevel.size() <= TheLine.Level) 1194 IndentForLevel.push_back(-1); 1195 IndentForLevel.resize(TheLine.Level + 1); 1196 bool WasMoved = 1197 PreviousLineWasTouched && FirstTok.NewlinesBefore == 0; 1198 if (TheLine.First.is(tok::eof)) { 1199 if (PreviousLineWasTouched) { 1200 unsigned NewLines = std::min(FirstTok.NewlinesBefore, 1u); 1201 Whitespaces.replaceWhitespace(TheLine.First, NewLines, /*Indent*/ 0, 1202 /*WhitespaceStartColumn*/ 0, Style); 1203 } 1204 } else if (TheLine.Type != LT_Invalid && 1205 (WasMoved || touchesLine(TheLine))) { 1206 unsigned LevelIndent = getIndent(IndentForLevel, TheLine.Level); 1207 unsigned Indent = LevelIndent; 1208 if (static_cast<int>(Indent) + Offset >= 0) 1209 Indent += Offset; 1210 if (!FirstTok.WhiteSpaceStart.isValid() || StructuralError) { 1211 Indent = LevelIndent = SourceMgr.getSpellingColumnNumber( 1212 FirstTok.Tok.getLocation()) - 1; 1213 } else { 1214 formatFirstToken(TheLine.First, Indent, TheLine.InPPDirective, 1215 PreviousEndOfLineColumn); 1216 } 1217 tryFitMultipleLinesInOne(Indent, I, E); 1218 UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent, 1219 TheLine.First, Whitespaces, 1220 StructuralError); 1221 PreviousEndOfLineColumn = 1222 Formatter.format(I + 1 != E ? &*(I + 1) : NULL); 1223 IndentForLevel[TheLine.Level] = LevelIndent; 1224 PreviousLineWasTouched = true; 1225 } else { 1226 if (FirstTok.NewlinesBefore > 0 || FirstTok.IsFirst) { 1227 unsigned Indent = 1228 SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1; 1229 unsigned LevelIndent = Indent; 1230 if (static_cast<int>(LevelIndent) - Offset >= 0) 1231 LevelIndent -= Offset; 1232 IndentForLevel[TheLine.Level] = LevelIndent; 1233 1234 // Remove trailing whitespace of the previous line if it was touched. 1235 if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine)) 1236 formatFirstToken(TheLine.First, Indent, TheLine.InPPDirective, 1237 PreviousEndOfLineColumn); 1238 } 1239 // If we did not reformat this unwrapped line, the column at the end of 1240 // the last token is unchanged - thus, we can calculate the end of the 1241 // last token. 1242 SourceLocation LastLoc = TheLine.Last->FormatTok.Tok.getLocation(); 1243 PreviousEndOfLineColumn = 1244 SourceMgr.getSpellingColumnNumber(LastLoc) + 1245 Lex.MeasureTokenLength(LastLoc, SourceMgr, Lex.getLangOpts()) - 1; 1246 PreviousLineWasTouched = false; 1247 } 1248 } 1249 return Whitespaces.generateReplacements(); 1250 } 1251 1252 private: 1253 void deriveLocalStyle() { 1254 unsigned CountBoundToVariable = 0; 1255 unsigned CountBoundToType = 0; 1256 bool HasCpp03IncompatibleFormat = false; 1257 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1258 if (AnnotatedLines[i].First.Children.empty()) 1259 continue; 1260 AnnotatedToken *Tok = &AnnotatedLines[i].First.Children[0]; 1261 while (!Tok->Children.empty()) { 1262 if (Tok->Type == TT_PointerOrReference) { 1263 bool SpacesBefore = Tok->FormatTok.WhiteSpaceLength > 0; 1264 bool SpacesAfter = Tok->Children[0].FormatTok.WhiteSpaceLength > 0; 1265 if (SpacesBefore && !SpacesAfter) 1266 ++CountBoundToVariable; 1267 else if (!SpacesBefore && SpacesAfter) 1268 ++CountBoundToType; 1269 } 1270 1271 if (Tok->Type == TT_TemplateCloser && 1272 Tok->Parent->Type == TT_TemplateCloser && 1273 Tok->FormatTok.WhiteSpaceLength == 0) 1274 HasCpp03IncompatibleFormat = true; 1275 Tok = &Tok->Children[0]; 1276 } 1277 } 1278 if (Style.DerivePointerBinding) { 1279 if (CountBoundToType > CountBoundToVariable) 1280 Style.PointerBindsToType = true; 1281 else if (CountBoundToType < CountBoundToVariable) 1282 Style.PointerBindsToType = false; 1283 } 1284 if (Style.Standard == FormatStyle::LS_Auto) { 1285 Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11 1286 : FormatStyle::LS_Cpp03; 1287 } 1288 } 1289 1290 /// \brief Get the indent of \p Level from \p IndentForLevel. 1291 /// 1292 /// \p IndentForLevel must contain the indent for the level \c l 1293 /// at \p IndentForLevel[l], or a value < 0 if the indent for 1294 /// that level is unknown. 1295 unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) { 1296 if (IndentForLevel[Level] != -1) 1297 return IndentForLevel[Level]; 1298 if (Level == 0) 1299 return 0; 1300 return getIndent(IndentForLevel, Level - 1) + 2; 1301 } 1302 1303 /// \brief Get the offset of the line relatively to the level. 1304 /// 1305 /// For example, 'public:' labels in classes are offset by 1 or 2 1306 /// characters to the left from their level. 1307 int getIndentOffset(const AnnotatedToken &RootToken) { 1308 bool IsAccessModifier = false; 1309 if (RootToken.isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private)) 1310 IsAccessModifier = true; 1311 else if (RootToken.is(tok::at) && !RootToken.Children.empty() && 1312 (RootToken.Children[0].isObjCAtKeyword(tok::objc_public) || 1313 RootToken.Children[0].isObjCAtKeyword(tok::objc_protected) || 1314 RootToken.Children[0].isObjCAtKeyword(tok::objc_package) || 1315 RootToken.Children[0].isObjCAtKeyword(tok::objc_private))) 1316 IsAccessModifier = true; 1317 1318 if (IsAccessModifier) 1319 return Style.AccessModifierOffset; 1320 return 0; 1321 } 1322 1323 /// \brief Tries to merge lines into one. 1324 /// 1325 /// This will change \c Line and \c AnnotatedLine to contain the merged line, 1326 /// if possible; note that \c I will be incremented when lines are merged. 1327 /// 1328 /// Returns whether the resulting \c Line can fit in a single line. 1329 void tryFitMultipleLinesInOne(unsigned Indent, 1330 std::vector<AnnotatedLine>::iterator &I, 1331 std::vector<AnnotatedLine>::iterator E) { 1332 // We can never merge stuff if there are trailing line comments. 1333 if (I->Last->Type == TT_LineComment) 1334 return; 1335 1336 unsigned Limit = Style.ColumnLimit - Indent; 1337 // If we already exceed the column limit, we set 'Limit' to 0. The different 1338 // tryMerge..() functions can then decide whether to still do merging. 1339 Limit = I->Last->TotalLength > Limit ? 0 : Limit - I->Last->TotalLength; 1340 1341 if (I + 1 == E || (I + 1)->Type == LT_Invalid) 1342 return; 1343 1344 if (I->Last->is(tok::l_brace)) { 1345 tryMergeSimpleBlock(I, E, Limit); 1346 } else if (I->First.is(tok::kw_if)) { 1347 tryMergeSimpleIf(I, E, Limit); 1348 } else if (I->InPPDirective && (I->First.FormatTok.HasUnescapedNewline || 1349 I->First.FormatTok.IsFirst)) { 1350 tryMergeSimplePPDirective(I, E, Limit); 1351 } 1352 return; 1353 } 1354 1355 void tryMergeSimplePPDirective(std::vector<AnnotatedLine>::iterator &I, 1356 std::vector<AnnotatedLine>::iterator E, 1357 unsigned Limit) { 1358 if (Limit == 0) 1359 return; 1360 AnnotatedLine &Line = *I; 1361 if (!(I + 1)->InPPDirective || (I + 1)->First.FormatTok.HasUnescapedNewline) 1362 return; 1363 if (I + 2 != E && (I + 2)->InPPDirective && 1364 !(I + 2)->First.FormatTok.HasUnescapedNewline) 1365 return; 1366 if (1 + (I + 1)->Last->TotalLength > Limit) 1367 return; 1368 join(Line, *(++I)); 1369 } 1370 1371 void tryMergeSimpleIf(std::vector<AnnotatedLine>::iterator &I, 1372 std::vector<AnnotatedLine>::iterator E, 1373 unsigned Limit) { 1374 if (Limit == 0) 1375 return; 1376 if (!Style.AllowShortIfStatementsOnASingleLine) 1377 return; 1378 if ((I + 1)->InPPDirective != I->InPPDirective || 1379 ((I + 1)->InPPDirective && 1380 (I + 1)->First.FormatTok.HasUnescapedNewline)) 1381 return; 1382 AnnotatedLine &Line = *I; 1383 if (Line.Last->isNot(tok::r_paren)) 1384 return; 1385 if (1 + (I + 1)->Last->TotalLength > Limit) 1386 return; 1387 if ((I + 1)->First.is(tok::kw_if) || (I + 1)->First.Type == TT_LineComment) 1388 return; 1389 // Only inline simple if's (no nested if or else). 1390 if (I + 2 != E && (I + 2)->First.is(tok::kw_else)) 1391 return; 1392 join(Line, *(++I)); 1393 } 1394 1395 void tryMergeSimpleBlock(std::vector<AnnotatedLine>::iterator &I, 1396 std::vector<AnnotatedLine>::iterator E, 1397 unsigned Limit) { 1398 // First, check that the current line allows merging. This is the case if 1399 // we're not in a control flow statement and the last token is an opening 1400 // brace. 1401 AnnotatedLine &Line = *I; 1402 if (Line.First.isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace, 1403 tok::kw_else, tok::kw_try, tok::kw_catch, 1404 tok::kw_for, 1405 // This gets rid of all ObjC @ keywords and methods. 1406 tok::at, tok::minus, tok::plus)) 1407 return; 1408 1409 AnnotatedToken *Tok = &(I + 1)->First; 1410 if (Tok->Children.empty() && Tok->is(tok::r_brace) && 1411 !Tok->MustBreakBefore) { 1412 // We merge empty blocks even if the line exceeds the column limit. 1413 Tok->SpacesRequiredBefore = 0; 1414 Tok->CanBreakBefore = true; 1415 join(Line, *(I + 1)); 1416 I += 1; 1417 } else if (Limit != 0) { 1418 // Check that we still have three lines and they fit into the limit. 1419 if (I + 2 == E || (I + 2)->Type == LT_Invalid || 1420 !nextTwoLinesFitInto(I, Limit)) 1421 return; 1422 1423 // Second, check that the next line does not contain any braces - if it 1424 // does, readability declines when putting it into a single line. 1425 if ((I + 1)->Last->Type == TT_LineComment || Tok->MustBreakBefore) 1426 return; 1427 do { 1428 if (Tok->isOneOf(tok::l_brace, tok::r_brace)) 1429 return; 1430 Tok = Tok->Children.empty() ? NULL : &Tok->Children.back(); 1431 } while (Tok != NULL); 1432 1433 // Last, check that the third line contains a single closing brace. 1434 Tok = &(I + 2)->First; 1435 if (!Tok->Children.empty() || Tok->isNot(tok::r_brace) || 1436 Tok->MustBreakBefore) 1437 return; 1438 1439 join(Line, *(I + 1)); 1440 join(Line, *(I + 2)); 1441 I += 2; 1442 } 1443 } 1444 1445 bool nextTwoLinesFitInto(std::vector<AnnotatedLine>::iterator I, 1446 unsigned Limit) { 1447 return 1 + (I + 1)->Last->TotalLength + 1 + (I + 2)->Last->TotalLength <= 1448 Limit; 1449 } 1450 1451 void join(AnnotatedLine &A, const AnnotatedLine &B) { 1452 unsigned LengthA = A.Last->TotalLength + B.First.SpacesRequiredBefore; 1453 A.Last->Children.push_back(B.First); 1454 while (!A.Last->Children.empty()) { 1455 A.Last->Children[0].Parent = A.Last; 1456 A.Last->Children[0].TotalLength += LengthA; 1457 A.Last = &A.Last->Children[0]; 1458 } 1459 } 1460 1461 bool touchesRanges(const CharSourceRange &Range) { 1462 for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { 1463 if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), 1464 Ranges[i].getBegin()) && 1465 !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(), 1466 Range.getBegin())) 1467 return true; 1468 } 1469 return false; 1470 } 1471 1472 bool touchesLine(const AnnotatedLine &TheLine) { 1473 const FormatToken *First = &TheLine.First.FormatTok; 1474 const FormatToken *Last = &TheLine.Last->FormatTok; 1475 CharSourceRange LineRange = CharSourceRange::getTokenRange( 1476 First->WhiteSpaceStart.getLocWithOffset(First->LastNewlineOffset), 1477 Last->Tok.getLocation()); 1478 return touchesRanges(LineRange); 1479 } 1480 1481 bool touchesEmptyLineBefore(const AnnotatedLine &TheLine) { 1482 const FormatToken *First = &TheLine.First.FormatTok; 1483 CharSourceRange LineRange = CharSourceRange::getCharRange( 1484 First->WhiteSpaceStart, 1485 First->WhiteSpaceStart.getLocWithOffset(First->LastNewlineOffset)); 1486 return touchesRanges(LineRange); 1487 } 1488 1489 virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) { 1490 AnnotatedLines.push_back(AnnotatedLine(TheLine)); 1491 } 1492 1493 /// \brief Add a new line and the required indent before the first Token 1494 /// of the \c UnwrappedLine if there was no structural parsing error. 1495 /// Returns the indent level of the \c UnwrappedLine. 1496 void formatFirstToken(const AnnotatedToken &RootToken, unsigned Indent, 1497 bool InPPDirective, unsigned PreviousEndOfLineColumn) { 1498 const FormatToken &Tok = RootToken.FormatTok; 1499 1500 unsigned Newlines = 1501 std::min(Tok.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1); 1502 if (Newlines == 0 && !Tok.IsFirst) 1503 Newlines = 1; 1504 1505 if (!InPPDirective || Tok.HasUnescapedNewline) { 1506 Whitespaces.replaceWhitespace(RootToken, Newlines, Indent, 0, Style); 1507 } else { 1508 Whitespaces.replacePPWhitespace(RootToken, Newlines, Indent, 1509 PreviousEndOfLineColumn, Style); 1510 } 1511 } 1512 1513 DiagnosticsEngine &Diag; 1514 FormatStyle Style; 1515 Lexer &Lex; 1516 SourceManager &SourceMgr; 1517 WhitespaceManager Whitespaces; 1518 std::vector<CharSourceRange> Ranges; 1519 std::vector<AnnotatedLine> AnnotatedLines; 1520 bool StructuralError; 1521 }; 1522 1523 tooling::Replacements 1524 reformat(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr, 1525 std::vector<CharSourceRange> Ranges, DiagnosticConsumer *DiagClient) { 1526 IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions(); 1527 OwningPtr<DiagnosticConsumer> DiagPrinter; 1528 if (DiagClient == 0) { 1529 DiagPrinter.reset(new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts)); 1530 DiagPrinter->BeginSourceFile(Lex.getLangOpts(), Lex.getPP()); 1531 DiagClient = DiagPrinter.get(); 1532 } 1533 DiagnosticsEngine Diagnostics( 1534 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()), &*DiagOpts, 1535 DiagClient, false); 1536 Diagnostics.setSourceManager(&SourceMgr); 1537 Formatter formatter(Diagnostics, Style, Lex, SourceMgr, Ranges); 1538 return formatter.format(); 1539 } 1540 1541 LangOptions getFormattingLangOpts() { 1542 LangOptions LangOpts; 1543 LangOpts.CPlusPlus = 1; 1544 LangOpts.CPlusPlus11 = 1; 1545 LangOpts.Bool = 1; 1546 LangOpts.ObjC1 = 1; 1547 LangOpts.ObjC2 = 1; 1548 return LangOpts; 1549 } 1550 1551 } // namespace format 1552 } // namespace clang 1553