1 //===--- Format.cpp - Format C++ code -------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file implements functions declared in Format.h. This will be 12 /// split into separate files as we go. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #define DEBUG_TYPE "format-formatter" 17 18 #include "BreakableToken.h" 19 #include "TokenAnnotator.h" 20 #include "UnwrappedLineParser.h" 21 #include "WhitespaceManager.h" 22 #include "clang/Basic/Diagnostic.h" 23 #include "clang/Basic/OperatorPrecedence.h" 24 #include "clang/Basic/SourceManager.h" 25 #include "clang/Format/Format.h" 26 #include "clang/Lex/Lexer.h" 27 #include "llvm/ADT/STLExtras.h" 28 #include "llvm/Support/Allocator.h" 29 #include "llvm/Support/Debug.h" 30 #include "llvm/Support/YAMLTraits.h" 31 #include <queue> 32 #include <string> 33 34 namespace llvm { 35 namespace yaml { 36 template <> 37 struct ScalarEnumerationTraits<clang::format::FormatStyle::LanguageStandard> { 38 static void enumeration(IO &IO, 39 clang::format::FormatStyle::LanguageStandard &Value) { 40 IO.enumCase(Value, "C++03", clang::format::FormatStyle::LS_Cpp03); 41 IO.enumCase(Value, "C++11", clang::format::FormatStyle::LS_Cpp11); 42 IO.enumCase(Value, "Auto", clang::format::FormatStyle::LS_Auto); 43 } 44 }; 45 46 template <> 47 struct ScalarEnumerationTraits<clang::format::FormatStyle::BraceBreakingStyle> { 48 static void 49 enumeration(IO &IO, clang::format::FormatStyle::BraceBreakingStyle &Value) { 50 IO.enumCase(Value, "Attach", clang::format::FormatStyle::BS_Attach); 51 IO.enumCase(Value, "Linux", clang::format::FormatStyle::BS_Linux); 52 IO.enumCase(Value, "Stroustrup", clang::format::FormatStyle::BS_Stroustrup); 53 IO.enumCase(Value, "Allman", clang::format::FormatStyle::BS_Allman); 54 } 55 }; 56 57 template <> 58 struct ScalarEnumerationTraits< 59 clang::format::FormatStyle::NamespaceIndentationKind> { 60 static void 61 enumeration(IO &IO, 62 clang::format::FormatStyle::NamespaceIndentationKind &Value) { 63 IO.enumCase(Value, "None", clang::format::FormatStyle::NI_None); 64 IO.enumCase(Value, "Inner", clang::format::FormatStyle::NI_Inner); 65 IO.enumCase(Value, "All", clang::format::FormatStyle::NI_All); 66 } 67 }; 68 69 template <> struct MappingTraits<clang::format::FormatStyle> { 70 static void mapping(llvm::yaml::IO &IO, clang::format::FormatStyle &Style) { 71 if (IO.outputting()) { 72 StringRef StylesArray[] = { "LLVM", "Google", "Chromium", "Mozilla" }; 73 ArrayRef<StringRef> Styles(StylesArray); 74 for (size_t i = 0, e = Styles.size(); i < e; ++i) { 75 StringRef StyleName(Styles[i]); 76 clang::format::FormatStyle PredefinedStyle; 77 if (clang::format::getPredefinedStyle(StyleName, &PredefinedStyle) && 78 Style == PredefinedStyle) { 79 IO.mapOptional("# BasedOnStyle", StyleName); 80 break; 81 } 82 } 83 } else { 84 StringRef BasedOnStyle; 85 IO.mapOptional("BasedOnStyle", BasedOnStyle); 86 if (!BasedOnStyle.empty()) 87 if (!clang::format::getPredefinedStyle(BasedOnStyle, &Style)) { 88 IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle)); 89 return; 90 } 91 } 92 93 IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset); 94 IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft); 95 IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments); 96 IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine", 97 Style.AllowAllParametersOfDeclarationOnNextLine); 98 IO.mapOptional("AllowShortIfStatementsOnASingleLine", 99 Style.AllowShortIfStatementsOnASingleLine); 100 IO.mapOptional("AllowShortLoopsOnASingleLine", 101 Style.AllowShortLoopsOnASingleLine); 102 IO.mapOptional("AlwaysBreakTemplateDeclarations", 103 Style.AlwaysBreakTemplateDeclarations); 104 IO.mapOptional("AlwaysBreakBeforeMultilineStrings", 105 Style.AlwaysBreakBeforeMultilineStrings); 106 IO.mapOptional("BreakBeforeBinaryOperators", 107 Style.BreakBeforeBinaryOperators); 108 IO.mapOptional("BreakConstructorInitializersBeforeComma", 109 Style.BreakConstructorInitializersBeforeComma); 110 IO.mapOptional("BinPackParameters", Style.BinPackParameters); 111 IO.mapOptional("ColumnLimit", Style.ColumnLimit); 112 IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine", 113 Style.ConstructorInitializerAllOnOneLineOrOnePerLine); 114 IO.mapOptional("DerivePointerBinding", Style.DerivePointerBinding); 115 IO.mapOptional("ExperimentalAutoDetectBinPacking", 116 Style.ExperimentalAutoDetectBinPacking); 117 IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels); 118 IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep); 119 IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation); 120 IO.mapOptional("ObjCSpaceBeforeProtocolList", 121 Style.ObjCSpaceBeforeProtocolList); 122 IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment); 123 IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString); 124 IO.mapOptional("PenaltyBreakFirstLessLess", 125 Style.PenaltyBreakFirstLessLess); 126 IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter); 127 IO.mapOptional("PenaltyReturnTypeOnItsOwnLine", 128 Style.PenaltyReturnTypeOnItsOwnLine); 129 IO.mapOptional("PointerBindsToType", Style.PointerBindsToType); 130 IO.mapOptional("SpacesBeforeTrailingComments", 131 Style.SpacesBeforeTrailingComments); 132 IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle); 133 IO.mapOptional("Standard", Style.Standard); 134 IO.mapOptional("IndentWidth", Style.IndentWidth); 135 IO.mapOptional("UseTab", Style.UseTab); 136 IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces); 137 IO.mapOptional("IndentFunctionDeclarationAfterType", 138 Style.IndentFunctionDeclarationAfterType); 139 } 140 }; 141 } 142 } 143 144 namespace clang { 145 namespace format { 146 147 void setDefaultPenalties(FormatStyle &Style) { 148 Style.PenaltyBreakComment = 45; 149 Style.PenaltyBreakFirstLessLess = 120; 150 Style.PenaltyBreakString = 1000; 151 Style.PenaltyExcessCharacter = 1000000; 152 } 153 154 FormatStyle getLLVMStyle() { 155 FormatStyle LLVMStyle; 156 LLVMStyle.AccessModifierOffset = -2; 157 LLVMStyle.AlignEscapedNewlinesLeft = false; 158 LLVMStyle.AlignTrailingComments = true; 159 LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true; 160 LLVMStyle.AllowShortIfStatementsOnASingleLine = false; 161 LLVMStyle.AllowShortLoopsOnASingleLine = false; 162 LLVMStyle.AlwaysBreakBeforeMultilineStrings = false; 163 LLVMStyle.AlwaysBreakTemplateDeclarations = false; 164 LLVMStyle.BinPackParameters = true; 165 LLVMStyle.BreakBeforeBinaryOperators = false; 166 LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; 167 LLVMStyle.BreakConstructorInitializersBeforeComma = false; 168 LLVMStyle.ColumnLimit = 80; 169 LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false; 170 LLVMStyle.Cpp11BracedListStyle = false; 171 LLVMStyle.DerivePointerBinding = false; 172 LLVMStyle.ExperimentalAutoDetectBinPacking = false; 173 LLVMStyle.IndentCaseLabels = false; 174 LLVMStyle.IndentFunctionDeclarationAfterType = false; 175 LLVMStyle.IndentWidth = 2; 176 LLVMStyle.MaxEmptyLinesToKeep = 1; 177 LLVMStyle.NamespaceIndentation = FormatStyle::NI_None; 178 LLVMStyle.ObjCSpaceBeforeProtocolList = true; 179 LLVMStyle.PointerBindsToType = false; 180 LLVMStyle.SpacesBeforeTrailingComments = 1; 181 LLVMStyle.Standard = FormatStyle::LS_Cpp03; 182 LLVMStyle.UseTab = false; 183 184 setDefaultPenalties(LLVMStyle); 185 LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60; 186 187 return LLVMStyle; 188 } 189 190 FormatStyle getGoogleStyle() { 191 FormatStyle GoogleStyle; 192 GoogleStyle.AccessModifierOffset = -1; 193 GoogleStyle.AlignEscapedNewlinesLeft = true; 194 GoogleStyle.AlignTrailingComments = true; 195 GoogleStyle.AllowAllParametersOfDeclarationOnNextLine = true; 196 GoogleStyle.AllowShortIfStatementsOnASingleLine = true; 197 GoogleStyle.AllowShortLoopsOnASingleLine = true; 198 GoogleStyle.AlwaysBreakBeforeMultilineStrings = true; 199 GoogleStyle.AlwaysBreakTemplateDeclarations = true; 200 GoogleStyle.BinPackParameters = true; 201 GoogleStyle.BreakBeforeBinaryOperators = false; 202 GoogleStyle.BreakBeforeBraces = FormatStyle::BS_Attach; 203 GoogleStyle.BreakConstructorInitializersBeforeComma = false; 204 GoogleStyle.ColumnLimit = 80; 205 GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; 206 GoogleStyle.Cpp11BracedListStyle = true; 207 GoogleStyle.DerivePointerBinding = true; 208 GoogleStyle.ExperimentalAutoDetectBinPacking = false; 209 GoogleStyle.IndentCaseLabels = true; 210 GoogleStyle.IndentFunctionDeclarationAfterType = true; 211 GoogleStyle.IndentWidth = 2; 212 GoogleStyle.MaxEmptyLinesToKeep = 1; 213 GoogleStyle.NamespaceIndentation = FormatStyle::NI_None; 214 GoogleStyle.ObjCSpaceBeforeProtocolList = false; 215 GoogleStyle.PointerBindsToType = true; 216 GoogleStyle.SpacesBeforeTrailingComments = 2; 217 GoogleStyle.Standard = FormatStyle::LS_Auto; 218 GoogleStyle.UseTab = false; 219 220 setDefaultPenalties(GoogleStyle); 221 GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200; 222 223 return GoogleStyle; 224 } 225 226 FormatStyle getChromiumStyle() { 227 FormatStyle ChromiumStyle = getGoogleStyle(); 228 ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false; 229 ChromiumStyle.AllowShortIfStatementsOnASingleLine = false; 230 ChromiumStyle.AllowShortLoopsOnASingleLine = false; 231 ChromiumStyle.BinPackParameters = false; 232 ChromiumStyle.DerivePointerBinding = false; 233 ChromiumStyle.Standard = FormatStyle::LS_Cpp03; 234 return ChromiumStyle; 235 } 236 237 FormatStyle getMozillaStyle() { 238 FormatStyle MozillaStyle = getLLVMStyle(); 239 MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false; 240 MozillaStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; 241 MozillaStyle.DerivePointerBinding = true; 242 MozillaStyle.IndentCaseLabels = true; 243 MozillaStyle.ObjCSpaceBeforeProtocolList = false; 244 MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200; 245 MozillaStyle.PointerBindsToType = true; 246 return MozillaStyle; 247 } 248 249 FormatStyle getWebKitStyle() { 250 FormatStyle Style = getLLVMStyle(); 251 Style.AccessModifierOffset = -4; 252 Style.AlignTrailingComments = false; 253 Style.BreakBeforeBinaryOperators = true; 254 Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup; 255 Style.BreakConstructorInitializersBeforeComma = true; 256 Style.ColumnLimit = 0; 257 Style.IndentWidth = 4; 258 Style.NamespaceIndentation = FormatStyle::NI_Inner; 259 Style.PointerBindsToType = true; 260 return Style; 261 } 262 263 bool getPredefinedStyle(StringRef Name, FormatStyle *Style) { 264 if (Name.equals_lower("llvm")) 265 *Style = getLLVMStyle(); 266 else if (Name.equals_lower("chromium")) 267 *Style = getChromiumStyle(); 268 else if (Name.equals_lower("mozilla")) 269 *Style = getMozillaStyle(); 270 else if (Name.equals_lower("google")) 271 *Style = getGoogleStyle(); 272 else if (Name.equals_lower("webkit")) 273 *Style = getWebKitStyle(); 274 else 275 return false; 276 277 return true; 278 } 279 280 llvm::error_code parseConfiguration(StringRef Text, FormatStyle *Style) { 281 if (Text.trim().empty()) 282 return llvm::make_error_code(llvm::errc::invalid_argument); 283 llvm::yaml::Input Input(Text); 284 Input >> *Style; 285 return Input.error(); 286 } 287 288 std::string configurationAsText(const FormatStyle &Style) { 289 std::string Text; 290 llvm::raw_string_ostream Stream(Text); 291 llvm::yaml::Output Output(Stream); 292 // We use the same mapping method for input and output, so we need a non-const 293 // reference here. 294 FormatStyle NonConstStyle = Style; 295 Output << NonConstStyle; 296 return Stream.str(); 297 } 298 299 // Returns the length of everything up to the first possible line break after 300 // the ), ], } or > matching \c Tok. 301 static unsigned getLengthToMatchingParen(const FormatToken &Tok) { 302 if (Tok.MatchingParen == NULL) 303 return 0; 304 FormatToken *End = Tok.MatchingParen; 305 while (End->Next && !End->Next->CanBreakBefore) { 306 End = End->Next; 307 } 308 return End->TotalLength - Tok.TotalLength + 1; 309 } 310 311 namespace { 312 313 class UnwrappedLineFormatter { 314 public: 315 UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr, 316 const AnnotatedLine &Line, unsigned FirstIndent, 317 const FormatToken *RootToken, 318 WhitespaceManager &Whitespaces, 319 encoding::Encoding Encoding, 320 bool BinPackInconclusiveFunctions) 321 : Style(Style), SourceMgr(SourceMgr), Line(Line), 322 FirstIndent(FirstIndent), RootToken(RootToken), 323 Whitespaces(Whitespaces), Count(0), Encoding(Encoding), 324 BinPackInconclusiveFunctions(BinPackInconclusiveFunctions) {} 325 326 /// \brief Formats an \c UnwrappedLine. 327 void format(const AnnotatedLine *NextLine) { 328 // Initialize state dependent on indent. 329 LineState State; 330 State.Column = FirstIndent; 331 State.NextToken = RootToken; 332 State.Stack.push_back(ParenState(FirstIndent, FirstIndent, 333 /*AvoidBinPacking=*/false, 334 /*NoLineBreak=*/false)); 335 State.LineContainsContinuedForLoopSection = false; 336 State.ParenLevel = 0; 337 State.StartOfStringLiteral = 0; 338 State.StartOfLineLevel = State.ParenLevel; 339 State.LowestLevelOnLine = State.ParenLevel; 340 State.IgnoreStackForComparison = false; 341 342 // The first token has already been indented and thus consumed. 343 moveStateToNextToken(State, /*DryRun=*/false, /*Newline=*/false); 344 345 if (Style.ColumnLimit == 0) { 346 formatWithoutColumnLimit(State); 347 return; 348 } 349 350 // If everything fits on a single line, just put it there. 351 unsigned ColumnLimit = Style.ColumnLimit; 352 if (NextLine && NextLine->InPPDirective && 353 !NextLine->First->HasUnescapedNewline) 354 ColumnLimit = getColumnLimit(); 355 if (Line.Last->TotalLength <= ColumnLimit - FirstIndent) { 356 while (State.NextToken != NULL) { 357 addTokenToState(false, false, State); 358 } 359 } 360 361 // If the ObjC method declaration does not fit on a line, we should format 362 // it with one arg per line. 363 if (Line.Type == LT_ObjCMethodDecl) 364 State.Stack.back().BreakBeforeParameter = true; 365 366 // Find best solution in solution space. 367 analyzeSolutionSpace(State); 368 } 369 370 private: 371 void DebugTokenState(const FormatToken &FormatTok) { 372 const Token &Tok = FormatTok.Tok; 373 llvm::dbgs() << StringRef(SourceMgr.getCharacterData(Tok.getLocation()), 374 Tok.getLength()); 375 llvm::dbgs(); 376 } 377 378 struct ParenState { 379 ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking, 380 bool NoLineBreak) 381 : Indent(Indent), LastSpace(LastSpace), FirstLessLess(0), 382 BreakBeforeClosingBrace(false), QuestionColumn(0), 383 AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), 384 NoLineBreak(NoLineBreak), ColonPos(0), StartOfFunctionCall(0), 385 StartOfArraySubscripts(0), NestedNameSpecifierContinuation(0), 386 CallContinuation(0), VariablePos(0), ContainsLineBreak(false) {} 387 388 /// \brief The position to which a specific parenthesis level needs to be 389 /// indented. 390 unsigned Indent; 391 392 /// \brief The position of the last space on each level. 393 /// 394 /// Used e.g. to break like: 395 /// functionCall(Parameter, otherCall( 396 /// OtherParameter)); 397 unsigned LastSpace; 398 399 /// \brief The position the first "<<" operator encountered on each level. 400 /// 401 /// Used to align "<<" operators. 0 if no such operator has been encountered 402 /// on a level. 403 unsigned FirstLessLess; 404 405 /// \brief Whether a newline needs to be inserted before the block's closing 406 /// brace. 407 /// 408 /// We only want to insert a newline before the closing brace if there also 409 /// was a newline after the beginning left brace. 410 bool BreakBeforeClosingBrace; 411 412 /// \brief The column of a \c ? in a conditional expression; 413 unsigned QuestionColumn; 414 415 /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple 416 /// lines, in this context. 417 bool AvoidBinPacking; 418 419 /// \brief Break after the next comma (or all the commas in this context if 420 /// \c AvoidBinPacking is \c true). 421 bool BreakBeforeParameter; 422 423 /// \brief Line breaking in this context would break a formatting rule. 424 bool NoLineBreak; 425 426 /// \brief The position of the colon in an ObjC method declaration/call. 427 unsigned ColonPos; 428 429 /// \brief The start of the most recent function in a builder-type call. 430 unsigned StartOfFunctionCall; 431 432 /// \brief Contains the start of array subscript expressions, so that they 433 /// can be aligned. 434 unsigned StartOfArraySubscripts; 435 436 /// \brief If a nested name specifier was broken over multiple lines, this 437 /// contains the start column of the second line. Otherwise 0. 438 unsigned NestedNameSpecifierContinuation; 439 440 /// \brief If a call expression was broken over multiple lines, this 441 /// contains the start column of the second line. Otherwise 0. 442 unsigned CallContinuation; 443 444 /// \brief The column of the first variable name in a variable declaration. 445 /// 446 /// Used to align further variables if necessary. 447 unsigned VariablePos; 448 449 /// \brief \c true if this \c ParenState already contains a line-break. 450 /// 451 /// The first line break in a certain \c ParenState causes extra penalty so 452 /// that clang-format prefers similar breaks, i.e. breaks in the same 453 /// parenthesis. 454 bool ContainsLineBreak; 455 456 bool operator<(const ParenState &Other) const { 457 if (Indent != Other.Indent) 458 return Indent < Other.Indent; 459 if (LastSpace != Other.LastSpace) 460 return LastSpace < Other.LastSpace; 461 if (FirstLessLess != Other.FirstLessLess) 462 return FirstLessLess < Other.FirstLessLess; 463 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) 464 return BreakBeforeClosingBrace; 465 if (QuestionColumn != Other.QuestionColumn) 466 return QuestionColumn < Other.QuestionColumn; 467 if (AvoidBinPacking != Other.AvoidBinPacking) 468 return AvoidBinPacking; 469 if (BreakBeforeParameter != Other.BreakBeforeParameter) 470 return BreakBeforeParameter; 471 if (NoLineBreak != Other.NoLineBreak) 472 return NoLineBreak; 473 if (ColonPos != Other.ColonPos) 474 return ColonPos < Other.ColonPos; 475 if (StartOfFunctionCall != Other.StartOfFunctionCall) 476 return StartOfFunctionCall < Other.StartOfFunctionCall; 477 if (StartOfArraySubscripts != Other.StartOfArraySubscripts) 478 return StartOfArraySubscripts < Other.StartOfArraySubscripts; 479 if (CallContinuation != Other.CallContinuation) 480 return CallContinuation < Other.CallContinuation; 481 if (VariablePos != Other.VariablePos) 482 return VariablePos < Other.VariablePos; 483 if (ContainsLineBreak != Other.ContainsLineBreak) 484 return ContainsLineBreak < Other.ContainsLineBreak; 485 return false; 486 } 487 }; 488 489 /// \brief The current state when indenting a unwrapped line. 490 /// 491 /// As the indenting tries different combinations this is copied by value. 492 struct LineState { 493 /// \brief The number of used columns in the current line. 494 unsigned Column; 495 496 /// \brief The token that needs to be next formatted. 497 const FormatToken *NextToken; 498 499 /// \brief \c true if this line contains a continued for-loop section. 500 bool LineContainsContinuedForLoopSection; 501 502 /// \brief The level of nesting inside (), [], <> and {}. 503 unsigned ParenLevel; 504 505 /// \brief The \c ParenLevel at the start of this line. 506 unsigned StartOfLineLevel; 507 508 /// \brief The lowest \c ParenLevel on the current line. 509 unsigned LowestLevelOnLine; 510 511 /// \brief The start column of the string literal, if we're in a string 512 /// literal sequence, 0 otherwise. 513 unsigned StartOfStringLiteral; 514 515 /// \brief A stack keeping track of properties applying to parenthesis 516 /// levels. 517 std::vector<ParenState> Stack; 518 519 /// \brief Ignore the stack of \c ParenStates for state comparison. 520 /// 521 /// In long and deeply nested unwrapped lines, the current algorithm can 522 /// be insufficient for finding the best formatting with a reasonable amount 523 /// of time and memory. Setting this flag will effectively lead to the 524 /// algorithm not analyzing some combinations. However, these combinations 525 /// rarely contain the optimal solution: In short, accepting a higher 526 /// penalty early would need to lead to different values in the \c 527 /// ParenState stack (in an otherwise identical state) and these different 528 /// values would need to lead to a significant amount of avoided penalty 529 /// later. 530 /// 531 /// FIXME: Come up with a better algorithm instead. 532 bool IgnoreStackForComparison; 533 534 /// \brief Comparison operator to be able to used \c LineState in \c map. 535 bool operator<(const LineState &Other) const { 536 if (NextToken != Other.NextToken) 537 return NextToken < Other.NextToken; 538 if (Column != Other.Column) 539 return Column < Other.Column; 540 if (LineContainsContinuedForLoopSection != 541 Other.LineContainsContinuedForLoopSection) 542 return LineContainsContinuedForLoopSection; 543 if (ParenLevel != Other.ParenLevel) 544 return ParenLevel < Other.ParenLevel; 545 if (StartOfLineLevel != Other.StartOfLineLevel) 546 return StartOfLineLevel < Other.StartOfLineLevel; 547 if (LowestLevelOnLine != Other.LowestLevelOnLine) 548 return LowestLevelOnLine < Other.LowestLevelOnLine; 549 if (StartOfStringLiteral != Other.StartOfStringLiteral) 550 return StartOfStringLiteral < Other.StartOfStringLiteral; 551 if (IgnoreStackForComparison || Other.IgnoreStackForComparison) 552 return false; 553 return Stack < Other.Stack; 554 } 555 }; 556 557 /// \brief Formats the line starting at \p State, simply keeping all of the 558 /// input's line breaking decisions. 559 void formatWithoutColumnLimit(LineState &State) { 560 while (State.NextToken != NULL) { 561 bool Newline = mustBreak(State) || 562 (canBreak(State) && State.NextToken->NewlinesBefore > 0); 563 addTokenToState(Newline, /*DryRun=*/false, State); 564 } 565 } 566 567 /// \brief Appends the next token to \p State and updates information 568 /// necessary for indentation. 569 /// 570 /// Puts the token on the current line if \p Newline is \c false and adds a 571 /// line break and necessary indentation otherwise. 572 /// 573 /// If \p DryRun is \c false, also creates and stores the required 574 /// \c Replacement. 575 unsigned addTokenToState(bool Newline, bool DryRun, LineState &State) { 576 const FormatToken &Current = *State.NextToken; 577 const FormatToken &Previous = *State.NextToken->Previous; 578 579 // Extra penalty that needs to be added because of the way certain line 580 // breaks are chosen. 581 unsigned ExtraPenalty = 0; 582 583 if (State.Stack.size() == 0 || Current.Type == TT_ImplicitStringLiteral) { 584 // FIXME: Is this correct? 585 int WhitespaceLength = SourceMgr.getSpellingColumnNumber( 586 State.NextToken->WhitespaceRange.getEnd()) - 587 SourceMgr.getSpellingColumnNumber( 588 State.NextToken->WhitespaceRange.getBegin()); 589 State.Column += WhitespaceLength + State.NextToken->CodePointCount; 590 State.NextToken = State.NextToken->Next; 591 return 0; 592 } 593 594 // If we are continuing an expression, we want to indent an extra 4 spaces. 595 unsigned ContinuationIndent = 596 std::max(State.Stack.back().LastSpace, State.Stack.back().Indent) + 4; 597 if (Newline) { 598 State.Stack.back().ContainsLineBreak = true; 599 if (Current.is(tok::r_brace)) { 600 if (Current.BlockKind == BK_BracedInit) 601 State.Column = State.Stack[State.Stack.size() - 2].LastSpace; 602 else 603 State.Column = FirstIndent; 604 } else if (Current.is(tok::string_literal) && 605 State.StartOfStringLiteral != 0) { 606 State.Column = State.StartOfStringLiteral; 607 State.Stack.back().BreakBeforeParameter = true; 608 } else if (Current.is(tok::lessless) && 609 State.Stack.back().FirstLessLess != 0) { 610 State.Column = State.Stack.back().FirstLessLess; 611 } else if (Current.isOneOf(tok::period, tok::arrow) && 612 Current.Type != TT_DesignatedInitializerPeriod) { 613 if (State.Stack.back().CallContinuation == 0) { 614 State.Column = ContinuationIndent; 615 State.Stack.back().CallContinuation = State.Column; 616 } else { 617 State.Column = State.Stack.back().CallContinuation; 618 } 619 } else if (Current.Type == TT_ConditionalExpr) { 620 State.Column = State.Stack.back().QuestionColumn; 621 } else if (Previous.is(tok::comma) && 622 State.Stack.back().VariablePos != 0) { 623 State.Column = State.Stack.back().VariablePos; 624 } else if (Previous.ClosesTemplateDeclaration || 625 ((Current.Type == TT_StartOfName || 626 Current.is(tok::kw_operator)) && 627 State.ParenLevel == 0 && 628 (!Style.IndentFunctionDeclarationAfterType || 629 Line.StartsDefinition))) { 630 State.Column = State.Stack.back().Indent; 631 } else if (Current.Type == TT_ObjCSelectorName) { 632 if (State.Stack.back().ColonPos > Current.CodePointCount) { 633 State.Column = State.Stack.back().ColonPos - Current.CodePointCount; 634 } else { 635 State.Column = State.Stack.back().Indent; 636 State.Stack.back().ColonPos = State.Column + Current.CodePointCount; 637 } 638 } else if (Current.is(tok::l_square) && 639 Current.Type != TT_ObjCMethodExpr) { 640 if (State.Stack.back().StartOfArraySubscripts != 0) 641 State.Column = State.Stack.back().StartOfArraySubscripts; 642 else 643 State.Column = ContinuationIndent; 644 } else if (Current.Type == TT_StartOfName || 645 Previous.isOneOf(tok::coloncolon, tok::equal) || 646 Previous.Type == TT_ObjCMethodExpr) { 647 State.Column = ContinuationIndent; 648 } else { 649 State.Column = State.Stack.back().Indent; 650 // Ensure that we fall back to indenting 4 spaces instead of just 651 // flushing continuations left. 652 if (State.Column == FirstIndent) 653 State.Column += 4; 654 } 655 656 if (Current.is(tok::question)) 657 State.Stack.back().BreakBeforeParameter = true; 658 if ((Previous.isOneOf(tok::comma, tok::semi) && 659 !State.Stack.back().AvoidBinPacking) || 660 Previous.Type == TT_BinaryOperator) 661 State.Stack.back().BreakBeforeParameter = false; 662 if (Previous.Type == TT_TemplateCloser && State.ParenLevel == 0) 663 State.Stack.back().BreakBeforeParameter = false; 664 665 if (!DryRun) { 666 unsigned NewLines = 1; 667 if (Current.is(tok::comment)) 668 NewLines = std::max( 669 NewLines, 670 std::min(Current.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1)); 671 Whitespaces.replaceWhitespace(Current, NewLines, State.Column, 672 State.Column, Line.InPPDirective); 673 } 674 675 if (!Current.isTrailingComment()) 676 State.Stack.back().LastSpace = State.Column; 677 if (Current.isOneOf(tok::arrow, tok::period) && 678 Current.Type != TT_DesignatedInitializerPeriod) 679 State.Stack.back().LastSpace += Current.CodePointCount; 680 State.StartOfLineLevel = State.ParenLevel; 681 State.LowestLevelOnLine = State.ParenLevel; 682 683 // Any break on this level means that the parent level has been broken 684 // and we need to avoid bin packing there. 685 for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) { 686 State.Stack[i].BreakBeforeParameter = true; 687 } 688 const FormatToken *TokenBefore = Current.getPreviousNonComment(); 689 if (TokenBefore && !TokenBefore->isOneOf(tok::comma, tok::semi) && 690 TokenBefore->Type != TT_TemplateCloser && 691 TokenBefore->Type != TT_BinaryOperator && !TokenBefore->opensScope()) 692 State.Stack.back().BreakBeforeParameter = true; 693 694 // If we break after {, we should also break before the corresponding }. 695 if (Previous.is(tok::l_brace)) 696 State.Stack.back().BreakBeforeClosingBrace = true; 697 698 if (State.Stack.back().AvoidBinPacking) { 699 // If we are breaking after '(', '{', '<', this is not bin packing 700 // unless AllowAllParametersOfDeclarationOnNextLine is false. 701 if (!(Previous.isOneOf(tok::l_paren, tok::l_brace) || 702 Previous.Type == TT_BinaryOperator) || 703 (!Style.AllowAllParametersOfDeclarationOnNextLine && 704 Line.MustBeDeclaration)) 705 State.Stack.back().BreakBeforeParameter = true; 706 } 707 708 // Breaking before the first "<<" is generally not desirable. 709 if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0) 710 ExtraPenalty += Style.PenaltyBreakFirstLessLess; 711 712 } else { 713 if (Current.is(tok::equal) && 714 (RootToken->is(tok::kw_for) || State.ParenLevel == 0) && 715 State.Stack.back().VariablePos == 0) { 716 State.Stack.back().VariablePos = State.Column; 717 // Move over * and & if they are bound to the variable name. 718 const FormatToken *Tok = &Previous; 719 while (Tok && State.Stack.back().VariablePos >= Tok->CodePointCount) { 720 State.Stack.back().VariablePos -= Tok->CodePointCount; 721 if (Tok->SpacesRequiredBefore != 0) 722 break; 723 Tok = Tok->Previous; 724 } 725 if (Previous.PartOfMultiVariableDeclStmt) 726 State.Stack.back().LastSpace = State.Stack.back().VariablePos; 727 } 728 729 unsigned Spaces = State.NextToken->SpacesRequiredBefore; 730 731 if (!DryRun) 732 Whitespaces.replaceWhitespace(Current, 0, Spaces, 733 State.Column + Spaces); 734 735 if (Current.Type == TT_ObjCSelectorName && 736 State.Stack.back().ColonPos == 0) { 737 if (State.Stack.back().Indent + Current.LongestObjCSelectorName > 738 State.Column + Spaces + Current.CodePointCount) 739 State.Stack.back().ColonPos = 740 State.Stack.back().Indent + Current.LongestObjCSelectorName; 741 else 742 State.Stack.back().ColonPos = 743 State.Column + Spaces + Current.CodePointCount; 744 } 745 746 if (Previous.opensScope() && Previous.Type != TT_ObjCMethodExpr && 747 Current.Type != TT_LineComment) 748 State.Stack.back().Indent = State.Column + Spaces; 749 if (Previous.is(tok::comma) && !Current.isTrailingComment() && 750 State.Stack.back().AvoidBinPacking) 751 State.Stack.back().NoLineBreak = true; 752 753 State.Column += Spaces; 754 if (Current.is(tok::l_paren) && Previous.isOneOf(tok::kw_if, tok::kw_for)) 755 // Treat the condition inside an if as if it was a second function 756 // parameter, i.e. let nested calls have an indent of 4. 757 State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(". 758 else if (Previous.is(tok::comma)) 759 State.Stack.back().LastSpace = State.Column; 760 else if ((Previous.Type == TT_BinaryOperator || 761 Previous.Type == TT_ConditionalExpr || 762 Previous.Type == TT_CtorInitializerColon) && 763 !(Previous.getPrecedence() == prec::Assignment && 764 Current.FakeLParens.empty())) 765 // Always indent relative to the RHS of the expression unless this is a 766 // simple assignment without binary expression on the RHS. 767 State.Stack.back().LastSpace = State.Column; 768 else if (Previous.Type == TT_InheritanceColon) 769 State.Stack.back().Indent = State.Column; 770 else if (Previous.opensScope()) { 771 // If a function has multiple parameters (including a single parameter 772 // that is a binary expression) or a trailing call, indent all 773 // parameters from the opening parenthesis. This avoids confusing 774 // indents like: 775 // OuterFunction(InnerFunctionCall( 776 // ParameterToInnerFunction), 777 // SecondParameterToOuterFunction); 778 bool HasMultipleParameters = !Current.FakeLParens.empty(); 779 bool HasTrailingCall = false; 780 if (Previous.MatchingParen) { 781 const FormatToken *Next = Previous.MatchingParen->getNextNonComment(); 782 if (Next && Next->isOneOf(tok::period, tok::arrow)) 783 HasTrailingCall = true; 784 } 785 if (HasMultipleParameters || HasTrailingCall) 786 State.Stack.back().LastSpace = State.Column; 787 } 788 } 789 790 return moveStateToNextToken(State, DryRun, Newline) + ExtraPenalty; 791 } 792 793 /// \brief Mark the next token as consumed in \p State and modify its stacks 794 /// accordingly. 795 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline) { 796 const FormatToken &Current = *State.NextToken; 797 assert(State.Stack.size()); 798 799 if (Current.Type == TT_InheritanceColon) 800 State.Stack.back().AvoidBinPacking = true; 801 if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0) 802 State.Stack.back().FirstLessLess = State.Column; 803 if (Current.is(tok::l_square) && 804 State.Stack.back().StartOfArraySubscripts == 0) 805 State.Stack.back().StartOfArraySubscripts = State.Column; 806 if (Current.is(tok::question)) 807 State.Stack.back().QuestionColumn = State.Column; 808 if (!Current.opensScope() && !Current.closesScope()) 809 State.LowestLevelOnLine = 810 std::min(State.LowestLevelOnLine, State.ParenLevel); 811 if (Current.isOneOf(tok::period, tok::arrow) && 812 Line.Type == LT_BuilderTypeCall && State.ParenLevel == 0) 813 State.Stack.back().StartOfFunctionCall = 814 Current.LastInChainOfCalls ? 0 815 : State.Column + Current.CodePointCount; 816 if (Current.Type == TT_CtorInitializerColon) { 817 // Indent 2 from the column, so: 818 // SomeClass::SomeClass() 819 // : First(...), ... 820 // Next(...) 821 // ^ line up here. 822 if (!Style.BreakConstructorInitializersBeforeComma) 823 State.Stack.back().Indent = State.Column + 2; 824 if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine) 825 State.Stack.back().AvoidBinPacking = true; 826 State.Stack.back().BreakBeforeParameter = false; 827 } 828 829 // If return returns a binary expression, align after it. 830 if (Current.is(tok::kw_return) && !Current.FakeLParens.empty()) 831 State.Stack.back().LastSpace = State.Column + 7; 832 833 // In ObjC method declaration we align on the ":" of parameters, but we need 834 // to ensure that we indent parameters on subsequent lines by at least 4. 835 if (Current.Type == TT_ObjCMethodSpecifier) 836 State.Stack.back().Indent += 4; 837 838 // Insert scopes created by fake parenthesis. 839 const FormatToken *Previous = Current.getPreviousNonComment(); 840 // Don't add extra indentation for the first fake parenthesis after 841 // 'return', assignements or opening <({[. The indentation for these cases 842 // is special cased. 843 bool SkipFirstExtraIndent = 844 Current.is(tok::kw_return) || 845 (Previous && (Previous->opensScope() || 846 Previous->getPrecedence() == prec::Assignment)); 847 for (SmallVectorImpl<prec::Level>::const_reverse_iterator 848 I = Current.FakeLParens.rbegin(), 849 E = Current.FakeLParens.rend(); 850 I != E; ++I) { 851 ParenState NewParenState = State.Stack.back(); 852 NewParenState.ContainsLineBreak = false; 853 NewParenState.Indent = 854 std::max(std::max(State.Column, NewParenState.Indent), 855 State.Stack.back().LastSpace); 856 857 // Always indent conditional expressions. Never indent expression where 858 // the 'operator' is ',', ';' or an assignment (i.e. *I <= 859 // prec::Assignment) as those have different indentation rules. Indent 860 // other expression, unless the indentation needs to be skipped. 861 if (*I == prec::Conditional || 862 (!SkipFirstExtraIndent && *I > prec::Assignment && 863 !Style.BreakBeforeBinaryOperators)) 864 NewParenState.Indent += 4; 865 if (Previous && !Previous->opensScope()) 866 NewParenState.BreakBeforeParameter = false; 867 State.Stack.push_back(NewParenState); 868 SkipFirstExtraIndent = false; 869 } 870 871 // If we encounter an opening (, [, { or <, we add a level to our stacks to 872 // prepare for the following tokens. 873 if (Current.opensScope()) { 874 unsigned NewIndent; 875 unsigned LastSpace = State.Stack.back().LastSpace; 876 bool AvoidBinPacking; 877 if (Current.is(tok::l_brace)) { 878 NewIndent = 879 LastSpace + (Style.Cpp11BracedListStyle ? 4 : Style.IndentWidth); 880 const FormatToken *NextNoComment = Current.getNextNonComment(); 881 AvoidBinPacking = NextNoComment && 882 NextNoComment->Type == TT_DesignatedInitializerPeriod; 883 } else { 884 NewIndent = 885 4 + std::max(LastSpace, State.Stack.back().StartOfFunctionCall); 886 AvoidBinPacking = !Style.BinPackParameters || 887 (Style.ExperimentalAutoDetectBinPacking && 888 (Current.PackingKind == PPK_OnePerLine || 889 (!BinPackInconclusiveFunctions && 890 Current.PackingKind == PPK_Inconclusive))); 891 } 892 893 State.Stack.push_back(ParenState(NewIndent, LastSpace, AvoidBinPacking, 894 State.Stack.back().NoLineBreak)); 895 ++State.ParenLevel; 896 } 897 898 // If this '[' opens an ObjC call, determine whether all parameters fit into 899 // one line and put one per line if they don't. 900 if (Current.is(tok::l_square) && Current.Type == TT_ObjCMethodExpr && 901 Current.MatchingParen != NULL) { 902 if (getLengthToMatchingParen(Current) + State.Column > getColumnLimit()) 903 State.Stack.back().BreakBeforeParameter = true; 904 } 905 906 // If we encounter a closing ), ], } or >, we can remove a level from our 907 // stacks. 908 if (Current.isOneOf(tok::r_paren, tok::r_square) || 909 (Current.is(tok::r_brace) && State.NextToken != RootToken) || 910 State.NextToken->Type == TT_TemplateCloser) { 911 State.Stack.pop_back(); 912 --State.ParenLevel; 913 } 914 if (Current.is(tok::r_square)) { 915 // If this ends the array subscript expr, reset the corresponding value. 916 const FormatToken *NextNonComment = Current.getNextNonComment(); 917 if (NextNonComment && NextNonComment->isNot(tok::l_square)) 918 State.Stack.back().StartOfArraySubscripts = 0; 919 } 920 921 // Remove scopes created by fake parenthesis. 922 for (unsigned i = 0, e = Current.FakeRParens; i != e; ++i) { 923 unsigned VariablePos = State.Stack.back().VariablePos; 924 State.Stack.pop_back(); 925 State.Stack.back().VariablePos = VariablePos; 926 } 927 928 if (Current.is(tok::string_literal) && State.StartOfStringLiteral == 0) { 929 State.StartOfStringLiteral = State.Column; 930 } else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash, 931 tok::string_literal)) { 932 State.StartOfStringLiteral = 0; 933 } 934 935 State.Column += Current.CodePointCount; 936 937 State.NextToken = State.NextToken->Next; 938 939 if (!Newline && Style.AlwaysBreakBeforeMultilineStrings && 940 Current.is(tok::string_literal) && Current.CanBreakBefore) 941 return 0; 942 943 return breakProtrudingToken(Current, State, DryRun); 944 } 945 946 /// \brief If the current token sticks out over the end of the line, break 947 /// it if possible. 948 /// 949 /// \returns An extra penalty if a token was broken, otherwise 0. 950 /// 951 /// The returned penalty will cover the cost of the additional line breaks and 952 /// column limit violation in all lines except for the last one. The penalty 953 /// for the column limit violation in the last line (and in single line 954 /// tokens) is handled in \c addNextStateToQueue. 955 unsigned breakProtrudingToken(const FormatToken &Current, LineState &State, 956 bool DryRun) { 957 llvm::OwningPtr<BreakableToken> Token; 958 unsigned StartColumn = State.Column - Current.CodePointCount; 959 unsigned OriginalStartColumn = 960 SourceMgr.getSpellingColumnNumber(Current.getStartOfNonWhitespace()) - 961 1; 962 963 if (Current.is(tok::string_literal) && 964 Current.Type != TT_ImplicitStringLiteral) { 965 // Only break up default narrow strings. 966 if (!Current.TokenText.startswith("\"")) 967 return 0; 968 // Don't break string literals with escaped newlines. As clang-format must 969 // not change the string's content, it is unlikely that we'll end up with 970 // a better format. 971 if (Current.TokenText.find("\\\n") != StringRef::npos) 972 return 0; 973 // Exempts unterminated string literals from line breaking. The user will 974 // likely want to terminate the string before any line breaking is done. 975 if (Current.IsUnterminatedLiteral) 976 return 0; 977 978 Token.reset(new BreakableStringLiteral(Current, StartColumn, 979 Line.InPPDirective, Encoding)); 980 } else if (Current.Type == TT_BlockComment && Current.isTrailingComment()) { 981 Token.reset(new BreakableBlockComment( 982 Style, Current, StartColumn, OriginalStartColumn, !Current.Previous, 983 Line.InPPDirective, Encoding)); 984 } else if (Current.Type == TT_LineComment && 985 (Current.Previous == NULL || 986 Current.Previous->Type != TT_ImplicitStringLiteral)) { 987 // Don't break line comments with escaped newlines. These look like 988 // separate line comments, but in fact contain a single line comment with 989 // multiple lines including leading whitespace and the '//' markers. 990 // 991 // FIXME: If we want to handle them correctly, we'll need to adjust 992 // leading whitespace in consecutive lines when changing indentation of 993 // the first line similar to what we do with block comments. 994 StringRef::size_type EscapedNewlinePos = Current.TokenText.find("\\\n"); 995 if (EscapedNewlinePos != StringRef::npos) { 996 State.Column = 997 StartColumn + 998 encoding::getCodePointCount( 999 Current.TokenText.substr(0, EscapedNewlinePos), Encoding) + 1000 1; 1001 return 0; 1002 } 1003 1004 Token.reset(new BreakableLineComment(Current, StartColumn, 1005 Line.InPPDirective, Encoding)); 1006 } else { 1007 return 0; 1008 } 1009 if (Current.UnbreakableTailLength >= getColumnLimit()) 1010 return 0; 1011 1012 unsigned RemainingSpace = getColumnLimit() - Current.UnbreakableTailLength; 1013 bool BreakInserted = false; 1014 unsigned Penalty = 0; 1015 unsigned RemainingTokenColumns = 0; 1016 for (unsigned LineIndex = 0, EndIndex = Token->getLineCount(); 1017 LineIndex != EndIndex; ++LineIndex) { 1018 if (!DryRun) 1019 Token->replaceWhitespaceBefore(LineIndex, Whitespaces); 1020 unsigned TailOffset = 0; 1021 RemainingTokenColumns = Token->getLineLengthAfterSplit( 1022 LineIndex, TailOffset, StringRef::npos); 1023 while (RemainingTokenColumns > RemainingSpace) { 1024 BreakableToken::Split Split = 1025 Token->getSplit(LineIndex, TailOffset, getColumnLimit()); 1026 if (Split.first == StringRef::npos) { 1027 // The last line's penalty is handled in addNextStateToQueue(). 1028 if (LineIndex < EndIndex - 1) 1029 Penalty += Style.PenaltyExcessCharacter * 1030 (RemainingTokenColumns - RemainingSpace); 1031 break; 1032 } 1033 assert(Split.first != 0); 1034 unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit( 1035 LineIndex, TailOffset + Split.first + Split.second, 1036 StringRef::npos); 1037 assert(NewRemainingTokenColumns < RemainingTokenColumns); 1038 if (!DryRun) 1039 Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces); 1040 Penalty += Current.is(tok::string_literal) ? Style.PenaltyBreakString 1041 : Style.PenaltyBreakComment; 1042 unsigned ColumnsUsed = 1043 Token->getLineLengthAfterSplit(LineIndex, TailOffset, Split.first); 1044 if (ColumnsUsed > getColumnLimit()) { 1045 Penalty += 1046 Style.PenaltyExcessCharacter * (ColumnsUsed - getColumnLimit()); 1047 } 1048 TailOffset += Split.first + Split.second; 1049 RemainingTokenColumns = NewRemainingTokenColumns; 1050 BreakInserted = true; 1051 } 1052 } 1053 1054 State.Column = RemainingTokenColumns; 1055 1056 if (BreakInserted) { 1057 // If we break the token inside a parameter list, we need to break before 1058 // the next parameter on all levels, so that the next parameter is clearly 1059 // visible. Line comments already introduce a break. 1060 if (Current.Type != TT_LineComment) { 1061 for (unsigned i = 0, e = State.Stack.size(); i != e; ++i) 1062 State.Stack[i].BreakBeforeParameter = true; 1063 } 1064 1065 State.Stack.back().LastSpace = StartColumn; 1066 } 1067 return Penalty; 1068 } 1069 1070 unsigned getColumnLimit() { 1071 // In preprocessor directives reserve two chars for trailing " \" 1072 return Style.ColumnLimit - (Line.InPPDirective ? 2 : 0); 1073 } 1074 1075 /// \brief An edge in the solution space from \c Previous->State to \c State, 1076 /// inserting a newline dependent on the \c NewLine. 1077 struct StateNode { 1078 StateNode(const LineState &State, bool NewLine, StateNode *Previous) 1079 : State(State), NewLine(NewLine), Previous(Previous) {} 1080 LineState State; 1081 bool NewLine; 1082 StateNode *Previous; 1083 }; 1084 1085 /// \brief A pair of <penalty, count> that is used to prioritize the BFS on. 1086 /// 1087 /// In case of equal penalties, we want to prefer states that were inserted 1088 /// first. During state generation we make sure that we insert states first 1089 /// that break the line as late as possible. 1090 typedef std::pair<unsigned, unsigned> OrderedPenalty; 1091 1092 /// \brief An item in the prioritized BFS search queue. The \c StateNode's 1093 /// \c State has the given \c OrderedPenalty. 1094 typedef std::pair<OrderedPenalty, StateNode *> QueueItem; 1095 1096 /// \brief The BFS queue type. 1097 typedef std::priority_queue<QueueItem, std::vector<QueueItem>, 1098 std::greater<QueueItem> > QueueType; 1099 1100 /// \brief Analyze the entire solution space starting from \p InitialState. 1101 /// 1102 /// This implements a variant of Dijkstra's algorithm on the graph that spans 1103 /// the solution space (\c LineStates are the nodes). The algorithm tries to 1104 /// find the shortest path (the one with lowest penalty) from \p InitialState 1105 /// to a state where all tokens are placed. 1106 void analyzeSolutionSpace(LineState &InitialState) { 1107 std::set<LineState> Seen; 1108 1109 // Insert start element into queue. 1110 StateNode *Node = 1111 new (Allocator.Allocate()) StateNode(InitialState, false, NULL); 1112 Queue.push(QueueItem(OrderedPenalty(0, Count), Node)); 1113 ++Count; 1114 1115 // While not empty, take first element and follow edges. 1116 while (!Queue.empty()) { 1117 unsigned Penalty = Queue.top().first.first; 1118 StateNode *Node = Queue.top().second; 1119 if (Node->State.NextToken == NULL) { 1120 DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n"); 1121 break; 1122 } 1123 Queue.pop(); 1124 1125 // Cut off the analysis of certain solutions if the analysis gets too 1126 // complex. See description of IgnoreStackForComparison. 1127 if (Count > 10000) 1128 Node->State.IgnoreStackForComparison = true; 1129 1130 if (!Seen.insert(Node->State).second) 1131 // State already examined with lower penalty. 1132 continue; 1133 1134 addNextStateToQueue(Penalty, Node, /*NewLine=*/false); 1135 addNextStateToQueue(Penalty, Node, /*NewLine=*/true); 1136 } 1137 1138 if (Queue.empty()) 1139 // We were unable to find a solution, do nothing. 1140 // FIXME: Add diagnostic? 1141 return; 1142 1143 // Reconstruct the solution. 1144 reconstructPath(InitialState, Queue.top().second); 1145 DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n"); 1146 DEBUG(llvm::dbgs() << "---\n"); 1147 } 1148 1149 void reconstructPath(LineState &State, StateNode *Current) { 1150 std::deque<StateNode *> Path; 1151 // We do not need a break before the initial token. 1152 while (Current->Previous) { 1153 Path.push_front(Current); 1154 Current = Current->Previous; 1155 } 1156 for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end(); 1157 I != E; ++I) { 1158 DEBUG({ 1159 if ((*I)->NewLine) { 1160 llvm::dbgs() << "Penalty for splitting before " 1161 << (*I)->Previous->State.NextToken->Tok.getName() << ": " 1162 << (*I)->Previous->State.NextToken->SplitPenalty << "\n"; 1163 } 1164 }); 1165 addTokenToState((*I)->NewLine, false, State); 1166 } 1167 } 1168 1169 /// \brief Add the following state to the analysis queue \c Queue. 1170 /// 1171 /// Assume the current state is \p PreviousNode and has been reached with a 1172 /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true. 1173 void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode, 1174 bool NewLine) { 1175 if (NewLine && !canBreak(PreviousNode->State)) 1176 return; 1177 if (!NewLine && mustBreak(PreviousNode->State)) 1178 return; 1179 if (NewLine) { 1180 if (!PreviousNode->State.Stack.back().ContainsLineBreak) 1181 Penalty += 15; 1182 Penalty += PreviousNode->State.NextToken->SplitPenalty; 1183 } 1184 1185 StateNode *Node = new (Allocator.Allocate()) 1186 StateNode(PreviousNode->State, NewLine, PreviousNode); 1187 Penalty += addTokenToState(NewLine, true, Node->State); 1188 if (Node->State.Column > getColumnLimit()) { 1189 unsigned ExcessCharacters = Node->State.Column - getColumnLimit(); 1190 Penalty += Style.PenaltyExcessCharacter * ExcessCharacters; 1191 } 1192 1193 Queue.push(QueueItem(OrderedPenalty(Penalty, Count), Node)); 1194 ++Count; 1195 } 1196 1197 /// \brief Returns \c true, if a line break after \p State is allowed. 1198 bool canBreak(const LineState &State) { 1199 const FormatToken &Current = *State.NextToken; 1200 const FormatToken &Previous = *Current.Previous; 1201 assert(&Previous == Current.Previous); 1202 if (!Current.CanBreakBefore && 1203 !(Current.is(tok::r_brace) && 1204 State.Stack.back().BreakBeforeClosingBrace)) 1205 return false; 1206 // The opening "{" of a braced list has to be on the same line as the first 1207 // element if it is nested in another braced init list or function call. 1208 if (!Current.MustBreakBefore && Previous.is(tok::l_brace) && 1209 Previous.Previous && 1210 Previous.Previous->isOneOf(tok::l_brace, tok::l_paren, tok::comma)) 1211 return false; 1212 // This prevents breaks like: 1213 // ... 1214 // SomeParameter, OtherParameter).DoSomething( 1215 // ... 1216 // As they hide "DoSomething" and are generally bad for readability. 1217 if (Previous.opensScope() && 1218 State.LowestLevelOnLine < State.StartOfLineLevel) 1219 return false; 1220 return !State.Stack.back().NoLineBreak; 1221 } 1222 1223 /// \brief Returns \c true, if a line break after \p State is mandatory. 1224 bool mustBreak(const LineState &State) { 1225 const FormatToken &Current = *State.NextToken; 1226 const FormatToken &Previous = *Current.Previous; 1227 if (Current.MustBreakBefore || Current.Type == TT_InlineASMColon) 1228 return true; 1229 if (!Style.Cpp11BracedListStyle && Current.is(tok::r_brace) && 1230 State.Stack.back().BreakBeforeClosingBrace) 1231 return true; 1232 if (Previous.is(tok::semi) && State.LineContainsContinuedForLoopSection) 1233 return true; 1234 if (Style.BreakConstructorInitializersBeforeComma) { 1235 if (Previous.Type == TT_CtorInitializerComma) 1236 return false; 1237 if (Current.Type == TT_CtorInitializerComma) 1238 return true; 1239 } 1240 if ((Previous.isOneOf(tok::comma, tok::semi) || Current.is(tok::question) || 1241 (Current.Type == TT_ConditionalExpr && 1242 !(Current.is(tok::colon) && Previous.is(tok::question)))) && 1243 State.Stack.back().BreakBeforeParameter && 1244 !Current.isTrailingComment() && 1245 !Current.isOneOf(tok::r_paren, tok::r_brace)) 1246 return true; 1247 if (Style.AlwaysBreakBeforeMultilineStrings && 1248 State.Column > State.Stack.back().Indent && 1249 Current.is(tok::string_literal) && Previous.isNot(tok::lessless) && 1250 Previous.Type != TT_InlineASMColon && 1251 ((Current.getNextNonComment() && 1252 Current.getNextNonComment()->is(tok::string_literal)) || 1253 (Current.TokenText.find("\\\n") != StringRef::npos))) 1254 return true; 1255 1256 if (!Style.BreakBeforeBinaryOperators) { 1257 // If we need to break somewhere inside the LHS of a binary expression, we 1258 // should also break after the operator. Otherwise, the formatting would 1259 // hide the operator precedence, e.g. in: 1260 // if (aaaaaaaaaaaaaa == 1261 // bbbbbbbbbbbbbb && c) {.. 1262 // For comparisons, we only apply this rule, if the LHS is a binary 1263 // expression itself as otherwise, the line breaks seem superfluous. 1264 // We need special cases for ">>" which we have split into two ">" while 1265 // lexing in order to make template parsing easier. 1266 // 1267 // FIXME: We'll need something similar for styles that break before binary 1268 // operators. 1269 bool IsComparison = (Previous.getPrecedence() == prec::Relational || 1270 Previous.getPrecedence() == prec::Equality) && 1271 Previous.Previous && Previous.Previous->Type != 1272 TT_BinaryOperator; // For >>. 1273 bool LHSIsBinaryExpr = 1274 Previous.Previous && Previous.Previous->FakeRParens > 0; 1275 if (Previous.Type == TT_BinaryOperator && 1276 (!IsComparison || LHSIsBinaryExpr) && 1277 Current.Type != TT_BinaryOperator && // For >>. 1278 !Current.isTrailingComment() && 1279 !Previous.isOneOf(tok::lessless, tok::question) && 1280 Previous.getPrecedence() != prec::Assignment && 1281 State.Stack.back().BreakBeforeParameter) 1282 return true; 1283 } 1284 1285 // Same as above, but for the first "<<" operator. 1286 if (Current.is(tok::lessless) && State.Stack.back().BreakBeforeParameter && 1287 State.Stack.back().FirstLessLess == 0) 1288 return true; 1289 1290 // FIXME: Comparing LongestObjCSelectorName to 0 is a hacky way of finding 1291 // out whether it is the first parameter. Clean this up. 1292 if (Current.Type == TT_ObjCSelectorName && 1293 Current.LongestObjCSelectorName == 0 && 1294 State.Stack.back().BreakBeforeParameter) 1295 return true; 1296 if ((Current.Type == TT_CtorInitializerColon || 1297 (Previous.ClosesTemplateDeclaration && State.ParenLevel == 0))) 1298 return true; 1299 1300 if ((Current.Type == TT_StartOfName || Current.is(tok::kw_operator)) && 1301 Line.MightBeFunctionDecl && State.Stack.back().BreakBeforeParameter && 1302 State.ParenLevel == 0) 1303 return true; 1304 return false; 1305 } 1306 1307 // Returns the total number of columns required for the remaining tokens. 1308 unsigned getRemainingLength(const LineState &State) { 1309 if (State.NextToken && State.NextToken->Previous) 1310 return Line.Last->TotalLength - State.NextToken->Previous->TotalLength; 1311 return 0; 1312 } 1313 1314 FormatStyle Style; 1315 SourceManager &SourceMgr; 1316 const AnnotatedLine &Line; 1317 const unsigned FirstIndent; 1318 const FormatToken *RootToken; 1319 WhitespaceManager &Whitespaces; 1320 1321 llvm::SpecificBumpPtrAllocator<StateNode> Allocator; 1322 QueueType Queue; 1323 // Increasing count of \c StateNode items we have created. This is used 1324 // to create a deterministic order independent of the container. 1325 unsigned Count; 1326 encoding::Encoding Encoding; 1327 bool BinPackInconclusiveFunctions; 1328 }; 1329 1330 class FormatTokenLexer { 1331 public: 1332 FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, 1333 encoding::Encoding Encoding) 1334 : FormatTok(NULL), GreaterStashed(false), TrailingWhitespace(0), Lex(Lex), 1335 SourceMgr(SourceMgr), IdentTable(getFormattingLangOpts()), 1336 Encoding(Encoding) { 1337 Lex.SetKeepWhitespaceMode(true); 1338 } 1339 1340 ArrayRef<FormatToken *> lex() { 1341 assert(Tokens.empty()); 1342 do { 1343 Tokens.push_back(getNextToken()); 1344 } while (Tokens.back()->Tok.isNot(tok::eof)); 1345 return Tokens; 1346 } 1347 1348 IdentifierTable &getIdentTable() { return IdentTable; } 1349 1350 private: 1351 FormatToken *getNextToken() { 1352 if (GreaterStashed) { 1353 // Create a synthesized second '>' token. 1354 Token Greater = FormatTok->Tok; 1355 FormatTok = new (Allocator.Allocate()) FormatToken; 1356 FormatTok->Tok = Greater; 1357 SourceLocation GreaterLocation = 1358 FormatTok->Tok.getLocation().getLocWithOffset(1); 1359 FormatTok->WhitespaceRange = 1360 SourceRange(GreaterLocation, GreaterLocation); 1361 FormatTok->TokenText = ">"; 1362 FormatTok->CodePointCount = 1; 1363 GreaterStashed = false; 1364 return FormatTok; 1365 } 1366 1367 FormatTok = new (Allocator.Allocate()) FormatToken; 1368 readRawToken(*FormatTok); 1369 SourceLocation WhitespaceStart = 1370 FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace); 1371 if (SourceMgr.getFileOffset(WhitespaceStart) == 0) 1372 FormatTok->IsFirst = true; 1373 1374 // Consume and record whitespace until we find a significant token. 1375 unsigned WhitespaceLength = TrailingWhitespace; 1376 while (FormatTok->Tok.is(tok::unknown)) { 1377 unsigned Newlines = FormatTok->TokenText.count('\n'); 1378 if (Newlines > 0) 1379 FormatTok->LastNewlineOffset = 1380 WhitespaceLength + FormatTok->TokenText.rfind('\n') + 1; 1381 FormatTok->NewlinesBefore += Newlines; 1382 unsigned EscapedNewlines = FormatTok->TokenText.count("\\\n"); 1383 FormatTok->HasUnescapedNewline |= EscapedNewlines != Newlines; 1384 WhitespaceLength += FormatTok->Tok.getLength(); 1385 1386 readRawToken(*FormatTok); 1387 } 1388 1389 // In case the token starts with escaped newlines, we want to 1390 // take them into account as whitespace - this pattern is quite frequent 1391 // in macro definitions. 1392 // FIXME: What do we want to do with other escaped spaces, and escaped 1393 // spaces or newlines in the middle of tokens? 1394 // FIXME: Add a more explicit test. 1395 while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' && 1396 FormatTok->TokenText[1] == '\n') { 1397 // FIXME: ++FormatTok->NewlinesBefore is missing... 1398 WhitespaceLength += 2; 1399 FormatTok->TokenText = FormatTok->TokenText.substr(2); 1400 } 1401 1402 TrailingWhitespace = 0; 1403 if (FormatTok->Tok.is(tok::comment)) { 1404 StringRef UntrimmedText = FormatTok->TokenText; 1405 FormatTok->TokenText = FormatTok->TokenText.rtrim(); 1406 TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size(); 1407 } else if (FormatTok->Tok.is(tok::raw_identifier)) { 1408 IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText); 1409 FormatTok->Tok.setIdentifierInfo(&Info); 1410 FormatTok->Tok.setKind(Info.getTokenID()); 1411 } else if (FormatTok->Tok.is(tok::greatergreater)) { 1412 FormatTok->Tok.setKind(tok::greater); 1413 FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); 1414 GreaterStashed = true; 1415 } 1416 1417 // Now FormatTok is the next non-whitespace token. 1418 FormatTok->CodePointCount = 1419 encoding::getCodePointCount(FormatTok->TokenText, Encoding); 1420 1421 FormatTok->WhitespaceRange = SourceRange( 1422 WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength)); 1423 return FormatTok; 1424 } 1425 1426 FormatToken *FormatTok; 1427 bool GreaterStashed; 1428 unsigned TrailingWhitespace; 1429 Lexer &Lex; 1430 SourceManager &SourceMgr; 1431 IdentifierTable IdentTable; 1432 encoding::Encoding Encoding; 1433 llvm::SpecificBumpPtrAllocator<FormatToken> Allocator; 1434 SmallVector<FormatToken *, 16> Tokens; 1435 1436 void readRawToken(FormatToken &Tok) { 1437 Lex.LexFromRawLexer(Tok.Tok); 1438 Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()), 1439 Tok.Tok.getLength()); 1440 1441 // For formatting, treat unterminated string literals like normal string 1442 // literals. 1443 if (Tok.is(tok::unknown) && !Tok.TokenText.empty() && 1444 Tok.TokenText[0] == '"') { 1445 Tok.Tok.setKind(tok::string_literal); 1446 Tok.IsUnterminatedLiteral = true; 1447 } 1448 } 1449 }; 1450 1451 class Formatter : public UnwrappedLineConsumer { 1452 public: 1453 Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr, 1454 const std::vector<CharSourceRange> &Ranges) 1455 : Style(Style), Lex(Lex), SourceMgr(SourceMgr), 1456 Whitespaces(SourceMgr, Style), Ranges(Ranges), 1457 Encoding(encoding::detectEncoding(Lex.getBuffer())) { 1458 DEBUG(llvm::dbgs() << "File encoding: " 1459 << (Encoding == encoding::Encoding_UTF8 ? "UTF8" 1460 : "unknown") 1461 << "\n"); 1462 } 1463 1464 virtual ~Formatter() {} 1465 1466 tooling::Replacements format() { 1467 FormatTokenLexer Tokens(Lex, SourceMgr, Encoding); 1468 1469 UnwrappedLineParser Parser(Style, Tokens.lex(), *this); 1470 bool StructuralError = Parser.parse(); 1471 TokenAnnotator Annotator(Style, Tokens.getIdentTable().get("in")); 1472 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1473 Annotator.annotate(AnnotatedLines[i]); 1474 } 1475 deriveLocalStyle(); 1476 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1477 Annotator.calculateFormattingInformation(AnnotatedLines[i]); 1478 } 1479 1480 // Adapt level to the next line if this is a comment. 1481 // FIXME: Can/should this be done in the UnwrappedLineParser? 1482 const AnnotatedLine *NextNonCommentLine = NULL; 1483 for (unsigned i = AnnotatedLines.size() - 1; i > 0; --i) { 1484 if (NextNonCommentLine && AnnotatedLines[i].First->is(tok::comment) && 1485 !AnnotatedLines[i].First->Next) 1486 AnnotatedLines[i].Level = NextNonCommentLine->Level; 1487 else 1488 NextNonCommentLine = AnnotatedLines[i].First->isNot(tok::r_brace) 1489 ? &AnnotatedLines[i] 1490 : NULL; 1491 } 1492 1493 std::vector<int> IndentForLevel; 1494 bool PreviousLineWasTouched = false; 1495 const FormatToken *PreviousLineLastToken = 0; 1496 bool FormatPPDirective = false; 1497 for (std::vector<AnnotatedLine>::iterator I = AnnotatedLines.begin(), 1498 E = AnnotatedLines.end(); 1499 I != E; ++I) { 1500 const AnnotatedLine &TheLine = *I; 1501 const FormatToken *FirstTok = TheLine.First; 1502 int Offset = getIndentOffset(*TheLine.First); 1503 1504 // Check whether this line is part of a formatted preprocessor directive. 1505 if (FirstTok->HasUnescapedNewline) 1506 FormatPPDirective = false; 1507 if (!FormatPPDirective && TheLine.InPPDirective && 1508 (touchesLine(TheLine) || touchesPPDirective(I + 1, E))) 1509 FormatPPDirective = true; 1510 1511 // Determine indent and try to merge multiple unwrapped lines. 1512 while (IndentForLevel.size() <= TheLine.Level) 1513 IndentForLevel.push_back(-1); 1514 IndentForLevel.resize(TheLine.Level + 1); 1515 unsigned Indent = getIndent(IndentForLevel, TheLine.Level); 1516 if (static_cast<int>(Indent) + Offset >= 0) 1517 Indent += Offset; 1518 tryFitMultipleLinesInOne(Indent, I, E); 1519 1520 bool WasMoved = PreviousLineWasTouched && FirstTok->NewlinesBefore == 0; 1521 if (TheLine.First->is(tok::eof)) { 1522 if (PreviousLineWasTouched) { 1523 unsigned NewLines = std::min(FirstTok->NewlinesBefore, 1u); 1524 Whitespaces.replaceWhitespace(*TheLine.First, NewLines, /*Indent*/ 0, 1525 /*TargetColumn*/ 0); 1526 } 1527 } else if (TheLine.Type != LT_Invalid && 1528 (WasMoved || FormatPPDirective || touchesLine(TheLine))) { 1529 unsigned LevelIndent = getIndent(IndentForLevel, TheLine.Level); 1530 if (FirstTok->WhitespaceRange.isValid() && 1531 // Insert a break even if there is a structural error in case where 1532 // we break apart a line consisting of multiple unwrapped lines. 1533 (FirstTok->NewlinesBefore == 0 || !StructuralError)) { 1534 formatFirstToken(*TheLine.First, PreviousLineLastToken, Indent, 1535 TheLine.InPPDirective); 1536 } else { 1537 Indent = LevelIndent = 1538 SourceMgr.getSpellingColumnNumber(FirstTok->Tok.getLocation()) - 1539 1; 1540 } 1541 UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent, 1542 TheLine.First, Whitespaces, Encoding, 1543 BinPackInconclusiveFunctions); 1544 Formatter.format(I + 1 != E ? &*(I + 1) : NULL); 1545 IndentForLevel[TheLine.Level] = LevelIndent; 1546 PreviousLineWasTouched = true; 1547 } else { 1548 // Format the first token if necessary, and notify the WhitespaceManager 1549 // about the unchanged whitespace. 1550 for (const FormatToken *Tok = TheLine.First; Tok != NULL; 1551 Tok = Tok->Next) { 1552 if (Tok == TheLine.First && 1553 (Tok->NewlinesBefore > 0 || Tok->IsFirst)) { 1554 unsigned LevelIndent = 1555 SourceMgr.getSpellingColumnNumber(Tok->Tok.getLocation()) - 1; 1556 // Remove trailing whitespace of the previous line if it was 1557 // touched. 1558 if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine)) { 1559 formatFirstToken(*Tok, PreviousLineLastToken, LevelIndent, 1560 TheLine.InPPDirective); 1561 } else { 1562 Whitespaces.addUntouchableToken(*Tok, TheLine.InPPDirective); 1563 } 1564 1565 if (static_cast<int>(LevelIndent) - Offset >= 0) 1566 LevelIndent -= Offset; 1567 if (Tok->isNot(tok::comment)) 1568 IndentForLevel[TheLine.Level] = LevelIndent; 1569 } else { 1570 Whitespaces.addUntouchableToken(*Tok, TheLine.InPPDirective); 1571 } 1572 } 1573 // If we did not reformat this unwrapped line, the column at the end of 1574 // the last token is unchanged - thus, we can calculate the end of the 1575 // last token. 1576 PreviousLineWasTouched = false; 1577 } 1578 PreviousLineLastToken = I->Last; 1579 } 1580 return Whitespaces.generateReplacements(); 1581 } 1582 1583 private: 1584 void deriveLocalStyle() { 1585 unsigned CountBoundToVariable = 0; 1586 unsigned CountBoundToType = 0; 1587 bool HasCpp03IncompatibleFormat = false; 1588 bool HasBinPackedFunction = false; 1589 bool HasOnePerLineFunction = false; 1590 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1591 if (!AnnotatedLines[i].First->Next) 1592 continue; 1593 FormatToken *Tok = AnnotatedLines[i].First->Next; 1594 while (Tok->Next) { 1595 if (Tok->Type == TT_PointerOrReference) { 1596 bool SpacesBefore = 1597 Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd(); 1598 bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() != 1599 Tok->Next->WhitespaceRange.getEnd(); 1600 if (SpacesBefore && !SpacesAfter) 1601 ++CountBoundToVariable; 1602 else if (!SpacesBefore && SpacesAfter) 1603 ++CountBoundToType; 1604 } 1605 1606 if (Tok->Type == TT_TemplateCloser && 1607 Tok->Previous->Type == TT_TemplateCloser && 1608 Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) 1609 HasCpp03IncompatibleFormat = true; 1610 1611 if (Tok->PackingKind == PPK_BinPacked) 1612 HasBinPackedFunction = true; 1613 if (Tok->PackingKind == PPK_OnePerLine) 1614 HasOnePerLineFunction = true; 1615 1616 Tok = Tok->Next; 1617 } 1618 } 1619 if (Style.DerivePointerBinding) { 1620 if (CountBoundToType > CountBoundToVariable) 1621 Style.PointerBindsToType = true; 1622 else if (CountBoundToType < CountBoundToVariable) 1623 Style.PointerBindsToType = false; 1624 } 1625 if (Style.Standard == FormatStyle::LS_Auto) { 1626 Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11 1627 : FormatStyle::LS_Cpp03; 1628 } 1629 BinPackInconclusiveFunctions = 1630 HasBinPackedFunction || !HasOnePerLineFunction; 1631 } 1632 1633 /// \brief Get the indent of \p Level from \p IndentForLevel. 1634 /// 1635 /// \p IndentForLevel must contain the indent for the level \c l 1636 /// at \p IndentForLevel[l], or a value < 0 if the indent for 1637 /// that level is unknown. 1638 unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) { 1639 if (IndentForLevel[Level] != -1) 1640 return IndentForLevel[Level]; 1641 if (Level == 0) 1642 return 0; 1643 return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth; 1644 } 1645 1646 /// \brief Get the offset of the line relatively to the level. 1647 /// 1648 /// For example, 'public:' labels in classes are offset by 1 or 2 1649 /// characters to the left from their level. 1650 int getIndentOffset(const FormatToken &RootToken) { 1651 if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier()) 1652 return Style.AccessModifierOffset; 1653 return 0; 1654 } 1655 1656 /// \brief Tries to merge lines into one. 1657 /// 1658 /// This will change \c Line and \c AnnotatedLine to contain the merged line, 1659 /// if possible; note that \c I will be incremented when lines are merged. 1660 void tryFitMultipleLinesInOne(unsigned Indent, 1661 std::vector<AnnotatedLine>::iterator &I, 1662 std::vector<AnnotatedLine>::iterator E) { 1663 // We can never merge stuff if there are trailing line comments. 1664 if (I->Last->Type == TT_LineComment) 1665 return; 1666 1667 if (Indent > Style.ColumnLimit) 1668 return; 1669 1670 unsigned Limit = Style.ColumnLimit - Indent; 1671 // If we already exceed the column limit, we set 'Limit' to 0. The different 1672 // tryMerge..() functions can then decide whether to still do merging. 1673 Limit = I->Last->TotalLength > Limit ? 0 : Limit - I->Last->TotalLength; 1674 1675 if (I + 1 == E || (I + 1)->Type == LT_Invalid) 1676 return; 1677 1678 if (I->Last->is(tok::l_brace)) { 1679 tryMergeSimpleBlock(I, E, Limit); 1680 } else if (Style.AllowShortIfStatementsOnASingleLine && 1681 I->First->is(tok::kw_if)) { 1682 tryMergeSimpleControlStatement(I, E, Limit); 1683 } else if (Style.AllowShortLoopsOnASingleLine && 1684 I->First->isOneOf(tok::kw_for, tok::kw_while)) { 1685 tryMergeSimpleControlStatement(I, E, Limit); 1686 } else if (I->InPPDirective && 1687 (I->First->HasUnescapedNewline || I->First->IsFirst)) { 1688 tryMergeSimplePPDirective(I, E, Limit); 1689 } 1690 } 1691 1692 void tryMergeSimplePPDirective(std::vector<AnnotatedLine>::iterator &I, 1693 std::vector<AnnotatedLine>::iterator E, 1694 unsigned Limit) { 1695 if (Limit == 0) 1696 return; 1697 AnnotatedLine &Line = *I; 1698 if (!(I + 1)->InPPDirective || (I + 1)->First->HasUnescapedNewline) 1699 return; 1700 if (I + 2 != E && (I + 2)->InPPDirective && 1701 !(I + 2)->First->HasUnescapedNewline) 1702 return; 1703 if (1 + (I + 1)->Last->TotalLength > Limit) 1704 return; 1705 join(Line, *(++I)); 1706 } 1707 1708 void tryMergeSimpleControlStatement(std::vector<AnnotatedLine>::iterator &I, 1709 std::vector<AnnotatedLine>::iterator E, 1710 unsigned Limit) { 1711 if (Limit == 0) 1712 return; 1713 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman && 1714 (I + 1)->First->is(tok::l_brace)) 1715 return; 1716 if ((I + 1)->InPPDirective != I->InPPDirective || 1717 ((I + 1)->InPPDirective && (I + 1)->First->HasUnescapedNewline)) 1718 return; 1719 AnnotatedLine &Line = *I; 1720 if (Line.Last->isNot(tok::r_paren)) 1721 return; 1722 if (1 + (I + 1)->Last->TotalLength > Limit) 1723 return; 1724 if ((I + 1)->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, 1725 tok::kw_while) || 1726 (I + 1)->First->Type == TT_LineComment) 1727 return; 1728 // Only inline simple if's (no nested if or else). 1729 if (I + 2 != E && Line.First->is(tok::kw_if) && 1730 (I + 2)->First->is(tok::kw_else)) 1731 return; 1732 join(Line, *(++I)); 1733 } 1734 1735 void tryMergeSimpleBlock(std::vector<AnnotatedLine>::iterator &I, 1736 std::vector<AnnotatedLine>::iterator E, 1737 unsigned Limit) { 1738 // No merging if the brace already is on the next line. 1739 if (Style.BreakBeforeBraces != FormatStyle::BS_Attach) 1740 return; 1741 1742 // First, check that the current line allows merging. This is the case if 1743 // we're not in a control flow statement and the last token is an opening 1744 // brace. 1745 AnnotatedLine &Line = *I; 1746 if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace, 1747 tok::kw_else, tok::kw_try, tok::kw_catch, 1748 tok::kw_for, 1749 // This gets rid of all ObjC @ keywords and methods. 1750 tok::at, tok::minus, tok::plus)) 1751 return; 1752 1753 FormatToken *Tok = (I + 1)->First; 1754 if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore && 1755 (Tok->getNextNonComment() == NULL || 1756 Tok->getNextNonComment()->is(tok::semi))) { 1757 // We merge empty blocks even if the line exceeds the column limit. 1758 Tok->SpacesRequiredBefore = 0; 1759 Tok->CanBreakBefore = true; 1760 join(Line, *(I + 1)); 1761 I += 1; 1762 } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) { 1763 // Check that we still have three lines and they fit into the limit. 1764 if (I + 2 == E || (I + 2)->Type == LT_Invalid || 1765 !nextTwoLinesFitInto(I, Limit)) 1766 return; 1767 1768 // Second, check that the next line does not contain any braces - if it 1769 // does, readability declines when putting it into a single line. 1770 if ((I + 1)->Last->Type == TT_LineComment || Tok->MustBreakBefore) 1771 return; 1772 do { 1773 if (Tok->isOneOf(tok::l_brace, tok::r_brace)) 1774 return; 1775 Tok = Tok->Next; 1776 } while (Tok != NULL); 1777 1778 // Last, check that the third line contains a single closing brace. 1779 Tok = (I + 2)->First; 1780 if (Tok->getNextNonComment() != NULL || Tok->isNot(tok::r_brace) || 1781 Tok->MustBreakBefore) 1782 return; 1783 1784 join(Line, *(I + 1)); 1785 join(Line, *(I + 2)); 1786 I += 2; 1787 } 1788 } 1789 1790 bool nextTwoLinesFitInto(std::vector<AnnotatedLine>::iterator I, 1791 unsigned Limit) { 1792 return 1 + (I + 1)->Last->TotalLength + 1 + (I + 2)->Last->TotalLength <= 1793 Limit; 1794 } 1795 1796 void join(AnnotatedLine &A, const AnnotatedLine &B) { 1797 assert(!A.Last->Next); 1798 assert(!B.First->Previous); 1799 A.Last->Next = B.First; 1800 B.First->Previous = A.Last; 1801 unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore; 1802 for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) { 1803 Tok->TotalLength += LengthA; 1804 A.Last = Tok; 1805 } 1806 } 1807 1808 bool touchesRanges(const CharSourceRange &Range) { 1809 for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { 1810 if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), 1811 Ranges[i].getBegin()) && 1812 !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(), 1813 Range.getBegin())) 1814 return true; 1815 } 1816 return false; 1817 } 1818 1819 bool touchesLine(const AnnotatedLine &TheLine) { 1820 const FormatToken *First = TheLine.First; 1821 const FormatToken *Last = TheLine.Last; 1822 CharSourceRange LineRange = CharSourceRange::getCharRange( 1823 First->WhitespaceRange.getBegin().getLocWithOffset( 1824 First->LastNewlineOffset), 1825 Last->Tok.getLocation().getLocWithOffset(Last->TokenText.size() - 1)); 1826 return touchesRanges(LineRange); 1827 } 1828 1829 bool touchesPPDirective(std::vector<AnnotatedLine>::iterator I, 1830 std::vector<AnnotatedLine>::iterator E) { 1831 for (; I != E; ++I) { 1832 if (I->First->HasUnescapedNewline) 1833 return false; 1834 if (touchesLine(*I)) 1835 return true; 1836 } 1837 return false; 1838 } 1839 1840 bool touchesEmptyLineBefore(const AnnotatedLine &TheLine) { 1841 const FormatToken *First = TheLine.First; 1842 CharSourceRange LineRange = CharSourceRange::getCharRange( 1843 First->WhitespaceRange.getBegin(), 1844 First->WhitespaceRange.getBegin().getLocWithOffset( 1845 First->LastNewlineOffset)); 1846 return touchesRanges(LineRange); 1847 } 1848 1849 virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) { 1850 AnnotatedLines.push_back(AnnotatedLine(TheLine)); 1851 } 1852 1853 /// \brief Add a new line and the required indent before the first Token 1854 /// of the \c UnwrappedLine if there was no structural parsing error. 1855 /// Returns the indent level of the \c UnwrappedLine. 1856 void formatFirstToken(const FormatToken &RootToken, 1857 const FormatToken *PreviousToken, unsigned Indent, 1858 bool InPPDirective) { 1859 unsigned Newlines = 1860 std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1); 1861 // Remove empty lines before "}" where applicable. 1862 if (RootToken.is(tok::r_brace) && 1863 (!RootToken.Next || 1864 (RootToken.Next->is(tok::semi) && !RootToken.Next->Next))) 1865 Newlines = std::min(Newlines, 1u); 1866 if (Newlines == 0 && !RootToken.IsFirst) 1867 Newlines = 1; 1868 1869 // Insert extra new line before access specifiers. 1870 if (PreviousToken && PreviousToken->isOneOf(tok::semi, tok::r_brace) && 1871 RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1) 1872 ++Newlines; 1873 1874 Whitespaces.replaceWhitespace( 1875 RootToken, Newlines, Indent, Indent, 1876 InPPDirective && !RootToken.HasUnescapedNewline); 1877 } 1878 1879 FormatStyle Style; 1880 Lexer &Lex; 1881 SourceManager &SourceMgr; 1882 WhitespaceManager Whitespaces; 1883 std::vector<CharSourceRange> Ranges; 1884 std::vector<AnnotatedLine> AnnotatedLines; 1885 1886 encoding::Encoding Encoding; 1887 bool BinPackInconclusiveFunctions; 1888 }; 1889 1890 } // end anonymous namespace 1891 1892 tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex, 1893 SourceManager &SourceMgr, 1894 std::vector<CharSourceRange> Ranges) { 1895 Formatter formatter(Style, Lex, SourceMgr, Ranges); 1896 return formatter.format(); 1897 } 1898 1899 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, 1900 std::vector<tooling::Range> Ranges, 1901 StringRef FileName) { 1902 FileManager Files((FileSystemOptions())); 1903 DiagnosticsEngine Diagnostics( 1904 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), 1905 new DiagnosticOptions); 1906 SourceManager SourceMgr(Diagnostics, Files); 1907 llvm::MemoryBuffer *Buf = llvm::MemoryBuffer::getMemBuffer(Code, FileName); 1908 const clang::FileEntry *Entry = 1909 Files.getVirtualFile(FileName, Buf->getBufferSize(), 0); 1910 SourceMgr.overrideFileContents(Entry, Buf); 1911 FileID ID = 1912 SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User); 1913 Lexer Lex(ID, SourceMgr.getBuffer(ID), SourceMgr, 1914 getFormattingLangOpts(Style.Standard)); 1915 SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID); 1916 std::vector<CharSourceRange> CharRanges; 1917 for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { 1918 SourceLocation Start = StartOfFile.getLocWithOffset(Ranges[i].getOffset()); 1919 SourceLocation End = Start.getLocWithOffset(Ranges[i].getLength()); 1920 CharRanges.push_back(CharSourceRange::getCharRange(Start, End)); 1921 } 1922 return reformat(Style, Lex, SourceMgr, CharRanges); 1923 } 1924 1925 LangOptions getFormattingLangOpts(FormatStyle::LanguageStandard Standard) { 1926 LangOptions LangOpts; 1927 LangOpts.CPlusPlus = 1; 1928 LangOpts.CPlusPlus11 = Standard == FormatStyle::LS_Cpp03 ? 0 : 1; 1929 LangOpts.LineComment = 1; 1930 LangOpts.Bool = 1; 1931 LangOpts.ObjC1 = 1; 1932 LangOpts.ObjC2 = 1; 1933 return LangOpts; 1934 } 1935 1936 } // namespace format 1937 } // namespace clang 1938