1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file implements an indenter that manages the indentation of 12 /// continuations. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H 17 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H 18 19 #include "Encoding.h" 20 #include "FormatToken.h" 21 #include "clang/Format/Format.h" 22 #include "llvm/Support/Regex.h" 23 24 namespace clang { 25 class SourceManager; 26 27 namespace format { 28 29 class AnnotatedLine; 30 struct FormatToken; 31 struct LineState; 32 struct ParenState; 33 class WhitespaceManager; 34 35 class ContinuationIndenter { 36 public: 37 /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in 38 /// column \p FirstIndent. 39 ContinuationIndenter(const FormatStyle &Style, 40 const AdditionalKeywords &Keywords, 41 const SourceManager &SourceMgr, 42 WhitespaceManager &Whitespaces, 43 encoding::Encoding Encoding, 44 bool BinPackInconclusiveFunctions); 45 46 /// \brief Get the initial state, i.e. the state after placing \p Line's 47 /// first token at \p FirstIndent. 48 LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line, 49 bool DryRun); 50 51 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a 52 // better home. 53 /// \brief Returns \c true, if a line break after \p State is allowed. 54 bool canBreak(const LineState &State); 55 56 /// \brief Returns \c true, if a line break after \p State is mandatory. 57 bool mustBreak(const LineState &State); 58 59 /// \brief Appends the next token to \p State and updates information 60 /// necessary for indentation. 61 /// 62 /// Puts the token on the current line if \p Newline is \c false and adds a 63 /// line break and necessary indentation otherwise. 64 /// 65 /// If \p DryRun is \c false, also creates and stores the required 66 /// \c Replacement. 67 unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, 68 unsigned ExtraSpaces = 0); 69 70 /// \brief Get the column limit for this line. This is the style's column 71 /// limit, potentially reduced for preprocessor definitions. 72 unsigned getColumnLimit(const LineState &State) const; 73 74 private: 75 /// \brief Mark the next token as consumed in \p State and modify its stacks 76 /// accordingly. 77 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline); 78 79 /// \brief Update 'State' according to the next token's fake left parentheses. 80 void moveStatePastFakeLParens(LineState &State, bool Newline); 81 /// \brief Update 'State' according to the next token's fake r_parens. 82 void moveStatePastFakeRParens(LineState &State); 83 84 /// \brief Update 'State' according to the next token being one of "(<{[". 85 void moveStatePastScopeOpener(LineState &State, bool Newline); 86 /// \brief Update 'State' according to the next token being one of ")>}]". 87 void moveStatePastScopeCloser(LineState &State); 88 /// \brief Update 'State' with the next token opening a nested block. 89 void moveStateToNewBlock(LineState &State); 90 91 /// \brief If the current token sticks out over the end of the line, break 92 /// it if possible. 93 /// 94 /// \returns An extra penalty if a token was broken, otherwise 0. 95 /// 96 /// The returned penalty will cover the cost of the additional line breaks and 97 /// column limit violation in all lines except for the last one. The penalty 98 /// for the column limit violation in the last line (and in single line 99 /// tokens) is handled in \c addNextStateToQueue. 100 unsigned breakProtrudingToken(const FormatToken &Current, LineState &State, 101 bool DryRun); 102 103 /// \brief Appends the next token to \p State and updates information 104 /// necessary for indentation. 105 /// 106 /// Puts the token on the current line. 107 /// 108 /// If \p DryRun is \c false, also creates and stores the required 109 /// \c Replacement. 110 void addTokenOnCurrentLine(LineState &State, bool DryRun, 111 unsigned ExtraSpaces); 112 113 /// \brief Appends the next token to \p State and updates information 114 /// necessary for indentation. 115 /// 116 /// Adds a line break and necessary indentation. 117 /// 118 /// If \p DryRun is \c false, also creates and stores the required 119 /// \c Replacement. 120 unsigned addTokenOnNewLine(LineState &State, bool DryRun); 121 122 /// \brief Calculate the new column for a line wrap before the next token. 123 unsigned getNewLineColumn(const LineState &State); 124 125 /// \brief Adds a multiline token to the \p State. 126 /// 127 /// \returns Extra penalty for the first line of the literal: last line is 128 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't 129 /// matter, as we don't change them. 130 unsigned addMultilineToken(const FormatToken &Current, LineState &State); 131 132 /// \brief Returns \c true if the next token starts a multiline string 133 /// literal. 134 /// 135 /// This includes implicitly concatenated strings, strings that will be broken 136 /// by clang-format and string literals with escaped newlines. 137 bool nextIsMultilineString(const LineState &State); 138 139 FormatStyle Style; 140 const AdditionalKeywords &Keywords; 141 const SourceManager &SourceMgr; 142 WhitespaceManager &Whitespaces; 143 encoding::Encoding Encoding; 144 bool BinPackInconclusiveFunctions; 145 llvm::Regex CommentPragmasRegex; 146 }; 147 148 struct ParenState { 149 ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace, 150 bool AvoidBinPacking, bool NoLineBreak) 151 : Indent(Indent), IndentLevel(IndentLevel), LastSpace(LastSpace), 152 NestedBlockIndent(Indent), BreakBeforeClosingBrace(false), 153 AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), 154 NoLineBreak(NoLineBreak), LastOperatorWrapped(true), 155 ContainsLineBreak(false), ContainsUnwrappedBuilder(false), 156 AlignColons(true), ObjCSelectorNameFound(false), 157 HasMultipleNestedBlocks(false), NestedBlockInlined(false) {} 158 159 /// \brief The position to which a specific parenthesis level needs to be 160 /// indented. 161 unsigned Indent; 162 163 /// \brief The number of indentation levels of the block. 164 unsigned IndentLevel; 165 166 /// \brief The position of the last space on each level. 167 /// 168 /// Used e.g. to break like: 169 /// functionCall(Parameter, otherCall( 170 /// OtherParameter)); 171 unsigned LastSpace; 172 173 /// \brief If a block relative to this parenthesis level gets wrapped, indent 174 /// it this much. 175 unsigned NestedBlockIndent; 176 177 /// \brief The position the first "<<" operator encountered on each level. 178 /// 179 /// Used to align "<<" operators. 0 if no such operator has been encountered 180 /// on a level. 181 unsigned FirstLessLess = 0; 182 183 /// \brief The column of a \c ? in a conditional expression; 184 unsigned QuestionColumn = 0; 185 186 /// \brief The position of the colon in an ObjC method declaration/call. 187 unsigned ColonPos = 0; 188 189 /// \brief The start of the most recent function in a builder-type call. 190 unsigned StartOfFunctionCall = 0; 191 192 /// \brief Contains the start of array subscript expressions, so that they 193 /// can be aligned. 194 unsigned StartOfArraySubscripts = 0; 195 196 /// \brief If a nested name specifier was broken over multiple lines, this 197 /// contains the start column of the second line. Otherwise 0. 198 unsigned NestedNameSpecifierContinuation = 0; 199 200 /// \brief If a call expression was broken over multiple lines, this 201 /// contains the start column of the second line. Otherwise 0. 202 unsigned CallContinuation = 0; 203 204 /// \brief The column of the first variable name in a variable declaration. 205 /// 206 /// Used to align further variables if necessary. 207 unsigned VariablePos = 0; 208 209 /// \brief Whether a newline needs to be inserted before the block's closing 210 /// brace. 211 /// 212 /// We only want to insert a newline before the closing brace if there also 213 /// was a newline after the beginning left brace. 214 bool BreakBeforeClosingBrace : 1; 215 216 /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple 217 /// lines, in this context. 218 bool AvoidBinPacking : 1; 219 220 /// \brief Break after the next comma (or all the commas in this context if 221 /// \c AvoidBinPacking is \c true). 222 bool BreakBeforeParameter : 1; 223 224 /// \brief Line breaking in this context would break a formatting rule. 225 bool NoLineBreak : 1; 226 227 /// \brief True if the last binary operator on this level was wrapped to the 228 /// next line. 229 bool LastOperatorWrapped : 1; 230 231 /// \brief \c true if this \c ParenState already contains a line-break. 232 /// 233 /// The first line break in a certain \c ParenState causes extra penalty so 234 /// that clang-format prefers similar breaks, i.e. breaks in the same 235 /// parenthesis. 236 bool ContainsLineBreak : 1; 237 238 /// \brief \c true if this \c ParenState contains multiple segments of a 239 /// builder-type call on one line. 240 bool ContainsUnwrappedBuilder : 1; 241 242 /// \brief \c true if the colons of the curren ObjC method expression should 243 /// be aligned. 244 /// 245 /// Not considered for memoization as it will always have the same value at 246 /// the same token. 247 bool AlignColons : 1; 248 249 /// \brief \c true if at least one selector name was found in the current 250 /// ObjC method expression. 251 /// 252 /// Not considered for memoization as it will always have the same value at 253 /// the same token. 254 bool ObjCSelectorNameFound : 1; 255 256 /// \brief \c true if there are multiple nested blocks inside these parens. 257 /// 258 /// Not considered for memoization as it will always have the same value at 259 /// the same token. 260 bool HasMultipleNestedBlocks : 1; 261 262 // \brief The start of a nested block (e.g. lambda introducer in C++ or 263 // "function" in JavaScript) is not wrapped to a new line. 264 bool NestedBlockInlined : 1; 265 266 bool operator<(const ParenState &Other) const { 267 if (Indent != Other.Indent) 268 return Indent < Other.Indent; 269 if (LastSpace != Other.LastSpace) 270 return LastSpace < Other.LastSpace; 271 if (NestedBlockIndent != Other.NestedBlockIndent) 272 return NestedBlockIndent < Other.NestedBlockIndent; 273 if (FirstLessLess != Other.FirstLessLess) 274 return FirstLessLess < Other.FirstLessLess; 275 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) 276 return BreakBeforeClosingBrace; 277 if (QuestionColumn != Other.QuestionColumn) 278 return QuestionColumn < Other.QuestionColumn; 279 if (AvoidBinPacking != Other.AvoidBinPacking) 280 return AvoidBinPacking; 281 if (BreakBeforeParameter != Other.BreakBeforeParameter) 282 return BreakBeforeParameter; 283 if (NoLineBreak != Other.NoLineBreak) 284 return NoLineBreak; 285 if (LastOperatorWrapped != Other.LastOperatorWrapped) 286 return LastOperatorWrapped; 287 if (ColonPos != Other.ColonPos) 288 return ColonPos < Other.ColonPos; 289 if (StartOfFunctionCall != Other.StartOfFunctionCall) 290 return StartOfFunctionCall < Other.StartOfFunctionCall; 291 if (StartOfArraySubscripts != Other.StartOfArraySubscripts) 292 return StartOfArraySubscripts < Other.StartOfArraySubscripts; 293 if (CallContinuation != Other.CallContinuation) 294 return CallContinuation < Other.CallContinuation; 295 if (VariablePos != Other.VariablePos) 296 return VariablePos < Other.VariablePos; 297 if (ContainsLineBreak != Other.ContainsLineBreak) 298 return ContainsLineBreak; 299 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder) 300 return ContainsUnwrappedBuilder; 301 if (NestedBlockInlined != Other.NestedBlockInlined) 302 return NestedBlockInlined; 303 return false; 304 } 305 }; 306 307 /// \brief The current state when indenting a unwrapped line. 308 /// 309 /// As the indenting tries different combinations this is copied by value. 310 struct LineState { 311 /// \brief The number of used columns in the current line. 312 unsigned Column; 313 314 /// \brief The token that needs to be next formatted. 315 FormatToken *NextToken; 316 317 /// \brief \c true if this line contains a continued for-loop section. 318 bool LineContainsContinuedForLoopSection; 319 320 /// \brief The \c NestingLevel at the start of this line. 321 unsigned StartOfLineLevel; 322 323 /// \brief The lowest \c NestingLevel on the current line. 324 unsigned LowestLevelOnLine; 325 326 /// \brief The start column of the string literal, if we're in a string 327 /// literal sequence, 0 otherwise. 328 unsigned StartOfStringLiteral; 329 330 /// \brief A stack keeping track of properties applying to parenthesis 331 /// levels. 332 std::vector<ParenState> Stack; 333 334 /// \brief Ignore the stack of \c ParenStates for state comparison. 335 /// 336 /// In long and deeply nested unwrapped lines, the current algorithm can 337 /// be insufficient for finding the best formatting with a reasonable amount 338 /// of time and memory. Setting this flag will effectively lead to the 339 /// algorithm not analyzing some combinations. However, these combinations 340 /// rarely contain the optimal solution: In short, accepting a higher 341 /// penalty early would need to lead to different values in the \c 342 /// ParenState stack (in an otherwise identical state) and these different 343 /// values would need to lead to a significant amount of avoided penalty 344 /// later. 345 /// 346 /// FIXME: Come up with a better algorithm instead. 347 bool IgnoreStackForComparison; 348 349 /// \brief The indent of the first token. 350 unsigned FirstIndent; 351 352 /// \brief The line that is being formatted. 353 /// 354 /// Does not need to be considered for memoization because it doesn't change. 355 const AnnotatedLine *Line; 356 357 /// \brief Comparison operator to be able to used \c LineState in \c map. 358 bool operator<(const LineState &Other) const { 359 if (NextToken != Other.NextToken) 360 return NextToken < Other.NextToken; 361 if (Column != Other.Column) 362 return Column < Other.Column; 363 if (LineContainsContinuedForLoopSection != 364 Other.LineContainsContinuedForLoopSection) 365 return LineContainsContinuedForLoopSection; 366 if (StartOfLineLevel != Other.StartOfLineLevel) 367 return StartOfLineLevel < Other.StartOfLineLevel; 368 if (LowestLevelOnLine != Other.LowestLevelOnLine) 369 return LowestLevelOnLine < Other.LowestLevelOnLine; 370 if (StartOfStringLiteral != Other.StartOfStringLiteral) 371 return StartOfStringLiteral < Other.StartOfStringLiteral; 372 if (IgnoreStackForComparison || Other.IgnoreStackForComparison) 373 return false; 374 return Stack < Other.Stack; 375 } 376 }; 377 378 } // end namespace format 379 } // end namespace clang 380 381 #endif 382