1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file implements an indenter that manages the indentation of 12 /// continuations. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #ifndef LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H 17 #define LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H 18 19 #include "Encoding.h" 20 #include "clang/Format/Format.h" 21 #include "llvm/Support/Regex.h" 22 23 namespace clang { 24 class SourceManager; 25 26 namespace format { 27 28 class AnnotatedLine; 29 struct FormatToken; 30 struct LineState; 31 struct ParenState; 32 class WhitespaceManager; 33 34 class ContinuationIndenter { 35 public: 36 /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in 37 /// column \p FirstIndent. 38 ContinuationIndenter(const FormatStyle &Style, SourceManager &SourceMgr, 39 WhitespaceManager &Whitespaces, 40 encoding::Encoding Encoding, 41 bool BinPackInconclusiveFunctions); 42 43 /// \brief Get the initial state, i.e. the state after placing \p Line's 44 /// first token at \p FirstIndent. 45 LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line, 46 bool DryRun); 47 48 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a 49 // better home. 50 /// \brief Returns \c true, if a line break after \p State is allowed. 51 bool canBreak(const LineState &State); 52 53 /// \brief Returns \c true, if a line break after \p State is mandatory. 54 bool mustBreak(const LineState &State); 55 56 /// \brief Appends the next token to \p State and updates information 57 /// necessary for indentation. 58 /// 59 /// Puts the token on the current line if \p Newline is \c false and adds a 60 /// line break and necessary indentation otherwise. 61 /// 62 /// If \p DryRun is \c false, also creates and stores the required 63 /// \c Replacement. 64 unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, 65 unsigned ExtraSpaces = 0); 66 67 /// \brief Get the column limit for this line. This is the style's column 68 /// limit, potentially reduced for preprocessor definitions. 69 unsigned getColumnLimit(const LineState &State) const; 70 71 private: 72 /// \brief Mark the next token as consumed in \p State and modify its stacks 73 /// accordingly. 74 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline); 75 76 /// \brief Update 'State' according to the next token's fake left parentheses. 77 void moveStatePastFakeLParens(LineState &State, bool Newline); 78 /// \brief Update 'State' according to the next token's fake r_parens. 79 void moveStatePastFakeRParens(LineState &State); 80 81 /// \brief Update 'State' according to the next token being one of "(<{[". 82 void moveStatePastScopeOpener(LineState &State, bool Newline); 83 /// \brief Update 'State' according to the next token being one of ")>}]". 84 void moveStatePastScopeCloser(LineState &State); 85 /// \brief Update 'State' with the next token opening a nested block. 86 void moveStateToNewBlock(LineState &State); 87 88 /// \brief If the current token sticks out over the end of the line, break 89 /// it if possible. 90 /// 91 /// \returns An extra penalty if a token was broken, otherwise 0. 92 /// 93 /// The returned penalty will cover the cost of the additional line breaks and 94 /// column limit violation in all lines except for the last one. The penalty 95 /// for the column limit violation in the last line (and in single line 96 /// tokens) is handled in \c addNextStateToQueue. 97 unsigned breakProtrudingToken(const FormatToken &Current, LineState &State, 98 bool DryRun); 99 100 /// \brief Appends the next token to \p State and updates information 101 /// necessary for indentation. 102 /// 103 /// Puts the token on the current line. 104 /// 105 /// If \p DryRun is \c false, also creates and stores the required 106 /// \c Replacement. 107 void addTokenOnCurrentLine(LineState &State, bool DryRun, 108 unsigned ExtraSpaces); 109 110 /// \brief Appends the next token to \p State and updates information 111 /// necessary for indentation. 112 /// 113 /// Adds a line break and necessary indentation. 114 /// 115 /// If \p DryRun is \c false, also creates and stores the required 116 /// \c Replacement. 117 unsigned addTokenOnNewLine(LineState &State, bool DryRun); 118 119 /// \brief Calculate the new column for a line wrap before the next token. 120 unsigned getNewLineColumn(const LineState &State); 121 122 /// \brief Adds a multiline token to the \p State. 123 /// 124 /// \returns Extra penalty for the first line of the literal: last line is 125 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't 126 /// matter, as we don't change them. 127 unsigned addMultilineToken(const FormatToken &Current, LineState &State); 128 129 /// \brief Returns \c true if the next token starts a multiline string 130 /// literal. 131 /// 132 /// This includes implicitly concatenated strings, strings that will be broken 133 /// by clang-format and string literals with escaped newlines. 134 bool nextIsMultilineString(const LineState &State); 135 136 FormatStyle Style; 137 SourceManager &SourceMgr; 138 WhitespaceManager &Whitespaces; 139 encoding::Encoding Encoding; 140 bool BinPackInconclusiveFunctions; 141 llvm::Regex CommentPragmasRegex; 142 }; 143 144 struct ParenState { 145 ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace, 146 bool AvoidBinPacking, bool NoLineBreak) 147 : Indent(Indent), IndentLevel(IndentLevel), LastSpace(LastSpace), 148 FirstLessLess(0), BreakBeforeClosingBrace(false), QuestionColumn(0), 149 AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), 150 NoLineBreak(NoLineBreak), LastOperatorWrapped(true), ColonPos(0), 151 StartOfFunctionCall(0), StartOfArraySubscripts(0), 152 NestedNameSpecifierContinuation(0), CallContinuation(0), VariablePos(0), 153 ContainsLineBreak(false), ContainsUnwrappedBuilder(0), 154 AlignColons(true), ObjCSelectorNameFound(false), 155 HasMultipleNestedBlocks(false), JSFunctionInlined(false) {} 156 157 /// \brief The position to which a specific parenthesis level needs to be 158 /// indented. 159 unsigned Indent; 160 161 /// \brief The number of indentation levels of the block. 162 unsigned IndentLevel; 163 164 /// \brief The position of the last space on each level. 165 /// 166 /// Used e.g. to break like: 167 /// functionCall(Parameter, otherCall( 168 /// OtherParameter)); 169 unsigned LastSpace; 170 171 /// \brief The position the first "<<" operator encountered on each level. 172 /// 173 /// Used to align "<<" operators. 0 if no such operator has been encountered 174 /// on a level. 175 unsigned FirstLessLess; 176 177 /// \brief Whether a newline needs to be inserted before the block's closing 178 /// brace. 179 /// 180 /// We only want to insert a newline before the closing brace if there also 181 /// was a newline after the beginning left brace. 182 bool BreakBeforeClosingBrace; 183 184 /// \brief The column of a \c ? in a conditional expression; 185 unsigned QuestionColumn; 186 187 /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple 188 /// lines, in this context. 189 bool AvoidBinPacking; 190 191 /// \brief Break after the next comma (or all the commas in this context if 192 /// \c AvoidBinPacking is \c true). 193 bool BreakBeforeParameter; 194 195 /// \brief Line breaking in this context would break a formatting rule. 196 bool NoLineBreak; 197 198 /// \brief True if the last binary operator on this level was wrapped to the 199 /// next line. 200 bool LastOperatorWrapped; 201 202 /// \brief The position of the colon in an ObjC method declaration/call. 203 unsigned ColonPos; 204 205 /// \brief The start of the most recent function in a builder-type call. 206 unsigned StartOfFunctionCall; 207 208 /// \brief Contains the start of array subscript expressions, so that they 209 /// can be aligned. 210 unsigned StartOfArraySubscripts; 211 212 /// \brief If a nested name specifier was broken over multiple lines, this 213 /// contains the start column of the second line. Otherwise 0. 214 unsigned NestedNameSpecifierContinuation; 215 216 /// \brief If a call expression was broken over multiple lines, this 217 /// contains the start column of the second line. Otherwise 0. 218 unsigned CallContinuation; 219 220 /// \brief The column of the first variable name in a variable declaration. 221 /// 222 /// Used to align further variables if necessary. 223 unsigned VariablePos; 224 225 /// \brief \c true if this \c ParenState already contains a line-break. 226 /// 227 /// The first line break in a certain \c ParenState causes extra penalty so 228 /// that clang-format prefers similar breaks, i.e. breaks in the same 229 /// parenthesis. 230 bool ContainsLineBreak; 231 232 /// \brief \c true if this \c ParenState contains multiple segments of a 233 /// builder-type call on one line. 234 bool ContainsUnwrappedBuilder; 235 236 /// \brief \c true if the colons of the curren ObjC method expression should 237 /// be aligned. 238 /// 239 /// Not considered for memoization as it will always have the same value at 240 /// the same token. 241 bool AlignColons; 242 243 /// \brief \c true if at least one selector name was found in the current 244 /// ObjC method expression. 245 /// 246 /// Not considered for memoization as it will always have the same value at 247 /// the same token. 248 bool ObjCSelectorNameFound; 249 250 /// \brief \c true if there are multiple nested blocks inside these parens. 251 /// 252 /// Not considered for memoization as it will always have the same value at 253 /// the same token. 254 bool HasMultipleNestedBlocks; 255 256 // \brief The previous JavaScript 'function' keyword is not wrapped to a new 257 // line. 258 bool JSFunctionInlined; 259 260 bool operator<(const ParenState &Other) const { 261 if (Indent != Other.Indent) 262 return Indent < Other.Indent; 263 if (LastSpace != Other.LastSpace) 264 return LastSpace < Other.LastSpace; 265 if (FirstLessLess != Other.FirstLessLess) 266 return FirstLessLess < Other.FirstLessLess; 267 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) 268 return BreakBeforeClosingBrace; 269 if (QuestionColumn != Other.QuestionColumn) 270 return QuestionColumn < Other.QuestionColumn; 271 if (AvoidBinPacking != Other.AvoidBinPacking) 272 return AvoidBinPacking; 273 if (BreakBeforeParameter != Other.BreakBeforeParameter) 274 return BreakBeforeParameter; 275 if (NoLineBreak != Other.NoLineBreak) 276 return NoLineBreak; 277 if (LastOperatorWrapped != Other.LastOperatorWrapped) 278 return LastOperatorWrapped; 279 if (ColonPos != Other.ColonPos) 280 return ColonPos < Other.ColonPos; 281 if (StartOfFunctionCall != Other.StartOfFunctionCall) 282 return StartOfFunctionCall < Other.StartOfFunctionCall; 283 if (StartOfArraySubscripts != Other.StartOfArraySubscripts) 284 return StartOfArraySubscripts < Other.StartOfArraySubscripts; 285 if (CallContinuation != Other.CallContinuation) 286 return CallContinuation < Other.CallContinuation; 287 if (VariablePos != Other.VariablePos) 288 return VariablePos < Other.VariablePos; 289 if (ContainsLineBreak != Other.ContainsLineBreak) 290 return ContainsLineBreak < Other.ContainsLineBreak; 291 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder) 292 return ContainsUnwrappedBuilder < Other.ContainsUnwrappedBuilder; 293 if (JSFunctionInlined != Other.JSFunctionInlined) 294 return JSFunctionInlined < Other.JSFunctionInlined; 295 return false; 296 } 297 }; 298 299 /// \brief The current state when indenting a unwrapped line. 300 /// 301 /// As the indenting tries different combinations this is copied by value. 302 struct LineState { 303 /// \brief The number of used columns in the current line. 304 unsigned Column; 305 306 /// \brief The token that needs to be next formatted. 307 FormatToken *NextToken; 308 309 /// \brief \c true if this line contains a continued for-loop section. 310 bool LineContainsContinuedForLoopSection; 311 312 /// \brief The \c NestingLevel at the start of this line. 313 unsigned StartOfLineLevel; 314 315 /// \brief The lowest \c NestingLevel on the current line. 316 unsigned LowestLevelOnLine; 317 318 /// \brief The start column of the string literal, if we're in a string 319 /// literal sequence, 0 otherwise. 320 unsigned StartOfStringLiteral; 321 322 /// \brief A stack keeping track of properties applying to parenthesis 323 /// levels. 324 std::vector<ParenState> Stack; 325 326 /// \brief Ignore the stack of \c ParenStates for state comparison. 327 /// 328 /// In long and deeply nested unwrapped lines, the current algorithm can 329 /// be insufficient for finding the best formatting with a reasonable amount 330 /// of time and memory. Setting this flag will effectively lead to the 331 /// algorithm not analyzing some combinations. However, these combinations 332 /// rarely contain the optimal solution: In short, accepting a higher 333 /// penalty early would need to lead to different values in the \c 334 /// ParenState stack (in an otherwise identical state) and these different 335 /// values would need to lead to a significant amount of avoided penalty 336 /// later. 337 /// 338 /// FIXME: Come up with a better algorithm instead. 339 bool IgnoreStackForComparison; 340 341 /// \brief The indent of the first token. 342 unsigned FirstIndent; 343 344 /// \brief The line that is being formatted. 345 /// 346 /// Does not need to be considered for memoization because it doesn't change. 347 const AnnotatedLine *Line; 348 349 /// \brief Comparison operator to be able to used \c LineState in \c map. 350 bool operator<(const LineState &Other) const { 351 if (NextToken != Other.NextToken) 352 return NextToken < Other.NextToken; 353 if (Column != Other.Column) 354 return Column < Other.Column; 355 if (LineContainsContinuedForLoopSection != 356 Other.LineContainsContinuedForLoopSection) 357 return LineContainsContinuedForLoopSection; 358 if (StartOfLineLevel != Other.StartOfLineLevel) 359 return StartOfLineLevel < Other.StartOfLineLevel; 360 if (LowestLevelOnLine != Other.LowestLevelOnLine) 361 return LowestLevelOnLine < Other.LowestLevelOnLine; 362 if (StartOfStringLiteral != Other.StartOfStringLiteral) 363 return StartOfStringLiteral < Other.StartOfStringLiteral; 364 if (IgnoreStackForComparison || Other.IgnoreStackForComparison) 365 return false; 366 return Stack < Other.Stack; 367 } 368 }; 369 370 } // end namespace format 371 } // end namespace clang 372 373 #endif // LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H 374