1 //===--- BreakableToken.cpp - Format C++ code -----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief Contains implementation of BreakableToken class and classes derived 12 /// from it. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "BreakableToken.h" 17 #include "clang/Basic/CharInfo.h" 18 #include "clang/Format/Format.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/Support/Debug.h" 21 #include <algorithm> 22 23 #define DEBUG_TYPE "format-token-breaker" 24 25 namespace clang { 26 namespace format { 27 28 static const char *const Blanks = " \t\v\f\r"; 29 static bool IsBlank(char C) { 30 switch (C) { 31 case ' ': 32 case '\t': 33 case '\v': 34 case '\f': 35 case '\r': 36 return true; 37 default: 38 return false; 39 } 40 } 41 42 static BreakableToken::Split getCommentSplit(StringRef Text, 43 unsigned ContentStartColumn, 44 unsigned ColumnLimit, 45 unsigned TabWidth, 46 encoding::Encoding Encoding) { 47 if (ColumnLimit <= ContentStartColumn + 1) 48 return BreakableToken::Split(StringRef::npos, 0); 49 50 unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1; 51 unsigned MaxSplitBytes = 0; 52 53 for (unsigned NumChars = 0; 54 NumChars < MaxSplit && MaxSplitBytes < Text.size();) { 55 unsigned BytesInChar = 56 encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding); 57 NumChars += 58 encoding::columnWidthWithTabs(Text.substr(MaxSplitBytes, BytesInChar), 59 ContentStartColumn, TabWidth, Encoding); 60 MaxSplitBytes += BytesInChar; 61 } 62 63 StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes); 64 if (SpaceOffset == StringRef::npos || 65 // Don't break at leading whitespace. 66 Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) { 67 // Make sure that we don't break at leading whitespace that 68 // reaches past MaxSplit. 69 StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks); 70 if (FirstNonWhitespace == StringRef::npos) 71 // If the comment is only whitespace, we cannot split. 72 return BreakableToken::Split(StringRef::npos, 0); 73 SpaceOffset = Text.find_first_of( 74 Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace)); 75 } 76 if (SpaceOffset != StringRef::npos && SpaceOffset != 0) { 77 StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks); 78 StringRef AfterCut = Text.substr(SpaceOffset).ltrim(Blanks); 79 return BreakableToken::Split(BeforeCut.size(), 80 AfterCut.begin() - BeforeCut.end()); 81 } 82 return BreakableToken::Split(StringRef::npos, 0); 83 } 84 85 static BreakableToken::Split 86 getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit, 87 unsigned TabWidth, encoding::Encoding Encoding) { 88 // FIXME: Reduce unit test case. 89 if (Text.empty()) 90 return BreakableToken::Split(StringRef::npos, 0); 91 if (ColumnLimit <= UsedColumns) 92 return BreakableToken::Split(StringRef::npos, 0); 93 unsigned MaxSplit = ColumnLimit - UsedColumns; 94 StringRef::size_type SpaceOffset = 0; 95 StringRef::size_type SlashOffset = 0; 96 StringRef::size_type WordStartOffset = 0; 97 StringRef::size_type SplitPoint = 0; 98 for (unsigned Chars = 0;;) { 99 unsigned Advance; 100 if (Text[0] == '\\') { 101 Advance = encoding::getEscapeSequenceLength(Text); 102 Chars += Advance; 103 } else { 104 Advance = encoding::getCodePointNumBytes(Text[0], Encoding); 105 Chars += encoding::columnWidthWithTabs( 106 Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding); 107 } 108 109 if (Chars > MaxSplit || Text.size() <= Advance) 110 break; 111 112 if (IsBlank(Text[0])) 113 SpaceOffset = SplitPoint; 114 if (Text[0] == '/') 115 SlashOffset = SplitPoint; 116 if (Advance == 1 && !isAlphanumeric(Text[0])) 117 WordStartOffset = SplitPoint; 118 119 SplitPoint += Advance; 120 Text = Text.substr(Advance); 121 } 122 123 if (SpaceOffset != 0) 124 return BreakableToken::Split(SpaceOffset + 1, 0); 125 if (SlashOffset != 0) 126 return BreakableToken::Split(SlashOffset + 1, 0); 127 if (WordStartOffset != 0) 128 return BreakableToken::Split(WordStartOffset + 1, 0); 129 if (SplitPoint != 0) 130 return BreakableToken::Split(SplitPoint, 0); 131 return BreakableToken::Split(StringRef::npos, 0); 132 } 133 134 unsigned BreakableSingleLineToken::getLineCount() const { return 1; } 135 136 unsigned BreakableSingleLineToken::getLineLengthAfterSplit( 137 unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { 138 return StartColumn + Prefix.size() + Postfix.size() + 139 encoding::columnWidthWithTabs(Line.substr(Offset, Length), 140 StartColumn + Prefix.size(), 141 Style.TabWidth, Encoding); 142 } 143 144 BreakableSingleLineToken::BreakableSingleLineToken( 145 const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn, 146 StringRef Prefix, StringRef Postfix, bool InPPDirective, 147 encoding::Encoding Encoding, const FormatStyle &Style) 148 : BreakableToken(Tok, IndentLevel, InPPDirective, Encoding, Style), 149 StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) { 150 assert(Tok.TokenText.endswith(Postfix)); 151 Line = Tok.TokenText.substr( 152 Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size()); 153 } 154 155 BreakableStringLiteral::BreakableStringLiteral( 156 const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn, 157 StringRef Prefix, StringRef Postfix, bool InPPDirective, 158 encoding::Encoding Encoding, const FormatStyle &Style) 159 : BreakableSingleLineToken(Tok, IndentLevel, StartColumn, Prefix, Postfix, 160 InPPDirective, Encoding, Style) {} 161 162 BreakableToken::Split 163 BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset, 164 unsigned ColumnLimit) const { 165 return getStringSplit(Line.substr(TailOffset), 166 StartColumn + Prefix.size() + Postfix.size(), 167 ColumnLimit, Style.TabWidth, Encoding); 168 } 169 170 void BreakableStringLiteral::insertBreak(unsigned LineIndex, 171 unsigned TailOffset, Split Split, 172 WhitespaceManager &Whitespaces) { 173 unsigned LeadingSpaces = StartColumn; 174 // The '@' of an ObjC string literal (@"Test") does not become part of the 175 // string token. 176 // FIXME: It might be a cleaner solution to merge the tokens as a 177 // precomputation step. 178 if (Prefix.startswith("@")) 179 --LeadingSpaces; 180 Whitespaces.replaceWhitespaceInToken( 181 Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix, 182 Prefix, InPPDirective, 1, IndentLevel, LeadingSpaces); 183 } 184 185 static StringRef getLineCommentIndentPrefix(StringRef Comment) { 186 static const char *const KnownPrefixes[] = {"///", "//", "//!"}; 187 StringRef LongestPrefix; 188 for (StringRef KnownPrefix : KnownPrefixes) { 189 if (Comment.startswith(KnownPrefix)) { 190 size_t PrefixLength = KnownPrefix.size(); 191 while (PrefixLength < Comment.size() && Comment[PrefixLength] == ' ') 192 ++PrefixLength; 193 if (PrefixLength > LongestPrefix.size()) 194 LongestPrefix = Comment.substr(0, PrefixLength); 195 } 196 } 197 return LongestPrefix; 198 } 199 200 BreakableLineComment::BreakableLineComment( 201 const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn, 202 bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style) 203 : BreakableSingleLineToken(Token, IndentLevel, StartColumn, 204 getLineCommentIndentPrefix(Token.TokenText), "", 205 InPPDirective, Encoding, Style) { 206 OriginalPrefix = Prefix; 207 if (Token.TokenText.size() > Prefix.size() && 208 isAlphanumeric(Token.TokenText[Prefix.size()])) { 209 if (Prefix == "//") 210 Prefix = "// "; 211 else if (Prefix == "///") 212 Prefix = "/// "; 213 else if (Prefix == "//!") 214 Prefix = "//! "; 215 } 216 } 217 218 BreakableToken::Split 219 BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset, 220 unsigned ColumnLimit) const { 221 return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(), 222 ColumnLimit, Style.TabWidth, Encoding); 223 } 224 225 void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset, 226 Split Split, 227 WhitespaceManager &Whitespaces) { 228 Whitespaces.replaceWhitespaceInToken( 229 Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, 230 Postfix, Prefix, InPPDirective, /*Newlines=*/1, IndentLevel, StartColumn); 231 } 232 233 void BreakableLineComment::replaceWhitespace(unsigned LineIndex, 234 unsigned TailOffset, Split Split, 235 WhitespaceManager &Whitespaces) { 236 Whitespaces.replaceWhitespaceInToken( 237 Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, "", 238 "", /*InPPDirective=*/false, /*Newlines=*/0, /*IndentLevel=*/0, 239 /*Spaces=*/1); 240 } 241 242 void BreakableLineComment::replaceWhitespaceBefore( 243 unsigned LineIndex, WhitespaceManager &Whitespaces) { 244 if (OriginalPrefix != Prefix) { 245 Whitespaces.replaceWhitespaceInToken(Tok, OriginalPrefix.size(), 0, "", "", 246 /*InPPDirective=*/false, 247 /*Newlines=*/0, /*IndentLevel=*/0, 248 /*Spaces=*/1); 249 } 250 } 251 252 BreakableBlockComment::BreakableBlockComment( 253 const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn, 254 unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, 255 encoding::Encoding Encoding, const FormatStyle &Style) 256 : BreakableToken(Token, IndentLevel, InPPDirective, Encoding, Style) { 257 StringRef TokenText(Token.TokenText); 258 assert(TokenText.startswith("/*") && TokenText.endswith("*/")); 259 TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n"); 260 261 int IndentDelta = StartColumn - OriginalStartColumn; 262 LeadingWhitespace.resize(Lines.size()); 263 StartOfLineColumn.resize(Lines.size()); 264 StartOfLineColumn[0] = StartColumn + 2; 265 for (size_t i = 1; i < Lines.size(); ++i) 266 adjustWhitespace(i, IndentDelta); 267 268 Decoration = "* "; 269 if (Lines.size() == 1 && !FirstInLine) { 270 // Comments for which FirstInLine is false can start on arbitrary column, 271 // and available horizontal space can be too small to align consecutive 272 // lines with the first one. 273 // FIXME: We could, probably, align them to current indentation level, but 274 // now we just wrap them without stars. 275 Decoration = ""; 276 } 277 for (size_t i = 1, e = Lines.size(); i < e && !Decoration.empty(); ++i) { 278 // If the last line is empty, the closing "*/" will have a star. 279 if (i + 1 == e && Lines[i].empty()) 280 break; 281 if (!Lines[i].empty() && i + 1 != e && Decoration.startswith(Lines[i])) 282 continue; 283 while (!Lines[i].startswith(Decoration)) 284 Decoration = Decoration.substr(0, Decoration.size() - 1); 285 } 286 287 LastLineNeedsDecoration = true; 288 IndentAtLineBreak = StartOfLineColumn[0] + 1; 289 for (size_t i = 1; i < Lines.size(); ++i) { 290 if (Lines[i].empty()) { 291 if (i + 1 == Lines.size()) { 292 // Empty last line means that we already have a star as a part of the 293 // trailing */. We also need to preserve whitespace, so that */ is 294 // correctly indented. 295 LastLineNeedsDecoration = false; 296 } else if (Decoration.empty()) { 297 // For all other lines, set the start column to 0 if they're empty, so 298 // we do not insert trailing whitespace anywhere. 299 StartOfLineColumn[i] = 0; 300 } 301 continue; 302 } 303 304 // The first line already excludes the star. 305 // For all other lines, adjust the line to exclude the star and 306 // (optionally) the first whitespace. 307 unsigned DecorationSize = 308 Decoration.startswith(Lines[i]) ? Lines[i].size() : Decoration.size(); 309 StartOfLineColumn[i] += DecorationSize; 310 Lines[i] = Lines[i].substr(DecorationSize); 311 LeadingWhitespace[i] += DecorationSize; 312 if (!Decoration.startswith(Lines[i])) 313 IndentAtLineBreak = 314 std::min<int>(IndentAtLineBreak, std::max(0, StartOfLineColumn[i])); 315 } 316 IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size()); 317 DEBUG({ 318 llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n"; 319 for (size_t i = 0; i < Lines.size(); ++i) { 320 llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i] 321 << "\n"; 322 } 323 }); 324 } 325 326 void BreakableBlockComment::adjustWhitespace(unsigned LineIndex, 327 int IndentDelta) { 328 // When in a preprocessor directive, the trailing backslash in a block comment 329 // is not needed, but can serve a purpose of uniformity with necessary escaped 330 // newlines outside the comment. In this case we remove it here before 331 // trimming the trailing whitespace. The backslash will be re-added later when 332 // inserting a line break. 333 size_t EndOfPreviousLine = Lines[LineIndex - 1].size(); 334 if (InPPDirective && Lines[LineIndex - 1].endswith("\\")) 335 --EndOfPreviousLine; 336 337 // Calculate the end of the non-whitespace text in the previous line. 338 EndOfPreviousLine = 339 Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine); 340 if (EndOfPreviousLine == StringRef::npos) 341 EndOfPreviousLine = 0; 342 else 343 ++EndOfPreviousLine; 344 // Calculate the start of the non-whitespace text in the current line. 345 size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks); 346 if (StartOfLine == StringRef::npos) 347 StartOfLine = Lines[LineIndex].rtrim("\r\n").size(); 348 349 StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine); 350 // Adjust Lines to only contain relevant text. 351 Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine); 352 Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine); 353 // Adjust LeadingWhitespace to account all whitespace between the lines 354 // to the current line. 355 LeadingWhitespace[LineIndex] = 356 Lines[LineIndex].begin() - Lines[LineIndex - 1].end(); 357 358 // Adjust the start column uniformly across all lines. 359 StartOfLineColumn[LineIndex] = 360 encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) + 361 IndentDelta; 362 } 363 364 unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); } 365 366 unsigned BreakableBlockComment::getLineLengthAfterSplit( 367 unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { 368 unsigned ContentStartColumn = getContentStartColumn(LineIndex, Offset); 369 return ContentStartColumn + 370 encoding::columnWidthWithTabs(Lines[LineIndex].substr(Offset, Length), 371 ContentStartColumn, Style.TabWidth, 372 Encoding) + 373 // The last line gets a "*/" postfix. 374 (LineIndex + 1 == Lines.size() ? 2 : 0); 375 } 376 377 BreakableToken::Split 378 BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset, 379 unsigned ColumnLimit) const { 380 return getCommentSplit(Lines[LineIndex].substr(TailOffset), 381 getContentStartColumn(LineIndex, TailOffset), 382 ColumnLimit, Style.TabWidth, Encoding); 383 } 384 385 void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, 386 Split Split, 387 WhitespaceManager &Whitespaces) { 388 StringRef Text = Lines[LineIndex].substr(TailOffset); 389 StringRef Prefix = Decoration; 390 if (LineIndex + 1 == Lines.size() && 391 Text.size() == Split.first + Split.second) { 392 // For the last line we need to break before "*/", but not to add "* ". 393 Prefix = ""; 394 } 395 396 unsigned BreakOffsetInToken = 397 Text.data() - Tok.TokenText.data() + Split.first; 398 unsigned CharsToRemove = Split.second; 399 assert(IndentAtLineBreak >= Decoration.size()); 400 Whitespaces.replaceWhitespaceInToken( 401 Tok, BreakOffsetInToken, CharsToRemove, "", Prefix, InPPDirective, 1, 402 IndentLevel, IndentAtLineBreak - Decoration.size()); 403 } 404 405 void BreakableBlockComment::replaceWhitespace(unsigned LineIndex, 406 unsigned TailOffset, Split Split, 407 WhitespaceManager &Whitespaces) { 408 StringRef Text = Lines[LineIndex].substr(TailOffset); 409 unsigned BreakOffsetInToken = 410 Text.data() - Tok.TokenText.data() + Split.first; 411 unsigned CharsToRemove = Split.second; 412 Whitespaces.replaceWhitespaceInToken( 413 Tok, BreakOffsetInToken, CharsToRemove, "", "", /*InPPDirective=*/false, 414 /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1); 415 } 416 417 void BreakableBlockComment::replaceWhitespaceBefore( 418 unsigned LineIndex, WhitespaceManager &Whitespaces) { 419 if (LineIndex == 0) 420 return; 421 StringRef Prefix = Decoration; 422 if (Lines[LineIndex].empty()) { 423 if (LineIndex + 1 == Lines.size()) { 424 if (!LastLineNeedsDecoration) { 425 // If the last line was empty, we don't need a prefix, as the */ will 426 // line up with the decoration (if it exists). 427 Prefix = ""; 428 } 429 } else if (!Decoration.empty()) { 430 // For other empty lines, if we do have a decoration, adapt it to not 431 // contain a trailing whitespace. 432 Prefix = Prefix.substr(0, 1); 433 } 434 } else { 435 if (StartOfLineColumn[LineIndex] == 1) { 436 // This line starts immediately after the decorating *. 437 Prefix = Prefix.substr(0, 1); 438 } 439 } 440 441 unsigned WhitespaceOffsetInToken = Lines[LineIndex].data() - 442 Tok.TokenText.data() - 443 LeadingWhitespace[LineIndex]; 444 Whitespaces.replaceWhitespaceInToken( 445 Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix, 446 InPPDirective, 1, IndentLevel, 447 StartOfLineColumn[LineIndex] - Prefix.size()); 448 } 449 450 unsigned 451 BreakableBlockComment::getContentStartColumn(unsigned LineIndex, 452 unsigned TailOffset) const { 453 // If we break, we always break at the predefined indent. 454 if (TailOffset != 0) 455 return IndentAtLineBreak; 456 return std::max(0, StartOfLineColumn[LineIndex]); 457 } 458 459 } // namespace format 460 } // namespace clang 461