1 //===--- BreakableToken.cpp - Format C++ code -----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief Contains implementation of BreakableToken class and classes derived 12 /// from it. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #define DEBUG_TYPE "format-token-breaker" 17 18 #include "BreakableToken.h" 19 #include "clang/Basic/CharInfo.h" 20 #include "clang/Format/Format.h" 21 #include "llvm/ADT/STLExtras.h" 22 #include "llvm/Support/Debug.h" 23 #include <algorithm> 24 25 namespace clang { 26 namespace format { 27 28 static const char *const Blanks = " \t\v\f"; 29 static bool IsBlank(char C) { 30 switch (C) { 31 case ' ': 32 case '\t': 33 case '\v': 34 case '\f': 35 return true; 36 default: 37 return false; 38 } 39 } 40 41 static BreakableToken::Split getCommentSplit(StringRef Text, 42 unsigned ContentStartColumn, 43 unsigned ColumnLimit, 44 encoding::Encoding Encoding) { 45 if (ColumnLimit <= ContentStartColumn + 1) 46 return BreakableToken::Split(StringRef::npos, 0); 47 48 unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1; 49 unsigned MaxSplitBytes = 0; 50 51 for (unsigned NumChars = 0; 52 NumChars < MaxSplit && MaxSplitBytes < Text.size(); ++NumChars) 53 MaxSplitBytes += 54 encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding); 55 56 StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes); 57 if (SpaceOffset == StringRef::npos || 58 // Don't break at leading whitespace. 59 Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) { 60 // Make sure that we don't break at leading whitespace that 61 // reaches past MaxSplit. 62 StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks); 63 if (FirstNonWhitespace == StringRef::npos) 64 // If the comment is only whitespace, we cannot split. 65 return BreakableToken::Split(StringRef::npos, 0); 66 SpaceOffset = Text.find_first_of( 67 Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace)); 68 } 69 if (SpaceOffset != StringRef::npos && SpaceOffset != 0) { 70 StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks); 71 StringRef AfterCut = Text.substr(SpaceOffset).ltrim(Blanks); 72 return BreakableToken::Split(BeforeCut.size(), 73 AfterCut.begin() - BeforeCut.end()); 74 } 75 return BreakableToken::Split(StringRef::npos, 0); 76 } 77 78 static BreakableToken::Split getStringSplit(StringRef Text, 79 unsigned ContentStartColumn, 80 unsigned ColumnLimit, 81 encoding::Encoding Encoding) { 82 // FIXME: Reduce unit test case. 83 if (Text.empty()) 84 return BreakableToken::Split(StringRef::npos, 0); 85 if (ColumnLimit <= ContentStartColumn) 86 return BreakableToken::Split(StringRef::npos, 0); 87 unsigned MaxSplit = 88 std::min<unsigned>(ColumnLimit - ContentStartColumn, 89 encoding::getCodePointCount(Text, Encoding) - 1); 90 StringRef::size_type SpaceOffset = 0; 91 StringRef::size_type SlashOffset = 0; 92 StringRef::size_type WordStartOffset = 0; 93 StringRef::size_type SplitPoint = 0; 94 for (unsigned Chars = 0;;) { 95 unsigned Advance; 96 if (Text[0] == '\\') { 97 Advance = encoding::getEscapeSequenceLength(Text); 98 Chars += Advance; 99 } else { 100 Advance = encoding::getCodePointNumBytes(Text[0], Encoding); 101 Chars += 1; 102 } 103 104 if (Chars > MaxSplit) 105 break; 106 107 if (IsBlank(Text[0])) 108 SpaceOffset = SplitPoint; 109 if (Text[0] == '/') 110 SlashOffset = SplitPoint; 111 if (Advance == 1 && !isAlphanumeric(Text[0])) 112 WordStartOffset = SplitPoint; 113 114 SplitPoint += Advance; 115 Text = Text.substr(Advance); 116 } 117 118 if (SpaceOffset != 0) 119 return BreakableToken::Split(SpaceOffset + 1, 0); 120 if (SlashOffset != 0) 121 return BreakableToken::Split(SlashOffset + 1, 0); 122 if (WordStartOffset != 0) 123 return BreakableToken::Split(WordStartOffset + 1, 0); 124 if (SplitPoint != 0) 125 return BreakableToken::Split(SplitPoint, 0); 126 return BreakableToken::Split(StringRef::npos, 0); 127 } 128 129 unsigned BreakableSingleLineToken::getLineCount() const { return 1; } 130 131 unsigned BreakableSingleLineToken::getLineLengthAfterSplit( 132 unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { 133 return StartColumn + Prefix.size() + Postfix.size() + 134 encoding::getCodePointCount(Line.substr(Offset, Length), Encoding); 135 } 136 137 BreakableSingleLineToken::BreakableSingleLineToken( 138 const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, 139 StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding) 140 : BreakableToken(Tok, InPPDirective, Encoding), StartColumn(StartColumn), 141 Prefix(Prefix), Postfix(Postfix) { 142 assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix)); 143 Line = Tok.TokenText.substr( 144 Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size()); 145 } 146 147 BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok, 148 unsigned StartColumn, 149 bool InPPDirective, 150 encoding::Encoding Encoding) 151 : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"", InPPDirective, 152 Encoding) {} 153 154 BreakableToken::Split 155 BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset, 156 unsigned ColumnLimit) const { 157 return getStringSplit(Line.substr(TailOffset), StartColumn + 2, ColumnLimit, 158 Encoding); 159 } 160 161 void BreakableStringLiteral::insertBreak(unsigned LineIndex, 162 unsigned TailOffset, Split Split, 163 WhitespaceManager &Whitespaces) { 164 Whitespaces.replaceWhitespaceInToken( 165 Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix, 166 Prefix, InPPDirective, 1, StartColumn); 167 } 168 169 static StringRef getLineCommentPrefix(StringRef Comment) { 170 static const char *const KnownPrefixes[] = { "/// ", "///", "// ", "//" }; 171 for (size_t i = 0, e = llvm::array_lengthof(KnownPrefixes); i != e; ++i) 172 if (Comment.startswith(KnownPrefixes[i])) 173 return KnownPrefixes[i]; 174 return ""; 175 } 176 177 BreakableLineComment::BreakableLineComment(const FormatToken &Token, 178 unsigned StartColumn, 179 bool InPPDirective, 180 encoding::Encoding Encoding) 181 : BreakableSingleLineToken(Token, StartColumn, 182 getLineCommentPrefix(Token.TokenText), "", 183 InPPDirective, Encoding) { 184 OriginalPrefix = Prefix; 185 if (Token.TokenText.size() > Prefix.size() && 186 isAlphanumeric(Token.TokenText[Prefix.size()])) { 187 if (Prefix == "//") 188 Prefix = "// "; 189 else if (Prefix == "///") 190 Prefix = "/// "; 191 } 192 } 193 194 BreakableToken::Split 195 BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset, 196 unsigned ColumnLimit) const { 197 return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(), 198 ColumnLimit, Encoding); 199 } 200 201 void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset, 202 Split Split, 203 WhitespaceManager &Whitespaces) { 204 Whitespaces.replaceWhitespaceInToken( 205 Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, 206 Postfix, Prefix, InPPDirective, 1, StartColumn); 207 } 208 209 void 210 BreakableLineComment::replaceWhitespaceBefore(unsigned LineIndex, 211 WhitespaceManager &Whitespaces) { 212 if (OriginalPrefix != Prefix) { 213 Whitespaces.replaceWhitespaceInToken(Tok, OriginalPrefix.size(), 0, "", "", 214 false, 0, 1); 215 } 216 } 217 218 BreakableBlockComment::BreakableBlockComment( 219 const FormatStyle &Style, const FormatToken &Token, unsigned StartColumn, 220 unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, 221 encoding::Encoding Encoding) 222 : BreakableToken(Token, InPPDirective, Encoding) { 223 StringRef TokenText(Token.TokenText); 224 assert(TokenText.startswith("/*") && TokenText.endswith("*/")); 225 TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n"); 226 227 int IndentDelta = StartColumn - OriginalStartColumn; 228 LeadingWhitespace.resize(Lines.size()); 229 StartOfLineColumn.resize(Lines.size()); 230 StartOfLineColumn[0] = StartColumn + 2; 231 for (size_t i = 1; i < Lines.size(); ++i) 232 adjustWhitespace(Style, i, IndentDelta); 233 234 Decoration = "* "; 235 if (Lines.size() == 1 && !FirstInLine) { 236 // Comments for which FirstInLine is false can start on arbitrary column, 237 // and available horizontal space can be too small to align consecutive 238 // lines with the first one. 239 // FIXME: We could, probably, align them to current indentation level, but 240 // now we just wrap them without stars. 241 Decoration = ""; 242 } 243 for (size_t i = 1, e = Lines.size(); i < e && !Decoration.empty(); ++i) { 244 // If the last line is empty, the closing "*/" will have a star. 245 if (i + 1 == e && Lines[i].empty()) 246 break; 247 while (!Lines[i].startswith(Decoration)) 248 Decoration = Decoration.substr(0, Decoration.size() - 1); 249 } 250 251 LastLineNeedsDecoration = true; 252 IndentAtLineBreak = StartOfLineColumn[0] + 1; 253 for (size_t i = 1; i < Lines.size(); ++i) { 254 if (Lines[i].empty()) { 255 if (i + 1 == Lines.size()) { 256 // Empty last line means that we already have a star as a part of the 257 // trailing */. We also need to preserve whitespace, so that */ is 258 // correctly indented. 259 LastLineNeedsDecoration = false; 260 } else if (Decoration.empty()) { 261 // For all other lines, set the start column to 0 if they're empty, so 262 // we do not insert trailing whitespace anywhere. 263 StartOfLineColumn[i] = 0; 264 } 265 continue; 266 } 267 // The first line already excludes the star. 268 // For all other lines, adjust the line to exclude the star and 269 // (optionally) the first whitespace. 270 StartOfLineColumn[i] += Decoration.size(); 271 Lines[i] = Lines[i].substr(Decoration.size()); 272 LeadingWhitespace[i] += Decoration.size(); 273 IndentAtLineBreak = std::min<int>(IndentAtLineBreak, StartOfLineColumn[i]); 274 } 275 IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size()); 276 DEBUG({ 277 llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n"; 278 for (size_t i = 0; i < Lines.size(); ++i) { 279 llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i] 280 << "\n"; 281 } 282 }); 283 } 284 285 void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style, 286 unsigned LineIndex, 287 int IndentDelta) { 288 // When in a preprocessor directive, the trailing backslash in a block comment 289 // is not needed, but can serve a purpose of uniformity with necessary escaped 290 // newlines outside the comment. In this case we remove it here before 291 // trimming the trailing whitespace. The backslash will be re-added later when 292 // inserting a line break. 293 size_t EndOfPreviousLine = Lines[LineIndex - 1].size(); 294 if (InPPDirective && Lines[LineIndex - 1].endswith("\\")) 295 --EndOfPreviousLine; 296 297 // Calculate the end of the non-whitespace text in the previous line. 298 EndOfPreviousLine = 299 Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine); 300 if (EndOfPreviousLine == StringRef::npos) 301 EndOfPreviousLine = 0; 302 else 303 ++EndOfPreviousLine; 304 // Calculate the start of the non-whitespace text in the current line. 305 size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks); 306 if (StartOfLine == StringRef::npos) 307 StartOfLine = Lines[LineIndex].size(); 308 309 // Adjust Lines to only contain relevant text. 310 Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine); 311 Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine); 312 // Adjust LeadingWhitespace to account all whitespace between the lines 313 // to the current line. 314 LeadingWhitespace[LineIndex] = 315 Lines[LineIndex].begin() - Lines[LineIndex - 1].end(); 316 317 // FIXME: We currently count tabs as 1 character. To solve this, we need to 318 // get the correct indentation width of the start of the comment, which 319 // requires correct counting of the tab expansions before the comment, and 320 // a configurable tab width. Since the current implementation only breaks 321 // if leading tabs are intermixed with spaces, that is not a high priority. 322 323 // Adjust the start column uniformly accross all lines. 324 StartOfLineColumn[LineIndex] = std::max<int>(0, StartOfLine + IndentDelta); 325 } 326 327 unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); } 328 329 unsigned BreakableBlockComment::getLineLengthAfterSplit( 330 unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { 331 return getContentStartColumn(LineIndex, Offset) + 332 encoding::getCodePointCount(Lines[LineIndex].substr(Offset, Length), 333 Encoding) + 334 // The last line gets a "*/" postfix. 335 (LineIndex + 1 == Lines.size() ? 2 : 0); 336 } 337 338 BreakableToken::Split 339 BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset, 340 unsigned ColumnLimit) const { 341 return getCommentSplit(Lines[LineIndex].substr(TailOffset), 342 getContentStartColumn(LineIndex, TailOffset), 343 ColumnLimit, Encoding); 344 } 345 346 void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, 347 Split Split, 348 WhitespaceManager &Whitespaces) { 349 StringRef Text = Lines[LineIndex].substr(TailOffset); 350 StringRef Prefix = Decoration; 351 if (LineIndex + 1 == Lines.size() && 352 Text.size() == Split.first + Split.second) { 353 // For the last line we need to break before "*/", but not to add "* ". 354 Prefix = ""; 355 } 356 357 unsigned BreakOffsetInToken = 358 Text.data() - Tok.TokenText.data() + Split.first; 359 unsigned CharsToRemove = Split.second; 360 assert(IndentAtLineBreak >= Decoration.size()); 361 Whitespaces.replaceWhitespaceInToken(Tok, BreakOffsetInToken, CharsToRemove, 362 "", Prefix, InPPDirective, 1, 363 IndentAtLineBreak - Decoration.size()); 364 } 365 366 void 367 BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex, 368 WhitespaceManager &Whitespaces) { 369 if (LineIndex == 0) 370 return; 371 StringRef Prefix = Decoration; 372 if (Lines[LineIndex].empty()) { 373 if (LineIndex + 1 == Lines.size()) { 374 if (!LastLineNeedsDecoration) { 375 // If the last line was empty, we don't need a prefix, as the */ will 376 // line up with the decoration (if it exists). 377 Prefix = ""; 378 } 379 } else if (!Decoration.empty()) { 380 // For other empty lines, if we do have a decoration, adapt it to not 381 // contain a trailing whitespace. 382 Prefix = Prefix.substr(0, 1); 383 } 384 } else { 385 if (StartOfLineColumn[LineIndex] == 1) { 386 // This line starts immediately after the decorating *. 387 Prefix = Prefix.substr(0, 1); 388 } 389 } 390 391 unsigned WhitespaceOffsetInToken = Lines[LineIndex].data() - 392 Tok.TokenText.data() - 393 LeadingWhitespace[LineIndex]; 394 assert(StartOfLineColumn[LineIndex] >= Prefix.size()); 395 Whitespaces.replaceWhitespaceInToken( 396 Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix, 397 InPPDirective, 1, StartOfLineColumn[LineIndex] - Prefix.size()); 398 } 399 400 unsigned 401 BreakableBlockComment::getContentStartColumn(unsigned LineIndex, 402 unsigned TailOffset) const { 403 // If we break, we always break at the predefined indent. 404 if (TailOffset != 0) 405 return IndentAtLineBreak; 406 return StartOfLineColumn[LineIndex]; 407 } 408 409 } // namespace format 410 } // namespace clang 411