Home | History | Annotate | Download | only in Format
      1 //===--- UnwrappedLineFormatter.cpp - Format C++ code ---------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 #include "UnwrappedLineFormatter.h"
     11 #include "WhitespaceManager.h"
     12 #include "llvm/Support/Debug.h"
     13 
     14 #define DEBUG_TYPE "format-formatter"
     15 
     16 namespace clang {
     17 namespace format {
     18 
     19 namespace {
     20 
     21 bool startsExternCBlock(const AnnotatedLine &Line) {
     22   const FormatToken *Next = Line.First->getNextNonComment();
     23   const FormatToken *NextNext = Next ? Next->getNextNonComment() : nullptr;
     24   return Line.startsWith(tok::kw_extern) && Next && Next->isStringLiteral() &&
     25          NextNext && NextNext->is(tok::l_brace);
     26 }
     27 
     28 /// \brief Tracks the indent level of \c AnnotatedLines across levels.
     29 ///
     30 /// \c nextLine must be called for each \c AnnotatedLine, after which \c
     31 /// getIndent() will return the indent for the last line \c nextLine was called
     32 /// with.
     33 /// If the line is not formatted (and thus the indent does not change), calling
     34 /// \c adjustToUnmodifiedLine after the call to \c nextLine will cause
     35 /// subsequent lines on the same level to be indented at the same level as the
     36 /// given line.
     37 class LevelIndentTracker {
     38 public:
     39   LevelIndentTracker(const FormatStyle &Style,
     40                      const AdditionalKeywords &Keywords, unsigned StartLevel,
     41                      int AdditionalIndent)
     42       : Style(Style), Keywords(Keywords), AdditionalIndent(AdditionalIndent) {
     43     for (unsigned i = 0; i != StartLevel; ++i)
     44       IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent);
     45   }
     46 
     47   /// \brief Returns the indent for the current line.
     48   unsigned getIndent() const { return Indent; }
     49 
     50   /// \brief Update the indent state given that \p Line is going to be formatted
     51   /// next.
     52   void nextLine(const AnnotatedLine &Line) {
     53     Offset = getIndentOffset(*Line.First);
     54     // Update the indent level cache size so that we can rely on it
     55     // having the right size in adjustToUnmodifiedline.
     56     while (IndentForLevel.size() <= Line.Level)
     57       IndentForLevel.push_back(-1);
     58     if (Line.InPPDirective) {
     59       Indent = Line.Level * Style.IndentWidth + AdditionalIndent;
     60     } else {
     61       IndentForLevel.resize(Line.Level + 1);
     62       Indent = getIndent(IndentForLevel, Line.Level);
     63     }
     64     if (static_cast<int>(Indent) + Offset >= 0)
     65       Indent += Offset;
     66   }
     67 
     68   /// \brief Update the level indent to adapt to the given \p Line.
     69   ///
     70   /// When a line is not formatted, we move the subsequent lines on the same
     71   /// level to the same indent.
     72   /// Note that \c nextLine must have been called before this method.
     73   void adjustToUnmodifiedLine(const AnnotatedLine &Line) {
     74     unsigned LevelIndent = Line.First->OriginalColumn;
     75     if (static_cast<int>(LevelIndent) - Offset >= 0)
     76       LevelIndent -= Offset;
     77     if ((!Line.First->is(tok::comment) || IndentForLevel[Line.Level] == -1) &&
     78         !Line.InPPDirective)
     79       IndentForLevel[Line.Level] = LevelIndent;
     80   }
     81 
     82 private:
     83   /// \brief Get the offset of the line relatively to the level.
     84   ///
     85   /// For example, 'public:' labels in classes are offset by 1 or 2
     86   /// characters to the left from their level.
     87   int getIndentOffset(const FormatToken &RootToken) {
     88     if (Style.Language == FormatStyle::LK_Java ||
     89         Style.Language == FormatStyle::LK_JavaScript)
     90       return 0;
     91     if (RootToken.isAccessSpecifier(false) ||
     92         RootToken.isObjCAccessSpecifier() ||
     93         (RootToken.isOneOf(Keywords.kw_signals, Keywords.kw_qsignals) &&
     94          RootToken.Next && RootToken.Next->is(tok::colon)))
     95       return Style.AccessModifierOffset;
     96     return 0;
     97   }
     98 
     99   /// \brief Get the indent of \p Level from \p IndentForLevel.
    100   ///
    101   /// \p IndentForLevel must contain the indent for the level \c l
    102   /// at \p IndentForLevel[l], or a value < 0 if the indent for
    103   /// that level is unknown.
    104   unsigned getIndent(ArrayRef<int> IndentForLevel, unsigned Level) {
    105     if (IndentForLevel[Level] != -1)
    106       return IndentForLevel[Level];
    107     if (Level == 0)
    108       return 0;
    109     return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth;
    110   }
    111 
    112   const FormatStyle &Style;
    113   const AdditionalKeywords &Keywords;
    114   const unsigned AdditionalIndent;
    115 
    116   /// \brief The indent in characters for each level.
    117   std::vector<int> IndentForLevel;
    118 
    119   /// \brief Offset of the current line relative to the indent level.
    120   ///
    121   /// For example, the 'public' keywords is often indented with a negative
    122   /// offset.
    123   int Offset = 0;
    124 
    125   /// \brief The current line's indent.
    126   unsigned Indent = 0;
    127 };
    128 
    129 class LineJoiner {
    130 public:
    131   LineJoiner(const FormatStyle &Style, const AdditionalKeywords &Keywords,
    132              const SmallVectorImpl<AnnotatedLine *> &Lines)
    133       : Style(Style), Keywords(Keywords), End(Lines.end()),
    134         Next(Lines.begin()) {}
    135 
    136   /// \brief Returns the next line, merging multiple lines into one if possible.
    137   const AnnotatedLine *getNextMergedLine(bool DryRun,
    138                                          LevelIndentTracker &IndentTracker) {
    139     if (Next == End)
    140       return nullptr;
    141     const AnnotatedLine *Current = *Next;
    142     IndentTracker.nextLine(*Current);
    143     unsigned MergedLines =
    144         tryFitMultipleLinesInOne(IndentTracker.getIndent(), Next, End);
    145     if (MergedLines > 0 && Style.ColumnLimit == 0)
    146       // Disallow line merging if there is a break at the start of one of the
    147       // input lines.
    148       for (unsigned i = 0; i < MergedLines; ++i)
    149         if (Next[i + 1]->First->NewlinesBefore > 0)
    150           MergedLines = 0;
    151     if (!DryRun)
    152       for (unsigned i = 0; i < MergedLines; ++i)
    153         join(*Next[i], *Next[i + 1]);
    154     Next = Next + MergedLines + 1;
    155     return Current;
    156   }
    157 
    158 private:
    159   /// \brief Calculates how many lines can be merged into 1 starting at \p I.
    160   unsigned
    161   tryFitMultipleLinesInOne(unsigned Indent,
    162                            SmallVectorImpl<AnnotatedLine *>::const_iterator I,
    163                            SmallVectorImpl<AnnotatedLine *>::const_iterator E) {
    164     // Can't join the last line with anything.
    165     if (I + 1 == E)
    166       return 0;
    167     // We can never merge stuff if there are trailing line comments.
    168     const AnnotatedLine *TheLine = *I;
    169     if (TheLine->Last->is(TT_LineComment))
    170       return 0;
    171     if (I[1]->Type == LT_Invalid || I[1]->First->MustBreakBefore)
    172       return 0;
    173     if (TheLine->InPPDirective &&
    174         (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline))
    175       return 0;
    176 
    177     if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit)
    178       return 0;
    179 
    180     unsigned Limit =
    181         Style.ColumnLimit == 0 ? UINT_MAX : Style.ColumnLimit - Indent;
    182     // If we already exceed the column limit, we set 'Limit' to 0. The different
    183     // tryMerge..() functions can then decide whether to still do merging.
    184     Limit = TheLine->Last->TotalLength > Limit
    185                 ? 0
    186                 : Limit - TheLine->Last->TotalLength;
    187 
    188     // FIXME: TheLine->Level != 0 might or might not be the right check to do.
    189     // If necessary, change to something smarter.
    190     bool MergeShortFunctions =
    191         Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_All ||
    192         (Style.AllowShortFunctionsOnASingleLine >= FormatStyle::SFS_Empty &&
    193          I[1]->First->is(tok::r_brace)) ||
    194         (Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Inline &&
    195          TheLine->Level != 0);
    196 
    197     if (TheLine->Last->is(TT_FunctionLBrace) &&
    198         TheLine->First != TheLine->Last) {
    199       return MergeShortFunctions ? tryMergeSimpleBlock(I, E, Limit) : 0;
    200     }
    201     if (TheLine->Last->is(tok::l_brace)) {
    202       return !Style.BraceWrapping.AfterFunction
    203                  ? tryMergeSimpleBlock(I, E, Limit)
    204                  : 0;
    205     }
    206     if (I[1]->First->is(TT_FunctionLBrace) &&
    207         Style.BraceWrapping.AfterFunction) {
    208       if (I[1]->Last->is(TT_LineComment))
    209         return 0;
    210 
    211       // Check for Limit <= 2 to account for the " {".
    212       if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(TheLine)))
    213         return 0;
    214       Limit -= 2;
    215 
    216       unsigned MergedLines = 0;
    217       if (MergeShortFunctions) {
    218         MergedLines = tryMergeSimpleBlock(I + 1, E, Limit);
    219         // If we managed to merge the block, count the function header, which is
    220         // on a separate line.
    221         if (MergedLines > 0)
    222           ++MergedLines;
    223       }
    224       return MergedLines;
    225     }
    226     if (TheLine->First->is(tok::kw_if)) {
    227       return Style.AllowShortIfStatementsOnASingleLine
    228                  ? tryMergeSimpleControlStatement(I, E, Limit)
    229                  : 0;
    230     }
    231     if (TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) {
    232       return Style.AllowShortLoopsOnASingleLine
    233                  ? tryMergeSimpleControlStatement(I, E, Limit)
    234                  : 0;
    235     }
    236     if (TheLine->First->isOneOf(tok::kw_case, tok::kw_default)) {
    237       return Style.AllowShortCaseLabelsOnASingleLine
    238                  ? tryMergeShortCaseLabels(I, E, Limit)
    239                  : 0;
    240     }
    241     if (TheLine->InPPDirective &&
    242         (TheLine->First->HasUnescapedNewline || TheLine->First->IsFirst)) {
    243       return tryMergeSimplePPDirective(I, E, Limit);
    244     }
    245     return 0;
    246   }
    247 
    248   unsigned
    249   tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
    250                             SmallVectorImpl<AnnotatedLine *>::const_iterator E,
    251                             unsigned Limit) {
    252     if (Limit == 0)
    253       return 0;
    254     if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline)
    255       return 0;
    256     if (1 + I[1]->Last->TotalLength > Limit)
    257       return 0;
    258     return 1;
    259   }
    260 
    261   unsigned tryMergeSimpleControlStatement(
    262       SmallVectorImpl<AnnotatedLine *>::const_iterator I,
    263       SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) {
    264     if (Limit == 0)
    265       return 0;
    266     if (Style.BraceWrapping.AfterControlStatement &&
    267         (I[1]->First->is(tok::l_brace) && !Style.AllowShortBlocksOnASingleLine))
    268       return 0;
    269     if (I[1]->InPPDirective != (*I)->InPPDirective ||
    270         (I[1]->InPPDirective && I[1]->First->HasUnescapedNewline))
    271       return 0;
    272     Limit = limitConsideringMacros(I + 1, E, Limit);
    273     AnnotatedLine &Line = **I;
    274     if (Line.Last->isNot(tok::r_paren))
    275       return 0;
    276     if (1 + I[1]->Last->TotalLength > Limit)
    277       return 0;
    278     if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, tok::kw_while,
    279                              TT_LineComment))
    280       return 0;
    281     // Only inline simple if's (no nested if or else).
    282     if (I + 2 != E && Line.startsWith(tok::kw_if) &&
    283         I[2]->First->is(tok::kw_else))
    284       return 0;
    285     return 1;
    286   }
    287 
    288   unsigned
    289   tryMergeShortCaseLabels(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
    290                           SmallVectorImpl<AnnotatedLine *>::const_iterator E,
    291                           unsigned Limit) {
    292     if (Limit == 0 || I + 1 == E ||
    293         I[1]->First->isOneOf(tok::kw_case, tok::kw_default))
    294       return 0;
    295     unsigned NumStmts = 0;
    296     unsigned Length = 0;
    297     bool InPPDirective = I[0]->InPPDirective;
    298     for (; NumStmts < 3; ++NumStmts) {
    299       if (I + 1 + NumStmts == E)
    300         break;
    301       const AnnotatedLine *Line = I[1 + NumStmts];
    302       if (Line->InPPDirective != InPPDirective)
    303         break;
    304       if (Line->First->isOneOf(tok::kw_case, tok::kw_default, tok::r_brace))
    305         break;
    306       if (Line->First->isOneOf(tok::kw_if, tok::kw_for, tok::kw_switch,
    307                                tok::kw_while, tok::comment) ||
    308           Line->Last->is(tok::comment))
    309         return 0;
    310       Length += I[1 + NumStmts]->Last->TotalLength + 1; // 1 for the space.
    311     }
    312     if (NumStmts == 0 || NumStmts == 3 || Length > Limit)
    313       return 0;
    314     return NumStmts;
    315   }
    316 
    317   unsigned
    318   tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
    319                       SmallVectorImpl<AnnotatedLine *>::const_iterator E,
    320                       unsigned Limit) {
    321     AnnotatedLine &Line = **I;
    322 
    323     // Don't merge ObjC @ keywords and methods.
    324     // FIXME: If an option to allow short exception handling clauses on a single
    325     // line is added, change this to not return for @try and friends.
    326     if (Style.Language != FormatStyle::LK_Java &&
    327         Line.First->isOneOf(tok::at, tok::minus, tok::plus))
    328       return 0;
    329 
    330     // Check that the current line allows merging. This depends on whether we
    331     // are in a control flow statements as well as several style flags.
    332     if (Line.First->isOneOf(tok::kw_else, tok::kw_case) ||
    333         (Line.First->Next && Line.First->Next->is(tok::kw_else)))
    334       return 0;
    335     if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::kw_try,
    336                             tok::kw___try, tok::kw_catch, tok::kw___finally,
    337                             tok::kw_for, tok::r_brace, Keywords.kw___except)) {
    338       if (!Style.AllowShortBlocksOnASingleLine)
    339         return 0;
    340       if (!Style.AllowShortIfStatementsOnASingleLine &&
    341           Line.startsWith(tok::kw_if))
    342         return 0;
    343       if (!Style.AllowShortLoopsOnASingleLine &&
    344           Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for))
    345         return 0;
    346       // FIXME: Consider an option to allow short exception handling clauses on
    347       // a single line.
    348       // FIXME: This isn't covered by tests.
    349       // FIXME: For catch, __except, __finally the first token on the line
    350       // is '}', so this isn't correct here.
    351       if (Line.First->isOneOf(tok::kw_try, tok::kw___try, tok::kw_catch,
    352                               Keywords.kw___except, tok::kw___finally))
    353         return 0;
    354     }
    355 
    356     FormatToken *Tok = I[1]->First;
    357     if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore &&
    358         (Tok->getNextNonComment() == nullptr ||
    359          Tok->getNextNonComment()->is(tok::semi))) {
    360       // We merge empty blocks even if the line exceeds the column limit.
    361       Tok->SpacesRequiredBefore = 0;
    362       Tok->CanBreakBefore = true;
    363       return 1;
    364     } else if (Limit != 0 && !Line.startsWith(tok::kw_namespace) &&
    365                !startsExternCBlock(Line)) {
    366       // We don't merge short records.
    367       if (Line.First->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct,
    368                               Keywords.kw_interface))
    369         return 0;
    370 
    371       // Check that we still have three lines and they fit into the limit.
    372       if (I + 2 == E || I[2]->Type == LT_Invalid)
    373         return 0;
    374       Limit = limitConsideringMacros(I + 2, E, Limit);
    375 
    376       if (!nextTwoLinesFitInto(I, Limit))
    377         return 0;
    378 
    379       // Second, check that the next line does not contain any braces - if it
    380       // does, readability declines when putting it into a single line.
    381       if (I[1]->Last->is(TT_LineComment))
    382         return 0;
    383       do {
    384         if (Tok->is(tok::l_brace) && Tok->BlockKind != BK_BracedInit)
    385           return 0;
    386         Tok = Tok->Next;
    387       } while (Tok);
    388 
    389       // Last, check that the third line starts with a closing brace.
    390       Tok = I[2]->First;
    391       if (Tok->isNot(tok::r_brace))
    392         return 0;
    393 
    394       // Don't merge "if (a) { .. } else {".
    395       if (Tok->Next && Tok->Next->is(tok::kw_else))
    396         return 0;
    397 
    398       return 2;
    399     }
    400     return 0;
    401   }
    402 
    403   /// Returns the modified column limit for \p I if it is inside a macro and
    404   /// needs a trailing '\'.
    405   unsigned
    406   limitConsideringMacros(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
    407                          SmallVectorImpl<AnnotatedLine *>::const_iterator E,
    408                          unsigned Limit) {
    409     if (I[0]->InPPDirective && I + 1 != E &&
    410         !I[1]->First->HasUnescapedNewline && !I[1]->First->is(tok::eof)) {
    411       return Limit < 2 ? 0 : Limit - 2;
    412     }
    413     return Limit;
    414   }
    415 
    416   bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
    417                            unsigned Limit) {
    418     if (I[1]->First->MustBreakBefore || I[2]->First->MustBreakBefore)
    419       return false;
    420     return 1 + I[1]->Last->TotalLength + 1 + I[2]->Last->TotalLength <= Limit;
    421   }
    422 
    423   bool containsMustBreak(const AnnotatedLine *Line) {
    424     for (const FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
    425       if (Tok->MustBreakBefore)
    426         return true;
    427     }
    428     return false;
    429   }
    430 
    431   void join(AnnotatedLine &A, const AnnotatedLine &B) {
    432     assert(!A.Last->Next);
    433     assert(!B.First->Previous);
    434     if (B.Affected)
    435       A.Affected = true;
    436     A.Last->Next = B.First;
    437     B.First->Previous = A.Last;
    438     B.First->CanBreakBefore = true;
    439     unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
    440     for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
    441       Tok->TotalLength += LengthA;
    442       A.Last = Tok;
    443     }
    444   }
    445 
    446   const FormatStyle &Style;
    447   const AdditionalKeywords &Keywords;
    448   const SmallVectorImpl<AnnotatedLine *>::const_iterator End;
    449 
    450   SmallVectorImpl<AnnotatedLine *>::const_iterator Next;
    451 };
    452 
    453 static void markFinalized(FormatToken *Tok) {
    454   for (; Tok; Tok = Tok->Next) {
    455     Tok->Finalized = true;
    456     for (AnnotatedLine *Child : Tok->Children)
    457       markFinalized(Child->First);
    458   }
    459 }
    460 
    461 #ifndef NDEBUG
    462 static void printLineState(const LineState &State) {
    463   llvm::dbgs() << "State: ";
    464   for (const ParenState &P : State.Stack) {
    465     llvm::dbgs() << P.Indent << "|" << P.LastSpace << "|" << P.NestedBlockIndent
    466                  << " ";
    467   }
    468   llvm::dbgs() << State.NextToken->TokenText << "\n";
    469 }
    470 #endif
    471 
    472 /// \brief Base class for classes that format one \c AnnotatedLine.
    473 class LineFormatter {
    474 public:
    475   LineFormatter(ContinuationIndenter *Indenter, WhitespaceManager *Whitespaces,
    476                 const FormatStyle &Style,
    477                 UnwrappedLineFormatter *BlockFormatter)
    478       : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style),
    479         BlockFormatter(BlockFormatter) {}
    480   virtual ~LineFormatter() {}
    481 
    482   /// \brief Formats an \c AnnotatedLine and returns the penalty.
    483   ///
    484   /// If \p DryRun is \c false, directly applies the changes.
    485   virtual unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
    486                               bool DryRun) = 0;
    487 
    488 protected:
    489   /// \brief If the \p State's next token is an r_brace closing a nested block,
    490   /// format the nested block before it.
    491   ///
    492   /// Returns \c true if all children could be placed successfully and adapts
    493   /// \p Penalty as well as \p State. If \p DryRun is false, also directly
    494   /// creates changes using \c Whitespaces.
    495   ///
    496   /// The crucial idea here is that children always get formatted upon
    497   /// encountering the closing brace right after the nested block. Now, if we
    498   /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is
    499   /// \c false), the entire block has to be kept on the same line (which is only
    500   /// possible if it fits on the line, only contains a single statement, etc.
    501   ///
    502   /// If \p NewLine is true, we format the nested block on separate lines, i.e.
    503   /// break after the "{", format all lines with correct indentation and the put
    504   /// the closing "}" on yet another new line.
    505   ///
    506   /// This enables us to keep the simple structure of the
    507   /// \c UnwrappedLineFormatter, where we only have two options for each token:
    508   /// break or don't break.
    509   bool formatChildren(LineState &State, bool NewLine, bool DryRun,
    510                       unsigned &Penalty) {
    511     const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
    512     FormatToken &Previous = *State.NextToken->Previous;
    513     if (!LBrace || LBrace->isNot(tok::l_brace) ||
    514         LBrace->BlockKind != BK_Block || Previous.Children.size() == 0)
    515       // The previous token does not open a block. Nothing to do. We don't
    516       // assert so that we can simply call this function for all tokens.
    517       return true;
    518 
    519     if (NewLine) {
    520       int AdditionalIndent = State.Stack.back().Indent -
    521                              Previous.Children[0]->Level * Style.IndentWidth;
    522 
    523       Penalty +=
    524           BlockFormatter->format(Previous.Children, DryRun, AdditionalIndent,
    525                                  /*FixBadIndentation=*/true);
    526       return true;
    527     }
    528 
    529     if (Previous.Children[0]->First->MustBreakBefore)
    530       return false;
    531 
    532     // Cannot merge multiple statements into a single line.
    533     if (Previous.Children.size() > 1)
    534       return false;
    535 
    536     // Cannot merge into one line if this line ends on a comment.
    537     if (Previous.is(tok::comment))
    538       return false;
    539 
    540     // We can't put the closing "}" on a line with a trailing comment.
    541     if (Previous.Children[0]->Last->isTrailingComment())
    542       return false;
    543 
    544     // If the child line exceeds the column limit, we wouldn't want to merge it.
    545     // We add +2 for the trailing " }".
    546     if (Style.ColumnLimit > 0 &&
    547         Previous.Children[0]->Last->TotalLength + State.Column + 2 >
    548             Style.ColumnLimit)
    549       return false;
    550 
    551     if (!DryRun) {
    552       Whitespaces->replaceWhitespace(
    553           *Previous.Children[0]->First,
    554           /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1,
    555           /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective);
    556     }
    557     Penalty += formatLine(*Previous.Children[0], State.Column + 1, DryRun);
    558 
    559     State.Column += 1 + Previous.Children[0]->Last->TotalLength;
    560     return true;
    561   }
    562 
    563   ContinuationIndenter *Indenter;
    564 
    565 private:
    566   WhitespaceManager *Whitespaces;
    567   const FormatStyle &Style;
    568   UnwrappedLineFormatter *BlockFormatter;
    569 };
    570 
    571 /// \brief Formatter that keeps the existing line breaks.
    572 class NoColumnLimitLineFormatter : public LineFormatter {
    573 public:
    574   NoColumnLimitLineFormatter(ContinuationIndenter *Indenter,
    575                              WhitespaceManager *Whitespaces,
    576                              const FormatStyle &Style,
    577                              UnwrappedLineFormatter *BlockFormatter)
    578       : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
    579 
    580   /// \brief Formats the line, simply keeping all of the input's line breaking
    581   /// decisions.
    582   unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
    583                       bool DryRun) override {
    584     assert(!DryRun);
    585     LineState State =
    586         Indenter->getInitialState(FirstIndent, &Line, /*DryRun=*/false);
    587     while (State.NextToken) {
    588       bool Newline =
    589           Indenter->mustBreak(State) ||
    590           (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0);
    591       unsigned Penalty = 0;
    592       formatChildren(State, Newline, /*DryRun=*/false, Penalty);
    593       Indenter->addTokenToState(State, Newline, /*DryRun=*/false);
    594     }
    595     return 0;
    596   }
    597 };
    598 
    599 /// \brief Formatter that puts all tokens into a single line without breaks.
    600 class NoLineBreakFormatter : public LineFormatter {
    601 public:
    602   NoLineBreakFormatter(ContinuationIndenter *Indenter,
    603                        WhitespaceManager *Whitespaces, const FormatStyle &Style,
    604                        UnwrappedLineFormatter *BlockFormatter)
    605       : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
    606 
    607   /// \brief Puts all tokens into a single line.
    608   unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
    609                       bool DryRun) override {
    610     unsigned Penalty = 0;
    611     LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
    612     while (State.NextToken) {
    613       formatChildren(State, /*Newline=*/false, DryRun, Penalty);
    614       Indenter->addTokenToState(State, /*Newline=*/false, DryRun);
    615     }
    616     return Penalty;
    617   }
    618 };
    619 
    620 /// \brief Finds the best way to break lines.
    621 class OptimizingLineFormatter : public LineFormatter {
    622 public:
    623   OptimizingLineFormatter(ContinuationIndenter *Indenter,
    624                           WhitespaceManager *Whitespaces,
    625                           const FormatStyle &Style,
    626                           UnwrappedLineFormatter *BlockFormatter)
    627       : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
    628 
    629   /// \brief Formats the line by finding the best line breaks with line lengths
    630   /// below the column limit.
    631   unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
    632                       bool DryRun) override {
    633     LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
    634 
    635     // If the ObjC method declaration does not fit on a line, we should format
    636     // it with one arg per line.
    637     if (State.Line->Type == LT_ObjCMethodDecl)
    638       State.Stack.back().BreakBeforeParameter = true;
    639 
    640     // Find best solution in solution space.
    641     return analyzeSolutionSpace(State, DryRun);
    642   }
    643 
    644 private:
    645   struct CompareLineStatePointers {
    646     bool operator()(LineState *obj1, LineState *obj2) const {
    647       return *obj1 < *obj2;
    648     }
    649   };
    650 
    651   /// \brief A pair of <penalty, count> that is used to prioritize the BFS on.
    652   ///
    653   /// In case of equal penalties, we want to prefer states that were inserted
    654   /// first. During state generation we make sure that we insert states first
    655   /// that break the line as late as possible.
    656   typedef std::pair<unsigned, unsigned> OrderedPenalty;
    657 
    658   /// \brief An edge in the solution space from \c Previous->State to \c State,
    659   /// inserting a newline dependent on the \c NewLine.
    660   struct StateNode {
    661     StateNode(const LineState &State, bool NewLine, StateNode *Previous)
    662         : State(State), NewLine(NewLine), Previous(Previous) {}
    663     LineState State;
    664     bool NewLine;
    665     StateNode *Previous;
    666   };
    667 
    668   /// \brief An item in the prioritized BFS search queue. The \c StateNode's
    669   /// \c State has the given \c OrderedPenalty.
    670   typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
    671 
    672   /// \brief The BFS queue type.
    673   typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
    674                               std::greater<QueueItem>> QueueType;
    675 
    676   /// \brief Analyze the entire solution space starting from \p InitialState.
    677   ///
    678   /// This implements a variant of Dijkstra's algorithm on the graph that spans
    679   /// the solution space (\c LineStates are the nodes). The algorithm tries to
    680   /// find the shortest path (the one with lowest penalty) from \p InitialState
    681   /// to a state where all tokens are placed. Returns the penalty.
    682   ///
    683   /// If \p DryRun is \c false, directly applies the changes.
    684   unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun) {
    685     std::set<LineState *, CompareLineStatePointers> Seen;
    686 
    687     // Increasing count of \c StateNode items we have created. This is used to
    688     // create a deterministic order independent of the container.
    689     unsigned Count = 0;
    690     QueueType Queue;
    691 
    692     // Insert start element into queue.
    693     StateNode *Node =
    694         new (Allocator.Allocate()) StateNode(InitialState, false, nullptr);
    695     Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
    696     ++Count;
    697 
    698     unsigned Penalty = 0;
    699 
    700     // While not empty, take first element and follow edges.
    701     while (!Queue.empty()) {
    702       Penalty = Queue.top().first.first;
    703       StateNode *Node = Queue.top().second;
    704       if (!Node->State.NextToken) {
    705         DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n");
    706         break;
    707       }
    708       Queue.pop();
    709 
    710       // Cut off the analysis of certain solutions if the analysis gets too
    711       // complex. See description of IgnoreStackForComparison.
    712       if (Count > 50000)
    713         Node->State.IgnoreStackForComparison = true;
    714 
    715       if (!Seen.insert(&Node->State).second)
    716         // State already examined with lower penalty.
    717         continue;
    718 
    719       FormatDecision LastFormat = Node->State.NextToken->Decision;
    720       if (LastFormat == FD_Unformatted || LastFormat == FD_Continue)
    721         addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue);
    722       if (LastFormat == FD_Unformatted || LastFormat == FD_Break)
    723         addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue);
    724     }
    725 
    726     if (Queue.empty()) {
    727       // We were unable to find a solution, do nothing.
    728       // FIXME: Add diagnostic?
    729       DEBUG(llvm::dbgs() << "Could not find a solution.\n");
    730       return 0;
    731     }
    732 
    733     // Reconstruct the solution.
    734     if (!DryRun)
    735       reconstructPath(InitialState, Queue.top().second);
    736 
    737     DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n");
    738     DEBUG(llvm::dbgs() << "---\n");
    739 
    740     return Penalty;
    741   }
    742 
    743   /// \brief Add the following state to the analysis queue \c Queue.
    744   ///
    745   /// Assume the current state is \p PreviousNode and has been reached with a
    746   /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
    747   void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
    748                            bool NewLine, unsigned *Count, QueueType *Queue) {
    749     if (NewLine && !Indenter->canBreak(PreviousNode->State))
    750       return;
    751     if (!NewLine && Indenter->mustBreak(PreviousNode->State))
    752       return;
    753 
    754     StateNode *Node = new (Allocator.Allocate())
    755         StateNode(PreviousNode->State, NewLine, PreviousNode);
    756     if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty))
    757       return;
    758 
    759     Penalty += Indenter->addTokenToState(Node->State, NewLine, true);
    760 
    761     Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node));
    762     ++(*Count);
    763   }
    764 
    765   /// \brief Applies the best formatting by reconstructing the path in the
    766   /// solution space that leads to \c Best.
    767   void reconstructPath(LineState &State, StateNode *Best) {
    768     std::deque<StateNode *> Path;
    769     // We do not need a break before the initial token.
    770     while (Best->Previous) {
    771       Path.push_front(Best);
    772       Best = Best->Previous;
    773     }
    774     for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
    775          I != E; ++I) {
    776       unsigned Penalty = 0;
    777       formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty);
    778       Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false);
    779 
    780       DEBUG({
    781         printLineState((*I)->Previous->State);
    782         if ((*I)->NewLine) {
    783           llvm::dbgs() << "Penalty for placing "
    784                        << (*I)->Previous->State.NextToken->Tok.getName() << ": "
    785                        << Penalty << "\n";
    786         }
    787       });
    788     }
    789   }
    790 
    791   llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
    792 };
    793 
    794 } // anonymous namespace
    795 
    796 unsigned
    797 UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines,
    798                                bool DryRun, int AdditionalIndent,
    799                                bool FixBadIndentation) {
    800   LineJoiner Joiner(Style, Keywords, Lines);
    801 
    802   // Try to look up already computed penalty in DryRun-mode.
    803   std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned> CacheKey(
    804       &Lines, AdditionalIndent);
    805   auto CacheIt = PenaltyCache.find(CacheKey);
    806   if (DryRun && CacheIt != PenaltyCache.end())
    807     return CacheIt->second;
    808 
    809   assert(!Lines.empty());
    810   unsigned Penalty = 0;
    811   LevelIndentTracker IndentTracker(Style, Keywords, Lines[0]->Level,
    812                                    AdditionalIndent);
    813   const AnnotatedLine *PreviousLine = nullptr;
    814   const AnnotatedLine *NextLine = nullptr;
    815 
    816   // The minimum level of consecutive lines that have been formatted.
    817   unsigned RangeMinLevel = UINT_MAX;
    818 
    819   for (const AnnotatedLine *Line =
    820            Joiner.getNextMergedLine(DryRun, IndentTracker);
    821        Line; Line = NextLine) {
    822     const AnnotatedLine &TheLine = *Line;
    823     unsigned Indent = IndentTracker.getIndent();
    824 
    825     // We continue formatting unchanged lines to adjust their indent, e.g. if a
    826     // scope was added. However, we need to carefully stop doing this when we
    827     // exit the scope of affected lines to prevent indenting a the entire
    828     // remaining file if it currently missing a closing brace.
    829     bool ContinueFormatting =
    830         TheLine.Level > RangeMinLevel ||
    831         (TheLine.Level == RangeMinLevel && !TheLine.startsWith(tok::r_brace));
    832 
    833     bool FixIndentation = (FixBadIndentation || ContinueFormatting) &&
    834                           Indent != TheLine.First->OriginalColumn;
    835     bool ShouldFormat = TheLine.Affected || FixIndentation;
    836     // We cannot format this line; if the reason is that the line had a
    837     // parsing error, remember that.
    838     if (ShouldFormat && TheLine.Type == LT_Invalid && IncompleteFormat)
    839       *IncompleteFormat = true;
    840 
    841     if (ShouldFormat && TheLine.Type != LT_Invalid) {
    842       if (!DryRun)
    843         formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, Indent,
    844                          TheLine.InPPDirective);
    845 
    846       NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);
    847       unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine);
    848       bool FitsIntoOneLine =
    849           TheLine.Last->TotalLength + Indent <= ColumnLimit ||
    850           (TheLine.Type == LT_ImportStatement &&
    851            (Style.Language != FormatStyle::LK_JavaScript ||
    852             !Style.JavaScriptWrapImports));
    853 
    854       if (Style.ColumnLimit == 0)
    855         NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this)
    856             .formatLine(TheLine, Indent, DryRun);
    857       else if (FitsIntoOneLine)
    858         Penalty += NoLineBreakFormatter(Indenter, Whitespaces, Style, this)
    859                        .formatLine(TheLine, Indent, DryRun);
    860       else
    861         Penalty += OptimizingLineFormatter(Indenter, Whitespaces, Style, this)
    862                        .formatLine(TheLine, Indent, DryRun);
    863       RangeMinLevel = std::min(RangeMinLevel, TheLine.Level);
    864     } else {
    865       // If no token in the current line is affected, we still need to format
    866       // affected children.
    867       if (TheLine.ChildrenAffected)
    868         for (const FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next)
    869           if (!Tok->Children.empty())
    870             format(Tok->Children, DryRun);
    871 
    872       // Adapt following lines on the current indent level to the same level
    873       // unless the current \c AnnotatedLine is not at the beginning of a line.
    874       bool StartsNewLine =
    875           TheLine.First->NewlinesBefore > 0 || TheLine.First->IsFirst;
    876       if (StartsNewLine)
    877         IndentTracker.adjustToUnmodifiedLine(TheLine);
    878       if (!DryRun) {
    879         bool ReformatLeadingWhitespace =
    880             StartsNewLine && ((PreviousLine && PreviousLine->Affected) ||
    881                               TheLine.LeadingEmptyLinesAffected);
    882         // Format the first token.
    883         if (ReformatLeadingWhitespace)
    884           formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level,
    885                            TheLine.First->OriginalColumn,
    886                            TheLine.InPPDirective);
    887         else
    888           Whitespaces->addUntouchableToken(*TheLine.First,
    889                                            TheLine.InPPDirective);
    890 
    891         // Notify the WhitespaceManager about the unchanged whitespace.
    892         for (FormatToken *Tok = TheLine.First->Next; Tok; Tok = Tok->Next)
    893           Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
    894       }
    895       NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);
    896       RangeMinLevel = UINT_MAX;
    897     }
    898     if (!DryRun)
    899       markFinalized(TheLine.First);
    900     PreviousLine = &TheLine;
    901   }
    902   PenaltyCache[CacheKey] = Penalty;
    903   return Penalty;
    904 }
    905 
    906 void UnwrappedLineFormatter::formatFirstToken(FormatToken &RootToken,
    907                                               const AnnotatedLine *PreviousLine,
    908                                               unsigned IndentLevel,
    909                                               unsigned Indent,
    910                                               bool InPPDirective) {
    911   if (RootToken.is(tok::eof)) {
    912     unsigned Newlines = std::min(RootToken.NewlinesBefore, 1u);
    913     Whitespaces->replaceWhitespace(RootToken, Newlines, /*IndentLevel=*/0,
    914                                    /*Spaces=*/0, /*TargetColumn=*/0);
    915     return;
    916   }
    917   unsigned Newlines =
    918       std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
    919   // Remove empty lines before "}" where applicable.
    920   if (RootToken.is(tok::r_brace) &&
    921       (!RootToken.Next ||
    922        (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)))
    923     Newlines = std::min(Newlines, 1u);
    924   if (Newlines == 0 && !RootToken.IsFirst)
    925     Newlines = 1;
    926   if (RootToken.IsFirst && !RootToken.HasUnescapedNewline)
    927     Newlines = 0;
    928 
    929   // Remove empty lines after "{".
    930   if (!Style.KeepEmptyLinesAtTheStartOfBlocks && PreviousLine &&
    931       PreviousLine->Last->is(tok::l_brace) &&
    932       PreviousLine->First->isNot(tok::kw_namespace) &&
    933       !startsExternCBlock(*PreviousLine))
    934     Newlines = 1;
    935 
    936   // Insert extra new line before access specifiers.
    937   if (PreviousLine && PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) &&
    938       RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1)
    939     ++Newlines;
    940 
    941   // Remove empty lines after access specifiers.
    942   if (PreviousLine && PreviousLine->First->isAccessSpecifier() &&
    943       (!PreviousLine->InPPDirective || !RootToken.HasUnescapedNewline))
    944     Newlines = std::min(1u, Newlines);
    945 
    946   Whitespaces->replaceWhitespace(RootToken, Newlines, IndentLevel, Indent,
    947                                  Indent, InPPDirective &&
    948                                              !RootToken.HasUnescapedNewline);
    949 }
    950 
    951 unsigned
    952 UnwrappedLineFormatter::getColumnLimit(bool InPPDirective,
    953                                        const AnnotatedLine *NextLine) const {
    954   // In preprocessor directives reserve two chars for trailing " \" if the
    955   // next line continues the preprocessor directive.
    956   bool ContinuesPPDirective =
    957       InPPDirective &&
    958       // If there is no next line, this is likely a child line and the parent
    959       // continues the preprocessor directive.
    960       (!NextLine ||
    961        (NextLine->InPPDirective &&
    962         // If there is an unescaped newline between this line and the next, the
    963         // next line starts a new preprocessor directive.
    964         !NextLine->First->HasUnescapedNewline));
    965   return Style.ColumnLimit - (ContinuesPPDirective ? 2 : 0);
    966 }
    967 
    968 } // namespace format
    969 } // namespace clang
    970