Home | History | Annotate | Download | only in Format
      1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 ///
     10 /// \file
     11 /// \brief This file implements an indenter that manages the indentation of
     12 /// continuations.
     13 ///
     14 //===----------------------------------------------------------------------===//
     15 
     16 #ifndef LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H
     17 #define LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H
     18 
     19 #include "Encoding.h"
     20 #include "clang/Format/Format.h"
     21 #include "llvm/Support/Regex.h"
     22 
     23 namespace clang {
     24 class SourceManager;
     25 
     26 namespace format {
     27 
     28 class AnnotatedLine;
     29 struct FormatToken;
     30 struct LineState;
     31 struct ParenState;
     32 class WhitespaceManager;
     33 
     34 class ContinuationIndenter {
     35 public:
     36   /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in
     37   /// column \p FirstIndent.
     38   ContinuationIndenter(const FormatStyle &Style, SourceManager &SourceMgr,
     39                        WhitespaceManager &Whitespaces,
     40                        encoding::Encoding Encoding,
     41                        bool BinPackInconclusiveFunctions);
     42 
     43   /// \brief Get the initial state, i.e. the state after placing \p Line's
     44   /// first token at \p FirstIndent.
     45   LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line,
     46                             bool DryRun);
     47 
     48   // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
     49   // better home.
     50   /// \brief Returns \c true, if a line break after \p State is allowed.
     51   bool canBreak(const LineState &State);
     52 
     53   /// \brief Returns \c true, if a line break after \p State is mandatory.
     54   bool mustBreak(const LineState &State);
     55 
     56   /// \brief Appends the next token to \p State and updates information
     57   /// necessary for indentation.
     58   ///
     59   /// Puts the token on the current line if \p Newline is \c false and adds a
     60   /// line break and necessary indentation otherwise.
     61   ///
     62   /// If \p DryRun is \c false, also creates and stores the required
     63   /// \c Replacement.
     64   unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
     65                            unsigned ExtraSpaces = 0);
     66 
     67   /// \brief Get the column limit for this line. This is the style's column
     68   /// limit, potentially reduced for preprocessor definitions.
     69   unsigned getColumnLimit(const LineState &State) const;
     70 
     71 private:
     72   /// \brief Mark the next token as consumed in \p State and modify its stacks
     73   /// accordingly.
     74   unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
     75 
     76   /// \brief Update 'State' according to the next token's fake left parentheses.
     77   void moveStatePastFakeLParens(LineState &State, bool Newline);
     78   /// \brief Update 'State' according to the next token's fake r_parens.
     79   void moveStatePastFakeRParens(LineState &State);
     80 
     81   /// \brief Update 'State' according to the next token being one of "(<{[".
     82   void moveStatePastScopeOpener(LineState &State, bool Newline);
     83   /// \brief Update 'State' according to the next token being one of ")>}]".
     84   void moveStatePastScopeCloser(LineState &State);
     85   /// \brief Update 'State' with the next token opening a nested block.
     86   void moveStateToNewBlock(LineState &State);
     87 
     88   /// \brief If the current token sticks out over the end of the line, break
     89   /// it if possible.
     90   ///
     91   /// \returns An extra penalty if a token was broken, otherwise 0.
     92   ///
     93   /// The returned penalty will cover the cost of the additional line breaks and
     94   /// column limit violation in all lines except for the last one. The penalty
     95   /// for the column limit violation in the last line (and in single line
     96   /// tokens) is handled in \c addNextStateToQueue.
     97   unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
     98                                 bool DryRun);
     99 
    100   /// \brief Appends the next token to \p State and updates information
    101   /// necessary for indentation.
    102   ///
    103   /// Puts the token on the current line.
    104   ///
    105   /// If \p DryRun is \c false, also creates and stores the required
    106   /// \c Replacement.
    107   void addTokenOnCurrentLine(LineState &State, bool DryRun,
    108                              unsigned ExtraSpaces);
    109 
    110   /// \brief Appends the next token to \p State and updates information
    111   /// necessary for indentation.
    112   ///
    113   /// Adds a line break and necessary indentation.
    114   ///
    115   /// If \p DryRun is \c false, also creates and stores the required
    116   /// \c Replacement.
    117   unsigned addTokenOnNewLine(LineState &State, bool DryRun);
    118 
    119   /// \brief Calculate the new column for a line wrap before the next token.
    120   unsigned getNewLineColumn(const LineState &State);
    121 
    122   /// \brief Adds a multiline token to the \p State.
    123   ///
    124   /// \returns Extra penalty for the first line of the literal: last line is
    125   /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
    126   /// matter, as we don't change them.
    127   unsigned addMultilineToken(const FormatToken &Current, LineState &State);
    128 
    129   /// \brief Returns \c true if the next token starts a multiline string
    130   /// literal.
    131   ///
    132   /// This includes implicitly concatenated strings, strings that will be broken
    133   /// by clang-format and string literals with escaped newlines.
    134   bool nextIsMultilineString(const LineState &State);
    135 
    136   FormatStyle Style;
    137   SourceManager &SourceMgr;
    138   WhitespaceManager &Whitespaces;
    139   encoding::Encoding Encoding;
    140   bool BinPackInconclusiveFunctions;
    141   llvm::Regex CommentPragmasRegex;
    142 };
    143 
    144 struct ParenState {
    145   ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace,
    146              bool AvoidBinPacking, bool NoLineBreak)
    147       : Indent(Indent), IndentLevel(IndentLevel), LastSpace(LastSpace),
    148         FirstLessLess(0), BreakBeforeClosingBrace(false), QuestionColumn(0),
    149         AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
    150         NoLineBreak(NoLineBreak), LastOperatorWrapped(true), ColonPos(0),
    151         StartOfFunctionCall(0), StartOfArraySubscripts(0),
    152         NestedNameSpecifierContinuation(0), CallContinuation(0), VariablePos(0),
    153         ContainsLineBreak(false), ContainsUnwrappedBuilder(0),
    154         AlignColons(true), ObjCSelectorNameFound(false),
    155         HasMultipleNestedBlocks(false), JSFunctionInlined(false) {}
    156 
    157   /// \brief The position to which a specific parenthesis level needs to be
    158   /// indented.
    159   unsigned Indent;
    160 
    161   /// \brief The number of indentation levels of the block.
    162   unsigned IndentLevel;
    163 
    164   /// \brief The position of the last space on each level.
    165   ///
    166   /// Used e.g. to break like:
    167   /// functionCall(Parameter, otherCall(
    168   ///                             OtherParameter));
    169   unsigned LastSpace;
    170 
    171   /// \brief The position the first "<<" operator encountered on each level.
    172   ///
    173   /// Used to align "<<" operators. 0 if no such operator has been encountered
    174   /// on a level.
    175   unsigned FirstLessLess;
    176 
    177   /// \brief Whether a newline needs to be inserted before the block's closing
    178   /// brace.
    179   ///
    180   /// We only want to insert a newline before the closing brace if there also
    181   /// was a newline after the beginning left brace.
    182   bool BreakBeforeClosingBrace;
    183 
    184   /// \brief The column of a \c ? in a conditional expression;
    185   unsigned QuestionColumn;
    186 
    187   /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
    188   /// lines, in this context.
    189   bool AvoidBinPacking;
    190 
    191   /// \brief Break after the next comma (or all the commas in this context if
    192   /// \c AvoidBinPacking is \c true).
    193   bool BreakBeforeParameter;
    194 
    195   /// \brief Line breaking in this context would break a formatting rule.
    196   bool NoLineBreak;
    197 
    198   /// \brief True if the last binary operator on this level was wrapped to the
    199   /// next line.
    200   bool LastOperatorWrapped;
    201 
    202   /// \brief The position of the colon in an ObjC method declaration/call.
    203   unsigned ColonPos;
    204 
    205   /// \brief The start of the most recent function in a builder-type call.
    206   unsigned StartOfFunctionCall;
    207 
    208   /// \brief Contains the start of array subscript expressions, so that they
    209   /// can be aligned.
    210   unsigned StartOfArraySubscripts;
    211 
    212   /// \brief If a nested name specifier was broken over multiple lines, this
    213   /// contains the start column of the second line. Otherwise 0.
    214   unsigned NestedNameSpecifierContinuation;
    215 
    216   /// \brief If a call expression was broken over multiple lines, this
    217   /// contains the start column of the second line. Otherwise 0.
    218   unsigned CallContinuation;
    219 
    220   /// \brief The column of the first variable name in a variable declaration.
    221   ///
    222   /// Used to align further variables if necessary.
    223   unsigned VariablePos;
    224 
    225   /// \brief \c true if this \c ParenState already contains a line-break.
    226   ///
    227   /// The first line break in a certain \c ParenState causes extra penalty so
    228   /// that clang-format prefers similar breaks, i.e. breaks in the same
    229   /// parenthesis.
    230   bool ContainsLineBreak;
    231 
    232   /// \brief \c true if this \c ParenState contains multiple segments of a
    233   /// builder-type call on one line.
    234   bool ContainsUnwrappedBuilder;
    235 
    236   /// \brief \c true if the colons of the curren ObjC method expression should
    237   /// be aligned.
    238   ///
    239   /// Not considered for memoization as it will always have the same value at
    240   /// the same token.
    241   bool AlignColons;
    242 
    243   /// \brief \c true if at least one selector name was found in the current
    244   /// ObjC method expression.
    245   ///
    246   /// Not considered for memoization as it will always have the same value at
    247   /// the same token.
    248   bool ObjCSelectorNameFound;
    249 
    250   /// \brief \c true if there are multiple nested blocks inside these parens.
    251   ///
    252   /// Not considered for memoization as it will always have the same value at
    253   /// the same token.
    254   bool HasMultipleNestedBlocks;
    255 
    256   // \brief The previous JavaScript 'function' keyword is not wrapped to a new
    257   // line.
    258   bool JSFunctionInlined;
    259 
    260   bool operator<(const ParenState &Other) const {
    261     if (Indent != Other.Indent)
    262       return Indent < Other.Indent;
    263     if (LastSpace != Other.LastSpace)
    264       return LastSpace < Other.LastSpace;
    265     if (FirstLessLess != Other.FirstLessLess)
    266       return FirstLessLess < Other.FirstLessLess;
    267     if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
    268       return BreakBeforeClosingBrace;
    269     if (QuestionColumn != Other.QuestionColumn)
    270       return QuestionColumn < Other.QuestionColumn;
    271     if (AvoidBinPacking != Other.AvoidBinPacking)
    272       return AvoidBinPacking;
    273     if (BreakBeforeParameter != Other.BreakBeforeParameter)
    274       return BreakBeforeParameter;
    275     if (NoLineBreak != Other.NoLineBreak)
    276       return NoLineBreak;
    277     if (LastOperatorWrapped != Other.LastOperatorWrapped)
    278       return LastOperatorWrapped;
    279     if (ColonPos != Other.ColonPos)
    280       return ColonPos < Other.ColonPos;
    281     if (StartOfFunctionCall != Other.StartOfFunctionCall)
    282       return StartOfFunctionCall < Other.StartOfFunctionCall;
    283     if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
    284       return StartOfArraySubscripts < Other.StartOfArraySubscripts;
    285     if (CallContinuation != Other.CallContinuation)
    286       return CallContinuation < Other.CallContinuation;
    287     if (VariablePos != Other.VariablePos)
    288       return VariablePos < Other.VariablePos;
    289     if (ContainsLineBreak != Other.ContainsLineBreak)
    290       return ContainsLineBreak < Other.ContainsLineBreak;
    291     if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
    292       return ContainsUnwrappedBuilder < Other.ContainsUnwrappedBuilder;
    293     if (JSFunctionInlined != Other.JSFunctionInlined)
    294       return JSFunctionInlined < Other.JSFunctionInlined;
    295     return false;
    296   }
    297 };
    298 
    299 /// \brief The current state when indenting a unwrapped line.
    300 ///
    301 /// As the indenting tries different combinations this is copied by value.
    302 struct LineState {
    303   /// \brief The number of used columns in the current line.
    304   unsigned Column;
    305 
    306   /// \brief The token that needs to be next formatted.
    307   FormatToken *NextToken;
    308 
    309   /// \brief \c true if this line contains a continued for-loop section.
    310   bool LineContainsContinuedForLoopSection;
    311 
    312   /// \brief The \c NestingLevel at the start of this line.
    313   unsigned StartOfLineLevel;
    314 
    315   /// \brief The lowest \c NestingLevel on the current line.
    316   unsigned LowestLevelOnLine;
    317 
    318   /// \brief The start column of the string literal, if we're in a string
    319   /// literal sequence, 0 otherwise.
    320   unsigned StartOfStringLiteral;
    321 
    322   /// \brief A stack keeping track of properties applying to parenthesis
    323   /// levels.
    324   std::vector<ParenState> Stack;
    325 
    326   /// \brief Ignore the stack of \c ParenStates for state comparison.
    327   ///
    328   /// In long and deeply nested unwrapped lines, the current algorithm can
    329   /// be insufficient for finding the best formatting with a reasonable amount
    330   /// of time and memory. Setting this flag will effectively lead to the
    331   /// algorithm not analyzing some combinations. However, these combinations
    332   /// rarely contain the optimal solution: In short, accepting a higher
    333   /// penalty early would need to lead to different values in the \c
    334   /// ParenState stack (in an otherwise identical state) and these different
    335   /// values would need to lead to a significant amount of avoided penalty
    336   /// later.
    337   ///
    338   /// FIXME: Come up with a better algorithm instead.
    339   bool IgnoreStackForComparison;
    340 
    341   /// \brief The indent of the first token.
    342   unsigned FirstIndent;
    343 
    344   /// \brief The line that is being formatted.
    345   ///
    346   /// Does not need to be considered for memoization because it doesn't change.
    347   const AnnotatedLine *Line;
    348 
    349   /// \brief Comparison operator to be able to used \c LineState in \c map.
    350   bool operator<(const LineState &Other) const {
    351     if (NextToken != Other.NextToken)
    352       return NextToken < Other.NextToken;
    353     if (Column != Other.Column)
    354       return Column < Other.Column;
    355     if (LineContainsContinuedForLoopSection !=
    356         Other.LineContainsContinuedForLoopSection)
    357       return LineContainsContinuedForLoopSection;
    358     if (StartOfLineLevel != Other.StartOfLineLevel)
    359       return StartOfLineLevel < Other.StartOfLineLevel;
    360     if (LowestLevelOnLine != Other.LowestLevelOnLine)
    361       return LowestLevelOnLine < Other.LowestLevelOnLine;
    362     if (StartOfStringLiteral != Other.StartOfStringLiteral)
    363       return StartOfStringLiteral < Other.StartOfStringLiteral;
    364     if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
    365       return false;
    366     return Stack < Other.Stack;
    367   }
    368 };
    369 
    370 } // end namespace format
    371 } // end namespace clang
    372 
    373 #endif // LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H
    374