//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file implements an indenter that manages the indentation of
/// continuations.
///
//===----------------------------------------------------------------------===//
     15 
     16 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
     17 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
     18 
     19 #include "Encoding.h"
     20 #include "FormatToken.h"
     21 #include "clang/Format/Format.h"
     22 #include "llvm/Support/Regex.h"
     23 
     24 namespace clang {
     25 class SourceManager;
     26 
     27 namespace format {
     28 
     29 class AnnotatedLine;
     30 struct FormatToken;
     31 struct LineState;
     32 struct ParenState;
     33 class WhitespaceManager;
     34 
     35 class ContinuationIndenter {
     36 public:
     37   /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in
     38   /// column \p FirstIndent.
     39   ContinuationIndenter(const FormatStyle &Style,
     40                        const AdditionalKeywords &Keywords,
     41                        const SourceManager &SourceMgr,
     42                        WhitespaceManager &Whitespaces,
     43                        encoding::Encoding Encoding,
     44                        bool BinPackInconclusiveFunctions);
     45 
     46   /// \brief Get the initial state, i.e. the state after placing \p Line's
     47   /// first token at \p FirstIndent.
     48   LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line,
     49                             bool DryRun);
     50 
     51   // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
     52   // better home.
     53   /// \brief Returns \c true, if a line break after \p State is allowed.
     54   bool canBreak(const LineState &State);
     55 
     56   /// \brief Returns \c true, if a line break after \p State is mandatory.
     57   bool mustBreak(const LineState &State);
     58 
     59   /// \brief Appends the next token to \p State and updates information
     60   /// necessary for indentation.
     61   ///
     62   /// Puts the token on the current line if \p Newline is \c false and adds a
     63   /// line break and necessary indentation otherwise.
     64   ///
     65   /// If \p DryRun is \c false, also creates and stores the required
     66   /// \c Replacement.
     67   unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
     68                            unsigned ExtraSpaces = 0);
     69 
     70   /// \brief Get the column limit for this line. This is the style's column
     71   /// limit, potentially reduced for preprocessor definitions.
     72   unsigned getColumnLimit(const LineState &State) const;
     73 
     74 private:
     75   /// \brief Mark the next token as consumed in \p State and modify its stacks
     76   /// accordingly.
     77   unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
     78 
     79   /// \brief Update 'State' according to the next token's fake left parentheses.
     80   void moveStatePastFakeLParens(LineState &State, bool Newline);
     81   /// \brief Update 'State' according to the next token's fake r_parens.
     82   void moveStatePastFakeRParens(LineState &State);
     83 
     84   /// \brief Update 'State' according to the next token being one of "(<{[".
     85   void moveStatePastScopeOpener(LineState &State, bool Newline);
     86   /// \brief Update 'State' according to the next token being one of ")>}]".
     87   void moveStatePastScopeCloser(LineState &State);
     88   /// \brief Update 'State' with the next token opening a nested block.
     89   void moveStateToNewBlock(LineState &State);
     90 
     91   /// \brief If the current token sticks out over the end of the line, break
     92   /// it if possible.
     93   ///
     94   /// \returns An extra penalty if a token was broken, otherwise 0.
     95   ///
     96   /// The returned penalty will cover the cost of the additional line breaks and
     97   /// column limit violation in all lines except for the last one. The penalty
     98   /// for the column limit violation in the last line (and in single line
     99   /// tokens) is handled in \c addNextStateToQueue.
    100   unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
    101                                 bool DryRun);
    102 
    103   /// \brief Appends the next token to \p State and updates information
    104   /// necessary for indentation.
    105   ///
    106   /// Puts the token on the current line.
    107   ///
    108   /// If \p DryRun is \c false, also creates and stores the required
    109   /// \c Replacement.
    110   void addTokenOnCurrentLine(LineState &State, bool DryRun,
    111                              unsigned ExtraSpaces);
    112 
    113   /// \brief Appends the next token to \p State and updates information
    114   /// necessary for indentation.
    115   ///
    116   /// Adds a line break and necessary indentation.
    117   ///
    118   /// If \p DryRun is \c false, also creates and stores the required
    119   /// \c Replacement.
    120   unsigned addTokenOnNewLine(LineState &State, bool DryRun);
    121 
    122   /// \brief Calculate the new column for a line wrap before the next token.
    123   unsigned getNewLineColumn(const LineState &State);
    124 
    125   /// \brief Adds a multiline token to the \p State.
    126   ///
    127   /// \returns Extra penalty for the first line of the literal: last line is
    128   /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
    129   /// matter, as we don't change them.
    130   unsigned addMultilineToken(const FormatToken &Current, LineState &State);
    131 
    132   /// \brief Returns \c true if the next token starts a multiline string
    133   /// literal.
    134   ///
    135   /// This includes implicitly concatenated strings, strings that will be broken
    136   /// by clang-format and string literals with escaped newlines.
    137   bool nextIsMultilineString(const LineState &State);
    138 
    139   FormatStyle Style;
    140   const AdditionalKeywords &Keywords;
    141   const SourceManager &SourceMgr;
    142   WhitespaceManager &Whitespaces;
    143   encoding::Encoding Encoding;
    144   bool BinPackInconclusiveFunctions;
    145   llvm::Regex CommentPragmasRegex;
    146 };
    147 
    148 struct ParenState {
    149   ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace,
    150              bool AvoidBinPacking, bool NoLineBreak)
    151       : Indent(Indent), IndentLevel(IndentLevel), LastSpace(LastSpace),
    152         NestedBlockIndent(Indent), BreakBeforeClosingBrace(false),
    153         AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
    154         NoLineBreak(NoLineBreak), LastOperatorWrapped(true),
    155         ContainsLineBreak(false), ContainsUnwrappedBuilder(false),
    156         AlignColons(true), ObjCSelectorNameFound(false),
    157         HasMultipleNestedBlocks(false), NestedBlockInlined(false) {}
    158 
    159   /// \brief The position to which a specific parenthesis level needs to be
    160   /// indented.
    161   unsigned Indent;
    162 
    163   /// \brief The number of indentation levels of the block.
    164   unsigned IndentLevel;
    165 
    166   /// \brief The position of the last space on each level.
    167   ///
    168   /// Used e.g. to break like:
    169   /// functionCall(Parameter, otherCall(
    170   ///                             OtherParameter));
    171   unsigned LastSpace;
    172 
    173   /// \brief If a block relative to this parenthesis level gets wrapped, indent
    174   /// it this much.
    175   unsigned NestedBlockIndent;
    176 
    177   /// \brief The position the first "<<" operator encountered on each level.
    178   ///
    179   /// Used to align "<<" operators. 0 if no such operator has been encountered
    180   /// on a level.
    181   unsigned FirstLessLess = 0;
    182 
    183   /// \brief The column of a \c ? in a conditional expression;
    184   unsigned QuestionColumn = 0;
    185 
    186   /// \brief The position of the colon in an ObjC method declaration/call.
    187   unsigned ColonPos = 0;
    188 
    189   /// \brief The start of the most recent function in a builder-type call.
    190   unsigned StartOfFunctionCall = 0;
    191 
    192   /// \brief Contains the start of array subscript expressions, so that they
    193   /// can be aligned.
    194   unsigned StartOfArraySubscripts = 0;
    195 
    196   /// \brief If a nested name specifier was broken over multiple lines, this
    197   /// contains the start column of the second line. Otherwise 0.
    198   unsigned NestedNameSpecifierContinuation = 0;
    199 
    200   /// \brief If a call expression was broken over multiple lines, this
    201   /// contains the start column of the second line. Otherwise 0.
    202   unsigned CallContinuation = 0;
    203 
    204   /// \brief The column of the first variable name in a variable declaration.
    205   ///
    206   /// Used to align further variables if necessary.
    207   unsigned VariablePos = 0;
    208 
    209   /// \brief Whether a newline needs to be inserted before the block's closing
    210   /// brace.
    211   ///
    212   /// We only want to insert a newline before the closing brace if there also
    213   /// was a newline after the beginning left brace.
    214   bool BreakBeforeClosingBrace : 1;
    215 
    216   /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
    217   /// lines, in this context.
    218   bool AvoidBinPacking : 1;
    219 
    220   /// \brief Break after the next comma (or all the commas in this context if
    221   /// \c AvoidBinPacking is \c true).
    222   bool BreakBeforeParameter : 1;
    223 
    224   /// \brief Line breaking in this context would break a formatting rule.
    225   bool NoLineBreak : 1;
    226 
    227   /// \brief True if the last binary operator on this level was wrapped to the
    228   /// next line.
    229   bool LastOperatorWrapped : 1;
    230 
    231   /// \brief \c true if this \c ParenState already contains a line-break.
    232   ///
    233   /// The first line break in a certain \c ParenState causes extra penalty so
    234   /// that clang-format prefers similar breaks, i.e. breaks in the same
    235   /// parenthesis.
    236   bool ContainsLineBreak : 1;
    237 
    238   /// \brief \c true if this \c ParenState contains multiple segments of a
    239   /// builder-type call on one line.
    240   bool ContainsUnwrappedBuilder : 1;
    241 
    242   /// \brief \c true if the colons of the curren ObjC method expression should
    243   /// be aligned.
    244   ///
    245   /// Not considered for memoization as it will always have the same value at
    246   /// the same token.
    247   bool AlignColons : 1;
    248 
    249   /// \brief \c true if at least one selector name was found in the current
    250   /// ObjC method expression.
    251   ///
    252   /// Not considered for memoization as it will always have the same value at
    253   /// the same token.
    254   bool ObjCSelectorNameFound : 1;
    255 
    256   /// \brief \c true if there are multiple nested blocks inside these parens.
    257   ///
    258   /// Not considered for memoization as it will always have the same value at
    259   /// the same token.
    260   bool HasMultipleNestedBlocks : 1;
    261 
    262   // \brief The start of a nested block (e.g. lambda introducer in C++ or
    263   // "function" in JavaScript) is not wrapped to a new line.
    264   bool NestedBlockInlined : 1;
    265 
    266   bool operator<(const ParenState &Other) const {
    267     if (Indent != Other.Indent)
    268       return Indent < Other.Indent;
    269     if (LastSpace != Other.LastSpace)
    270       return LastSpace < Other.LastSpace;
    271     if (NestedBlockIndent != Other.NestedBlockIndent)
    272       return NestedBlockIndent < Other.NestedBlockIndent;
    273     if (FirstLessLess != Other.FirstLessLess)
    274       return FirstLessLess < Other.FirstLessLess;
    275     if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
    276       return BreakBeforeClosingBrace;
    277     if (QuestionColumn != Other.QuestionColumn)
    278       return QuestionColumn < Other.QuestionColumn;
    279     if (AvoidBinPacking != Other.AvoidBinPacking)
    280       return AvoidBinPacking;
    281     if (BreakBeforeParameter != Other.BreakBeforeParameter)
    282       return BreakBeforeParameter;
    283     if (NoLineBreak != Other.NoLineBreak)
    284       return NoLineBreak;
    285     if (LastOperatorWrapped != Other.LastOperatorWrapped)
    286       return LastOperatorWrapped;
    287     if (ColonPos != Other.ColonPos)
    288       return ColonPos < Other.ColonPos;
    289     if (StartOfFunctionCall != Other.StartOfFunctionCall)
    290       return StartOfFunctionCall < Other.StartOfFunctionCall;
    291     if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
    292       return StartOfArraySubscripts < Other.StartOfArraySubscripts;
    293     if (CallContinuation != Other.CallContinuation)
    294       return CallContinuation < Other.CallContinuation;
    295     if (VariablePos != Other.VariablePos)
    296       return VariablePos < Other.VariablePos;
    297     if (ContainsLineBreak != Other.ContainsLineBreak)
    298       return ContainsLineBreak;
    299     if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
    300       return ContainsUnwrappedBuilder;
    301     if (NestedBlockInlined != Other.NestedBlockInlined)
    302       return NestedBlockInlined;
    303     return false;
    304   }
    305 };
    306 
    307 /// \brief The current state when indenting a unwrapped line.
    308 ///
    309 /// As the indenting tries different combinations this is copied by value.
    310 struct LineState {
    311   /// \brief The number of used columns in the current line.
    312   unsigned Column;
    313 
    314   /// \brief The token that needs to be next formatted.
    315   FormatToken *NextToken;
    316 
    317   /// \brief \c true if this line contains a continued for-loop section.
    318   bool LineContainsContinuedForLoopSection;
    319 
    320   /// \brief The \c NestingLevel at the start of this line.
    321   unsigned StartOfLineLevel;
    322 
    323   /// \brief The lowest \c NestingLevel on the current line.
    324   unsigned LowestLevelOnLine;
    325 
    326   /// \brief The start column of the string literal, if we're in a string
    327   /// literal sequence, 0 otherwise.
    328   unsigned StartOfStringLiteral;
    329 
    330   /// \brief A stack keeping track of properties applying to parenthesis
    331   /// levels.
    332   std::vector<ParenState> Stack;
    333 
    334   /// \brief Ignore the stack of \c ParenStates for state comparison.
    335   ///
    336   /// In long and deeply nested unwrapped lines, the current algorithm can
    337   /// be insufficient for finding the best formatting with a reasonable amount
    338   /// of time and memory. Setting this flag will effectively lead to the
    339   /// algorithm not analyzing some combinations. However, these combinations
    340   /// rarely contain the optimal solution: In short, accepting a higher
    341   /// penalty early would need to lead to different values in the \c
    342   /// ParenState stack (in an otherwise identical state) and these different
    343   /// values would need to lead to a significant amount of avoided penalty
    344   /// later.
    345   ///
    346   /// FIXME: Come up with a better algorithm instead.
    347   bool IgnoreStackForComparison;
    348 
    349   /// \brief The indent of the first token.
    350   unsigned FirstIndent;
    351 
    352   /// \brief The line that is being formatted.
    353   ///
    354   /// Does not need to be considered for memoization because it doesn't change.
    355   const AnnotatedLine *Line;
    356 
    357   /// \brief Comparison operator to be able to used \c LineState in \c map.
    358   bool operator<(const LineState &Other) const {
    359     if (NextToken != Other.NextToken)
    360       return NextToken < Other.NextToken;
    361     if (Column != Other.Column)
    362       return Column < Other.Column;
    363     if (LineContainsContinuedForLoopSection !=
    364         Other.LineContainsContinuedForLoopSection)
    365       return LineContainsContinuedForLoopSection;
    366     if (StartOfLineLevel != Other.StartOfLineLevel)
    367       return StartOfLineLevel < Other.StartOfLineLevel;
    368     if (LowestLevelOnLine != Other.LowestLevelOnLine)
    369       return LowestLevelOnLine < Other.LowestLevelOnLine;
    370     if (StartOfStringLiteral != Other.StartOfStringLiteral)
    371       return StartOfStringLiteral < Other.StartOfStringLiteral;
    372     if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
    373       return false;
    374     return Stack < Other.Stack;
    375   }
    376 };
    377 
    378 } // end namespace format
    379 } // end namespace clang
    380 
    381 #endif
    382