Home | History | Annotate | Download | only in Format
      1 //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 ///
     10 /// \file
     11 /// \brief This file implements a token annotator, i.e. creates
     12 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
     13 ///
     14 //===----------------------------------------------------------------------===//
     15 
     16 #ifndef LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
     17 #define LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
     18 
     19 #include "UnwrappedLineParser.h"
     20 #include "clang/Basic/OperatorPrecedence.h"
     21 #include "clang/Format/Format.h"
     22 #include <string>
     23 
     24 namespace clang {
     25 class Lexer;
     26 class SourceManager;
     27 
     28 namespace format {
     29 
     30 enum TokenType {
     31   TT_BinaryOperator,
     32   TT_BlockComment,
     33   TT_CastRParen,
     34   TT_ConditionalExpr,
     35   TT_CtorInitializerColon,
     36   TT_ImplicitStringLiteral,
     37   TT_InlineASMColon,
     38   TT_InheritanceColon,
     39   TT_LineComment,
     40   TT_ObjCArrayLiteral,
     41   TT_ObjCBlockLParen,
     42   TT_ObjCDecl,
     43   TT_ObjCForIn,
     44   TT_ObjCMethodExpr,
     45   TT_ObjCMethodSpecifier,
     46   TT_ObjCProperty,
     47   TT_ObjCSelectorName,
     48   TT_OverloadedOperatorLParen,
     49   TT_PointerOrReference,
     50   TT_PureVirtualSpecifier,
     51   TT_RangeBasedForLoopColon,
     52   TT_StartOfName,
     53   TT_TemplateCloser,
     54   TT_TemplateOpener,
     55   TT_TrailingUnaryOperator,
     56   TT_UnaryOperator,
     57   TT_Unknown
     58 };
     59 
     60 enum LineType {
     61   LT_Invalid,
     62   LT_Other,
     63   LT_BuilderTypeCall,
     64   LT_PreprocessorDirective,
     65   LT_VirtualFunctionDecl,
     66   LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
     67   LT_ObjCMethodDecl,
     68   LT_ObjCProperty // An @property line.
     69 };
     70 
     71 class AnnotatedToken {
     72 public:
     73   explicit AnnotatedToken(const FormatToken &FormatTok)
     74       : FormatTok(FormatTok), Type(TT_Unknown), SpacesRequiredBefore(0),
     75         CanBreakBefore(false), MustBreakBefore(false),
     76         ClosesTemplateDeclaration(false), MatchingParen(NULL),
     77         ParameterCount(0), BindingStrength(0), SplitPenalty(0),
     78         LongestObjCSelectorName(0), Parent(NULL), FakeLParens(0),
     79         FakeRParens(0), LastInChainOfCalls(false) {
     80   }
     81 
     82   bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); }
     83 
     84   bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
     85     return is(K1) || is(K2);
     86   }
     87 
     88   bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3) const {
     89     return is(K1) || is(K2) || is(K3);
     90   }
     91 
     92   bool isOneOf(
     93       tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3,
     94       tok::TokenKind K4, tok::TokenKind K5 = tok::NUM_TOKENS,
     95       tok::TokenKind K6 = tok::NUM_TOKENS, tok::TokenKind K7 = tok::NUM_TOKENS,
     96       tok::TokenKind K8 = tok::NUM_TOKENS, tok::TokenKind K9 = tok::NUM_TOKENS,
     97       tok::TokenKind K10 = tok::NUM_TOKENS,
     98       tok::TokenKind K11 = tok::NUM_TOKENS,
     99       tok::TokenKind K12 = tok::NUM_TOKENS) const {
    100     return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) ||
    101            is(K8) || is(K9) || is(K10) || is(K11) || is(K12);
    102   }
    103 
    104   bool isNot(tok::TokenKind Kind) const { return FormatTok.Tok.isNot(Kind); }
    105 
    106   bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
    107     return FormatTok.Tok.isObjCAtKeyword(Kind);
    108   }
    109 
    110   FormatToken FormatTok;
    111 
    112   TokenType Type;
    113 
    114   unsigned SpacesRequiredBefore;
    115   bool CanBreakBefore;
    116   bool MustBreakBefore;
    117 
    118   bool ClosesTemplateDeclaration;
    119 
    120   AnnotatedToken *MatchingParen;
    121 
    122   /// \brief Number of parameters, if this is "(", "[" or "<".
    123   ///
    124   /// This is initialized to 1 as we don't need to distinguish functions with
    125   /// 0 parameters from functions with 1 parameter. Thus, we can simply count
    126   /// the number of commas.
    127   unsigned ParameterCount;
    128 
    129   /// \brief The total length of the line up to and including this token.
    130   unsigned TotalLength;
    131 
    132   // FIXME: Come up with a 'cleaner' concept.
    133   /// \brief The binding strength of a token. This is a combined value of
    134   /// operator precedence, parenthesis nesting, etc.
    135   unsigned BindingStrength;
    136 
    137   /// \brief Penalty for inserting a line break before this token.
    138   unsigned SplitPenalty;
    139 
    140   /// \brief If this is the first ObjC selector name in an ObjC method
    141   /// definition or call, this contains the length of the longest name.
    142   unsigned LongestObjCSelectorName;
    143 
    144   std::vector<AnnotatedToken> Children;
    145   AnnotatedToken *Parent;
    146 
    147   /// \brief Insert this many fake ( before this token for correct indentation.
    148   unsigned FakeLParens;
    149   /// \brief Insert this many fake ) after this token for correct indentation.
    150   unsigned FakeRParens;
    151 
    152   /// \brief Is this the last "." or "->" in a builder-type call?
    153   bool LastInChainOfCalls;
    154 
    155   const AnnotatedToken *getPreviousNoneComment() const {
    156     AnnotatedToken *Tok = Parent;
    157     while (Tok != NULL && Tok->is(tok::comment))
    158       Tok = Tok->Parent;
    159     return Tok;
    160   }
    161 };
    162 
    163 class AnnotatedLine {
    164 public:
    165   AnnotatedLine(const UnwrappedLine &Line)
    166       : First(Line.Tokens.front()), Level(Line.Level),
    167         InPPDirective(Line.InPPDirective),
    168         MustBeDeclaration(Line.MustBeDeclaration),
    169         MightBeFunctionDecl(false) {
    170     assert(!Line.Tokens.empty());
    171     AnnotatedToken *Current = &First;
    172     for (std::list<FormatToken>::const_iterator I = ++Line.Tokens.begin(),
    173                                                 E = Line.Tokens.end();
    174          I != E; ++I) {
    175       Current->Children.push_back(AnnotatedToken(*I));
    176       Current->Children[0].Parent = Current;
    177       Current = &Current->Children[0];
    178     }
    179     Last = Current;
    180   }
    181   AnnotatedLine(const AnnotatedLine &Other)
    182       : First(Other.First), Type(Other.Type), Level(Other.Level),
    183         InPPDirective(Other.InPPDirective),
    184         MustBeDeclaration(Other.MustBeDeclaration),
    185         MightBeFunctionDecl(Other.MightBeFunctionDecl) {
    186     Last = &First;
    187     while (!Last->Children.empty()) {
    188       Last->Children[0].Parent = Last;
    189       Last = &Last->Children[0];
    190     }
    191   }
    192 
    193   AnnotatedToken First;
    194   AnnotatedToken *Last;
    195 
    196   LineType Type;
    197   unsigned Level;
    198   bool InPPDirective;
    199   bool MustBeDeclaration;
    200   bool MightBeFunctionDecl;
    201 };
    202 
    203 inline prec::Level getPrecedence(const AnnotatedToken &Tok) {
    204   return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true);
    205 }
    206 
    207 /// \brief Determines extra information about the tokens comprising an
    208 /// \c UnwrappedLine.
    209 class TokenAnnotator {
    210 public:
    211   TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex,
    212                  IdentifierInfo &Ident_in)
    213       : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Ident_in(Ident_in) {
    214   }
    215 
    216   void annotate(AnnotatedLine &Line);
    217   void calculateFormattingInformation(AnnotatedLine &Line);
    218 
    219 private:
    220   /// \brief Calculate the penalty for splitting before \c Tok.
    221   unsigned splitPenalty(const AnnotatedLine &Line, const AnnotatedToken &Tok);
    222 
    223   bool spaceRequiredBetween(const AnnotatedLine &Line,
    224                             const AnnotatedToken &Left,
    225                             const AnnotatedToken &Right);
    226 
    227   bool spaceRequiredBefore(const AnnotatedLine &Line,
    228                            const AnnotatedToken &Tok);
    229 
    230   bool canBreakBefore(const AnnotatedLine &Line, const AnnotatedToken &Right);
    231 
    232   const FormatStyle &Style;
    233   SourceManager &SourceMgr;
    234   Lexer &Lex;
    235 
    236   // Contextual keywords:
    237   IdentifierInfo &Ident_in;
    238 };
    239 
    240 } // end namespace format
    241 } // end namespace clang
    242 
    243 #endif // LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
    244