Home | History | Annotate | Download | only in yapflib
      1 # Copyright 2015 Google Inc. All Rights Reserved.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
      4 # you may not use this file except in compliance with the License.
      5 # You may obtain a copy of the License at
      6 #
      7 #     http://www.apache.org/licenses/LICENSE-2.0
      8 #
      9 # Unless required by applicable law or agreed to in writing, software
     10 # distributed under the License is distributed on an "AS IS" BASIS,
     11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
     14 """Pytree nodes with extra formatting information.
     15 
     16 This is a thin wrapper around a pytree.Leaf node.
     17 """
     18 
     19 import keyword
     20 import re
     21 
     22 from lib2to3.pgen2 import token
     23 
     24 from yapf.yapflib import py3compat
     25 from yapf.yapflib import pytree_utils
     26 from yapf.yapflib import style
     27 
     28 CONTINUATION = token.N_TOKENS
     29 
     30 
     31 class Subtype(object):
     32   """Subtype information about tokens.
     33 
     34   Gleaned from parsing the code. Helps determine the best formatting.
     35   """
     36   NONE = 0
     37   UNARY_OPERATOR = 1
     38   BINARY_OPERATOR = 2
     39   SUBSCRIPT_COLON = 3
     40   SUBSCRIPT_BRACKET = 4
     41   DEFAULT_OR_NAMED_ASSIGN = 5
     42   DEFAULT_OR_NAMED_ASSIGN_ARG_LIST = 6
     43   VARARGS_LIST = 7
     44   VARARGS_STAR = 8
     45   KWARGS_STAR_STAR = 9
     46   ASSIGN_OPERATOR = 10
     47   DICTIONARY_KEY = 11
     48   DICTIONARY_KEY_PART = 12
     49   DICTIONARY_VALUE = 13
     50   DICT_SET_GENERATOR = 14
     51   COMP_EXPR = 21
     52   COMP_FOR = 15
     53   COMP_IF = 16
     54   FUNC_DEF = 17
     55   DECORATOR = 18
     56   TYPED_NAME = 19
     57   TYPED_NAME_ARG_LIST = 20
     58 
     59 
     60 def _TabbedContinuationAlignPadding(spaces, align_style, tab_width,
     61                                     continuation_indent_width):
     62   """Build padding string for continuation alignment in tabbed indentation.
     63 
     64   Arguments:
     65     spaces: (int) The number of spaces to place before the token for alignment.
     66     align_style: (str) The alignment style for continuation lines.
     67     tab_width: (int) Number of columns of each tab character.
     68     continuation_indent_width: (int) Indent columns for line continuations.
     69 
     70   Returns:
     71     A padding string for alignment with style specified by align_style option.
     72   """
     73   if align_style == 'FIXED':
     74     if spaces > 0:
     75       return '\t' * int(continuation_indent_width / tab_width)
     76     return ''
     77   elif align_style == 'VALIGN-RIGHT':
     78     return '\t' * int((spaces + tab_width - 1) / tab_width)
     79   return ' ' * spaces
     80 
     81 
     82 class FormatToken(object):
     83   """A wrapper around pytree Leaf nodes.
     84 
     85   This represents the token plus additional information useful for reformatting
     86   the code.
     87 
     88   Attributes:
     89     next_token: The token in the unwrapped line after this token or None if this
     90       is the last token in the unwrapped line.
     91     previous_token: The token in the unwrapped line before this token or None if
     92       this is the first token in the unwrapped line.
     93     matching_bracket: If a bracket token ('[', '{', or '(') the matching
     94       bracket.
     95     container_opening: If the object is in a container, this points to its
     96       opening bracket.
     97     container_elements: If this is the start of a container, a list of the
     98       elements in the container.
     99     whitespace_prefix: The prefix for the whitespace.
    100     spaces_required_before: The number of spaces required before a token. This
    101       is a lower-bound for the formatter and not a hard requirement. For
    102       instance, a comment may have n required spaces before it. But the
    103       formatter won't place n spaces before all comments. Only those that are
    104       moved to the end of a line of code. The formatter may use different
    105       spacing when appropriate.
    106     can_break_before: True if we're allowed to break before this token.
    107     must_break_before: True if we're required to break before this token.
    108     total_length: The total length of the unwrapped line up to and including
    109       whitespace and this token. However, this doesn't include the initial
    110       indentation amount.
    111     split_penalty: The penalty for splitting the line before this token.
    112   """
    113 
    114   def __init__(self, node):
    115     """Constructor.
    116 
    117     Arguments:
    118       node: (pytree.Leaf) The node that's being wrapped.
    119     """
    120     self.node = node
    121     self.next_token = None
    122     self.previous_token = None
    123     self.matching_bracket = None
    124     self.container_opening = None
    125     self.container_elements = []
    126     self.whitespace_prefix = ''
    127     self.can_break_before = False
    128     self.must_break_before = False
    129     self.total_length = 0  # TODO(morbo): Think up a better name.
    130     self.split_penalty = 0
    131 
    132     if self.is_comment:
    133       self.spaces_required_before = style.Get('SPACES_BEFORE_COMMENT')
    134     else:
    135       self.spaces_required_before = 0
    136 
    137     if self.is_continuation:
    138       self.value = self.node.value.rstrip()
    139     else:
    140       self.value = self.node.value
    141 
    142   def AddWhitespacePrefix(self, newlines_before, spaces=0, indent_level=0):
    143     """Register a token's whitespace prefix.
    144 
    145     This is the whitespace that will be output before a token's string.
    146 
    147     Arguments:
    148       newlines_before: (int) The number of newlines to place before the token.
    149       spaces: (int) The number of spaces to place before the token.
    150       indent_level: (int) The indentation level.
    151     """
    152     if style.Get('USE_TABS'):
    153       if newlines_before > 0:
    154         indent_before = '\t' * indent_level + _TabbedContinuationAlignPadding(
    155             spaces, style.Get('CONTINUATION_ALIGN_STYLE'),
    156             style.Get('INDENT_WIDTH'), style.Get('CONTINUATION_INDENT_WIDTH'))
    157       else:
    158         indent_before = '\t' * indent_level + ' ' * spaces
    159     else:
    160       indent_before = (
    161           ' ' * indent_level * style.Get('INDENT_WIDTH') + ' ' * spaces)
    162 
    163     if self.is_comment:
    164       comment_lines = [s.lstrip() for s in self.value.splitlines()]
    165       self.node.value = ('\n' + indent_before).join(comment_lines)
    166 
    167       # Update our own value since we are changing node value
    168       self.value = self.node.value
    169 
    170     if not self.whitespace_prefix:
    171       self.whitespace_prefix = (
    172           '\n' * (self.newlines or newlines_before) + indent_before)
    173     else:
    174       self.whitespace_prefix += indent_before
    175 
    176   def AdjustNewlinesBefore(self, newlines_before):
    177     """Change the number of newlines before this token."""
    178     self.whitespace_prefix = (
    179         '\n' * newlines_before + self.whitespace_prefix.lstrip('\n'))
    180 
    181   def RetainHorizontalSpacing(self, first_column, depth):
    182     """Retains a token's horizontal spacing."""
    183     previous = self.previous_token
    184     if not previous:
    185       return
    186 
    187     if previous.is_pseudo_paren:
    188       previous = previous.previous_token
    189       if not previous:
    190         return
    191 
    192     cur_lineno = self.lineno
    193     prev_lineno = previous.lineno
    194     if previous.is_multiline_string:
    195       prev_lineno += previous.value.count('\n')
    196 
    197     if (cur_lineno != prev_lineno or
    198         (previous.is_pseudo_paren and previous.value != ')' and
    199          cur_lineno != previous.previous_token.lineno)):
    200       self.spaces_required_before = (
    201           self.column - first_column + depth * style.Get('INDENT_WIDTH'))
    202       return
    203 
    204     cur_column = self.node.column
    205     prev_column = previous.node.column
    206     prev_len = len(previous.value)
    207 
    208     if previous.is_pseudo_paren and previous.value == ')':
    209       prev_column -= 1
    210       prev_len = 0
    211 
    212     if previous.is_multiline_string:
    213       prev_len = len(previous.value.split('\n')[-1])
    214       if '\n' in previous.value:
    215         prev_column = 0  # Last line starts in column 0.
    216 
    217     self.spaces_required_before = cur_column - (prev_column + prev_len)
    218 
    219   def OpensScope(self):
    220     return self.value in pytree_utils.OPENING_BRACKETS
    221 
    222   def ClosesScope(self):
    223     return self.value in pytree_utils.CLOSING_BRACKETS
    224 
    225   def __repr__(self):
    226     msg = 'FormatToken(name={0}, value={1}'.format(self.name, self.value)
    227     msg += ', pseudo)' if self.is_pseudo_paren else ')'
    228     return msg
    229 
    230   @property
    231   @py3compat.lru_cache()
    232   def node_split_penalty(self):
    233     """Split penalty attached to the pytree node of this token."""
    234     return pytree_utils.GetNodeAnnotation(
    235         self.node, pytree_utils.Annotation.SPLIT_PENALTY, default=0)
    236 
    237   @property
    238   def newlines(self):
    239     """The number of newlines needed before this token."""
    240     return pytree_utils.GetNodeAnnotation(self.node,
    241                                           pytree_utils.Annotation.NEWLINES)
    242 
    243   @property
    244   def must_split(self):
    245     """Return true if the token requires a split before it."""
    246     return pytree_utils.GetNodeAnnotation(self.node,
    247                                           pytree_utils.Annotation.MUST_SPLIT)
    248 
    249   @property
    250   def column(self):
    251     """The original column number of the node in the source."""
    252     return self.node.column
    253 
    254   @property
    255   def lineno(self):
    256     """The original line number of the node in the source."""
    257     return self.node.lineno
    258 
    259   @property
    260   @py3compat.lru_cache()
    261   def subtypes(self):
    262     """Extra type information for directing formatting."""
    263     value = pytree_utils.GetNodeAnnotation(self.node,
    264                                            pytree_utils.Annotation.SUBTYPE)
    265     return [Subtype.NONE] if value is None else value
    266 
    267   @property
    268   @py3compat.lru_cache()
    269   def is_binary_op(self):
    270     """Token is a binary operator."""
    271     return Subtype.BINARY_OPERATOR in self.subtypes
    272 
    273   @property
    274   @py3compat.lru_cache()
    275   def name(self):
    276     """A string representation of the node's name."""
    277     return pytree_utils.NodeName(self.node)
    278 
    279   @property
    280   def is_comment(self):
    281     return self.node.type == token.COMMENT
    282 
    283   @property
    284   def is_continuation(self):
    285     return self.node.type == CONTINUATION
    286 
    287   @property
    288   @py3compat.lru_cache()
    289   def is_keyword(self):
    290     return keyword.iskeyword(self.value)
    291 
    292   @property
    293   @py3compat.lru_cache()
    294   def is_name(self):
    295     return self.node.type == token.NAME and not self.is_keyword
    296 
    297   @property
    298   def is_number(self):
    299     return self.node.type == token.NUMBER
    300 
    301   @property
    302   def is_string(self):
    303     return self.node.type == token.STRING
    304 
    305   @property
    306   @py3compat.lru_cache()
    307   def is_multiline_string(self):
    308     """A multiline string."""
    309     if py3compat.PY3:
    310       prefix = '('
    311       prefix += 'r|u|R|U|f|F|fr|Fr|fR|FR|rf|rF|Rf|RF'  # strings
    312       prefix += '|b|B|br|Br|bR|BR|rb|rB|Rb|RB'  # bytes
    313       prefix += ')?'
    314     else:
    315       prefix = '[uUbB]?[rR]?'
    316 
    317     regex = r'^{prefix}(?P<delim>"""|\'\'\').*(?P=delim)$'.format(prefix=prefix)
    318     return (self.is_string and
    319             re.match(regex, self.value, re.DOTALL) is not None)
    320 
    321   @property
    322   @py3compat.lru_cache()
    323   def is_docstring(self):
    324     return self.is_multiline_string and not self.node.prev_sibling
    325 
    326   @property
    327   @py3compat.lru_cache()
    328   def is_pseudo_paren(self):
    329     return hasattr(self.node, 'is_pseudo') and self.node.is_pseudo
    330 
    331   @property
    332   def is_pylint_comment(self):
    333     return self.is_comment and re.match(r'#.*\bpylint:\s*(disable|enable)=',
    334                                         self.value)
    335 
    336   @property
    337   def is_pytype_comment(self):
    338     return self.is_comment and re.match(r'#.*\bpytype:\s*(disable|enable)=',
    339                                         self.value)
    340