# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import collections
import itertools
import token
import tokenize


def _Pairwise(iterable):
  """s -> (None, s0), (s0, s1), (s1, s2), (s2, s3), ..."""
  a, b = itertools.tee(iterable)
  a = itertools.chain((None,), a)
  return itertools.izip(a, b)

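# Illustrative sketch (an addition, not from the original module): _Pairwise
# pairs each item with the item before it, starting from None, e.g.
#   list(_Pairwise('abc')) == [(None, 'a'), ('a', 'b'), ('b', 'c')]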

class OffsetToken(object):
  """A Python token with a relative position.

  A token is represented by a type defined in Python's token module, a string
  representing the content, and an offset. Using relative positions makes it
  easy to insert and remove tokens.
  """

  def __init__(self, token_type, string, offset):
    self._type = token_type
    self._string = string
    self._offset = offset

  @property
  def type(self):
    return self._type

  @property
  def type_name(self):
    return token.tok_name[self._type]

  @property
  def string(self):
    return self._string

  @string.setter
  def string(self, value):
    self._string = value

  @property
  def offset(self):
    return self._offset

  def __str__(self):
    return str((self.type_name, self.string, self.offset))

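# Illustrative note (an addition, not from the original file): Tokenize() below
# encodes each offset as a (row, column) pair. A token that starts on the same
# row as the previous token's end gets (0, columns_after_previous_end); a token
# that starts on a later row gets (rows_down, absolute_start_column). For
# example, an OP token one column after its predecessor could be built as
#   OffsetToken(token.OP, '=', (0, 1))
# and str() of it gives "('OP', '=', (0, 1))".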

def Tokenize(f):
  """Read tokens from a file-like object.

  Args:
    f: Any object that has seek and readline methods.

  Returns:
    A collections.deque containing OffsetTokens. Deques support cheap appends
    and pops at both ends, so they are easier to manipulate sequentially than
    lists.
  """
  f.seek(0)
  tokenize_tokens = tokenize.generate_tokens(f.readline)

  offset_tokens = collections.deque()
  for prev_token, next_token in _Pairwise(tokenize_tokens):
    token_type, string, (srow, scol), _, _ = next_token
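    # Offset of next_token relative to the end of prev_token: tokens on the
    # same row store (0, column delta); tokens on a later row store
    # (row delta, absolute start column).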
    if not prev_token:
      offset_tokens.append(OffsetToken(token_type, string, (0, 0)))
    else:
      erow, ecol = prev_token[3]
      if erow == srow:
        offset_tokens.append(OffsetToken(token_type, string, (0, scol - ecol)))
      else:
        offset_tokens.append(OffsetToken(
            token_type, string, (srow - erow, scol)))

  return offset_tokens

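# Illustrative usage sketch (an addition; assumes Python 2, matching the
# itertools.izip call above): Tokenize() only needs an object with seek() and
# readline(), e.g.
#   import StringIO
#   tokens = Tokenize(StringIO.StringIO('x = 1\n'))
# yields OffsetTokens whose offsets are (0, 0) for 'x', (0, 1) for '=',
# (0, 1) for '1', (0, 0) for the NEWLINE, and (1, 0) for the ENDMARKER.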

def Untokenize(offset_tokens):
  """Return the string representation of an iterable of OffsetTokens."""
  # Make a copy. Don't modify the original.
  offset_tokens = collections.deque(offset_tokens)

  # Strip leading NL tokens.
  while offset_tokens[0].type == tokenize.NL:
    offset_tokens.popleft()

  # Strip leading vertical whitespace.
  first_token = offset_tokens.popleft()
  # Take care not to modify the existing token. Create a new one in its place.
  first_token = OffsetToken(first_token.type, first_token.string,
                            (0, first_token.offset[1]))
  offset_tokens.appendleft(first_token)

  # Convert OffsetTokens to tokenize tokens.
  tokenize_tokens = []
  row = 1
  col = 0
  for t in offset_tokens:
    offset_row, offset_col = t.offset
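    # Rebuild absolute positions: a zero row offset keeps the token on the
    # current row (advance the column); otherwise move down offset_row rows
    # and take the column as absolute.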
    if offset_row == 0:
      col += offset_col
    else:
      row += offset_row
      col = offset_col
    tokenize_tokens.append((t.type, t.string, (row, col), (row, col), None))

  # tokenize can't handle whitespace before line continuations.
  # So add a space.
  return tokenize.untokenize(tokenize_tokens).replace('\\\n', ' \\\n')
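

if __name__ == '__main__':
  # Illustrative round-trip sketch, not part of the original module. It assumes
  # Python 2 (this file uses itertools.izip) and the standard StringIO module:
  # tokenize a snippet, rename a variable, and reassemble the source.
  import StringIO
  example_tokens = Tokenize(StringIO.StringIO('x = 1\ny = x + 2\n'))
  for example_token in example_tokens:
    if example_token.type == token.NAME and example_token.string == 'x':
      example_token.string = 'renamed'
  # Prints the snippet with both occurrences of 'x' renamed.
  print Untokenize(example_tokens)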
    116