# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import collections
import itertools
import token
import tokenize


def _Pairwise(iterable):
  """s -> (None, s0), (s0, s1), (s1, s2), (s2, s3), ..."""
  a, b = itertools.tee(iterable)
  a = itertools.chain((None,), a)
  # BUG FIX: itertools.izip exists only in Python 2; the built-in zip is
  # the Python 3 equivalent and behaves identically for this pairing.
  return zip(a, b)


class OffsetToken(object):
  """A Python token with a relative position.

  A token is represented by a type defined in Python's token module, a string
  representing the content, and an offset. Using relative positions makes it
  easy to insert and remove tokens.
  """

  def __init__(self, token_type, string, offset):
    self._type = token_type
    self._string = string
    self._offset = offset

  @property
  def type(self):
    # Numeric token type, as defined in the stdlib token module.
    return self._type

  @property
  def type_name(self):
    # Human-readable name for the token type (e.g. 'NAME', 'OP').
    return token.tok_name[self._type]

  @property
  def string(self):
    return self._string

  @string.setter
  def string(self, value):
    # The string is the only mutable part of a token; callers rewrite
    # token text in place without recomputing offsets.
    self._string = value

  @property
  def offset(self):
    # (row_delta, col) relative to the end of the previous token. When
    # row_delta is 0, col is the column gap from the previous token's end;
    # otherwise col is an absolute column on the new row.
    return self._offset

  def __str__(self):
    return str((self.type_name, self.string, self.offset))


def Tokenize(f):
  """Read tokens from a file-like object.

  Args:
    f: Any object that has a readline method.

  Returns:
    A collections.deque containing OffsetTokens. Deques are cheaper and easier
    to manipulate sequentially than lists.
  """
  f.seek(0)
  tokenize_tokens = tokenize.generate_tokens(f.readline)

  offset_tokens = collections.deque()
  for prev_token, next_token in _Pairwise(tokenize_tokens):
    token_type, string, (srow, scol), _, _ = next_token
    if not prev_token:
      # The very first token anchors the stream at the origin.
      offset_tokens.append(OffsetToken(token_type, string, (0, 0)))
    else:
      erow, ecol = prev_token[3]
      if erow == srow:
        # Same row: store the horizontal gap from the previous token's end.
        offset_tokens.append(OffsetToken(token_type, string, (0, scol - ecol)))
      else:
        # New row: store the row delta and the absolute column.
        offset_tokens.append(OffsetToken(
            token_type, string, (srow - erow, scol)))

  return offset_tokens


def Untokenize(offset_tokens):
  """Return the string representation of an iterable of OffsetTokens."""
  # Make a copy. Don't modify the original.
  offset_tokens = collections.deque(offset_tokens)

  # Strip leading NL tokens.
  while offset_tokens[0].type == tokenize.NL:
    offset_tokens.popleft()

  # Strip leading vertical whitespace.
  first_token = offset_tokens.popleft()
  # Take care not to modify the existing token. Create a new one in its place.
  first_token = OffsetToken(first_token.type, first_token.string,
                            (0, first_token.offset[1]))
  offset_tokens.appendleft(first_token)

  # Convert OffsetTokens to tokenize tokens by re-accumulating the relative
  # offsets into absolute (row, col) positions. The start and end positions
  # are deliberately identical, so tokenize.untokenize inserts exactly the
  # stored gap as whitespace between tokens.
  tokenize_tokens = []
  row = 1
  col = 0
  for t in offset_tokens:
    offset_row, offset_col = t.offset
    if offset_row == 0:
      col += offset_col
    else:
      row += offset_row
      col = offset_col
    tokenize_tokens.append((t.type, t.string, (row, col), (row, col), None))

  # tokenize can't handle whitespace before line continuations.
  # So add a space.
  return tokenize.untokenize(tokenize_tokens).replace('\\\n', ' \\\n')