# Copyright (C) 2010-2011 Hideo Hattori
# Copyright (C) 2011-2013 Hideo Hattori, Steven Myint
# Copyright (C) 2013-2014 Hideo Hattori, Steven Myint, Bill Wendling
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""Automatically formats Python code to conform to the PEP 8 style guide.

Fixes that only need to be done once can be added as functions of the form
"fix_<code>(source)" in this module. They should return the fixed source code.
These fixes are picked up by apply_global_fixes().

Fixes that depend on pep8 should be added as methods to FixPEP8. See the class
documentation for more information.

"""
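# A minimal sketch of a global fix in the "fix_<code>(source)" form described
# above. The W391 name and behavior are illustrative assumptions, not part of
# this module:
#
#     def fix_w391(source):
#         """Remove trailing blank lines."""
#         return source.rstrip() + '\n' if source.strip() else source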

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import bisect
import codecs
import collections
import copy
import difflib
import fnmatch
import inspect
import io
import itertools
import keyword
import locale
import os
import re
import signal
import sys
import token
import tokenize

import pep8


try:
    unicode
except NameError:
    unicode = str


__version__ = '1.0.3'


CR = '\r'
LF = '\n'
CRLF = '\r\n'


PYTHON_SHEBANG_REGEX = re.compile(r'^#!.*\bpython[23]?\b\s*$')


# For generating line shortening candidates.
SHORTEN_OPERATOR_GROUPS = frozenset([
    frozenset([',']),
    frozenset(['%']),
    frozenset([',', '(', '[', '{']),
    frozenset(['%', '(', '[', '{']),
    frozenset([',', '(', '[', '{', '%', '+', '-', '*', '/', '//']),
    frozenset(['%', '+', '-', '*', '/', '//']),
])


DEFAULT_IGNORE = 'E24'
DEFAULT_INDENT_SIZE = 4


# W602 is handled separately due to the need to avoid "with_traceback".
CODE_TO_2TO3 = {
    'E721': ['idioms'],
    'W601': ['has_key'],
    'W603': ['ne'],
    'W604': ['repr'],
    'W690': ['apply',
             'except',
             'exitfunc',
             'import',
             'numliterals',
             'operator',
             'paren',
             'reduce',
             'renames',
             'standarderror',
             'sys_exc',
             'throw',
             'tuple_params',
             'xreadlines']}


def open_with_encoding(filename, encoding=None, mode='r'):
    """Return opened file with a specific encoding."""
    if not encoding:
        encoding = detect_encoding(filename)

    return io.open(filename, mode=mode, encoding=encoding,
                   newline='')  # Preserve line endings


def detect_encoding(filename):
    """Return file encoding."""
    try:
        with open(filename, 'rb') as input_file:
            from lib2to3.pgen2 import tokenize as lib2to3_tokenize
            encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0]

        # Check for correctness of encoding
        with open_with_encoding(filename, encoding) as test_file:
            test_file.read()

        return encoding
    except (LookupError, SyntaxError, UnicodeDecodeError):
        return 'latin-1'


def readlines_from_file(filename):
    """Return contents of file."""
    with open_with_encoding(filename) as input_file:
        return input_file.readlines()
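# Example use of the I/O helpers above (a sketch; assumes 'example.py' exists
# on disk):
#
#     source_lines = readlines_from_file('example.py')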


def extended_blank_lines(logical_line,
                         blank_lines,
                         indent_level,
                         previous_logical):
    """Check for missing blank lines after class declaration."""
    if previous_logical.startswith('class '):
        if (
            logical_line.startswith(('def ', 'class ', '@')) or
            pep8.DOCSTRING_REGEX.match(logical_line)
        ):
            if indent_level and not blank_lines:
                yield (0, 'E309 expected 1 blank line after class declaration')
    elif previous_logical.startswith('def '):
        if blank_lines and pep8.DOCSTRING_REGEX.match(logical_line):
            yield (0, 'E303 too many blank lines ({0})'.format(blank_lines))
    elif pep8.DOCSTRING_REGEX.match(previous_logical):
        # Missing blank line between class docstring and method declaration.
        if (
            indent_level and
            not blank_lines and
            logical_line.startswith('def ') and
            '(self' in logical_line
        ):
            yield (0, 'E301 expected 1 blank line, found 0')
pep8.register_check(extended_blank_lines)
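# For example, the check registered above reports E309 on code like the
# following hypothetical snippet:
#
#     class Foo(object):
#         def bar(self):  # E309 expected 1 blank line after class declaration
#             pass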


def continued_indentation(logical_line, tokens, indent_level, indent_char,
                          noqa):
    """Override pep8's function to provide indentation information."""
    first_row = tokens[0][2][0]
    nrows = 1 + tokens[-1][2][0] - first_row
    if noqa or nrows == 1:
        return

    # indent_next tells us whether the next block is indented. Assuming
    # that it is indented by 4 spaces, then we should not allow 4-space
    # indents on the final continuation line. In turn, some other
    # indents are allowed to have an extra 4 spaces.
    indent_next = logical_line.endswith(':')

    row = depth = 0
    valid_hangs = (
        (DEFAULT_INDENT_SIZE,)
        if indent_char != '\t' else (DEFAULT_INDENT_SIZE,
                                     2 * DEFAULT_INDENT_SIZE)
    )

    # Remember how many brackets were opened on each line.
    parens = [0] * nrows

    # Relative indents of physical lines.
    rel_indent = [0] * nrows

    # For each depth, collect a list of opening rows.
    open_rows = [[0]]
    # For each depth, memorize the hanging indentation.
    hangs = [None]

    # Visual indents.
    indent_chances = {}
    last_indent = tokens[0][2]
    indent = [last_indent[1]]

    last_token_multiline = None
    line = None
    last_line = ''
    last_line_begins_with_multiline = False
    for token_type, text, start, end, line in tokens:

        newline = row < start[0] - first_row
        if newline:
            row = start[0] - first_row
            newline = (not last_token_multiline and
                       token_type not in (tokenize.NL, tokenize.NEWLINE))
            last_line_begins_with_multiline = last_token_multiline

        if newline:
            # This is the beginning of a continuation line.
            last_indent = start

            # Record the initial indent.
            rel_indent[row] = pep8.expand_indent(line) - indent_level

            # Identify closing bracket.
            close_bracket = (token_type == tokenize.OP and text in ']})')

            # Is the indent relative to an opening bracket line?
            for open_row in reversed(open_rows[depth]):
                hang = rel_indent[row] - rel_indent[open_row]
                hanging_indent = hang in valid_hangs
                if hanging_indent:
                    break
            if hangs[depth]:
                hanging_indent = (hang == hangs[depth])

            visual_indent = (not close_bracket and hang > 0 and
                             indent_chances.get(start[1]))

            if close_bracket and indent[depth]:
                # Closing bracket for visual indent.
                if start[1] != indent[depth]:
                    yield (start, 'E124 {0}'.format(indent[depth]))
            elif close_bracket and not hang:
                pass
            elif indent[depth] and start[1] < indent[depth]:
                # Visual indent is broken.
                yield (start, 'E128 {0}'.format(indent[depth]))
            elif (hanging_indent or
                  (indent_next and
                   rel_indent[row] == 2 * DEFAULT_INDENT_SIZE)):
                # Hanging indent is verified.
                if close_bracket:
                    yield (start, 'E123 {0}'.format(indent_level +
                                                    rel_indent[open_row]))
                hangs[depth] = hang
            elif visual_indent is True:
                # Visual indent is verified.
                indent[depth] = start[1]
            elif visual_indent in (text, unicode):
                # Ignore token lined up with matching one from a previous line.
                pass
            else:
                one_indented = (indent_level + rel_indent[open_row] +
                                DEFAULT_INDENT_SIZE)
                # Indent is broken.
                if hang <= 0:
                    error = ('E122', one_indented)
                elif indent[depth]:
                    error = ('E127', indent[depth])
                elif hang > DEFAULT_INDENT_SIZE:
                    error = ('E126', one_indented)
                else:
                    hangs[depth] = hang
                    error = ('E121', one_indented)

                yield (start, '{0} {1}'.format(*error))

        # Look for visual indenting.
        if (parens[row] and token_type not in (tokenize.NL, tokenize.COMMENT)
                and not indent[depth]):
            indent[depth] = start[1]
            indent_chances[start[1]] = True
        # Deal with implicit string concatenation.
        elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
              text in ('u', 'ur', 'b', 'br')):
            indent_chances[start[1]] = unicode
        # Special case for the "if" statement because len("if (") is equal to
        # 4.
        elif not indent_chances and not row and not depth and text == 'if':
            indent_chances[end[1] + 1] = True
        elif text == ':' and line[end[1]:].isspace():
            open_rows[depth].append(row)

        # Keep track of bracket depth.
        if token_type == tokenize.OP:
            if text in '([{':
                depth += 1
                indent.append(0)
                hangs.append(None)
                if len(open_rows) == depth:
                    open_rows.append([])
                open_rows[depth].append(row)
                parens[row] += 1
            elif text in ')]}' and depth > 0:
                # Parent indents should not be more than this one.
                prev_indent = indent.pop() or last_indent[1]
                hangs.pop()
                for d in range(depth):
                    if indent[d] > prev_indent:
                        indent[d] = 0
                for ind in list(indent_chances):
                    if ind >= prev_indent:
                        del indent_chances[ind]
                del open_rows[depth + 1:]
                depth -= 1
                if depth:
                    indent_chances[indent[depth]] = True
                for idx in range(row, -1, -1):
                    if parens[idx]:
                        parens[idx] -= 1
                        break
            assert len(indent) == depth + 1
            if (
                start[1] not in indent_chances and
                # This is for purposes of speeding up E121 (GitHub #90).
                not last_line.rstrip().endswith(',')
            ):
                # Allow lining up tokens.
                indent_chances[start[1]] = text

        last_token_multiline = (start[0] != end[0])
        if last_token_multiline:
            rel_indent[end[0] - first_row] = rel_indent[row]

        last_line = line

    if (
        indent_next and
        not last_line_begins_with_multiline and
        pep8.expand_indent(line) == indent_level + DEFAULT_INDENT_SIZE
    ):
        pos = (start[0], indent[0] + 4)
        yield (pos, 'E125 {0}'.format(indent_level +
                                      2 * DEFAULT_INDENT_SIZE))
del pep8._checks['logical_line'][pep8.continued_indentation]
pep8.register_check(continued_indentation)


class FixPEP8(object):

    """Fix invalid code.

    Fixer methods are prefixed "fix_". The _fix_source() method looks for these
    automatically.

    The fixer method can take either one or two arguments (in addition to
    self). The first argument is "result", which is the error information from
    pep8. The second argument, "logical", is required only for logical-line
    fixes.

    The fixer method can return the list of modified lines or None. An empty
    list would mean that no changes were made. None would mean that only the
    line reported in the pep8 error was modified. Note that the modified line
    numbers that are returned are indexed at 1. This typically would correspond
    with the line number reported in the pep8 error information.

    [fixed method list]
        - e121,e122,e123,e124,e125,e126,e127,e128,e129
        - e201,e202,e203
        - e211
        - e221,e222,e223,e224,e225
        - e231
        - e251
        - e261,e262
        - e271,e272,e273,e274
        - e301,e302,e303
        - e401
        - e502
        - e701,e702
        - e711
        - w291

    """
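    # Typical use (a sketch; a real "options" object comes from this module's
    # command-line parser, defined elsewhere in the file):
    #
    #     fixer = FixPEP8(filename='<stdin>', options=options,
    #                     contents='x=1\n')
    #     fixed_source = fixer.fix()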

    def __init__(self, filename,
                 options,
                 contents=None,
                 long_line_ignore_cache=None):
        self.filename = filename
        if contents is None:
            self.source = readlines_from_file(filename)
        else:
            sio = io.StringIO(contents)
            self.source = sio.readlines()
        self.options = options
        self.indent_word = _get_indentword(''.join(self.source))

        self.long_line_ignore_cache = (
            set() if long_line_ignore_cache is None
            else long_line_ignore_cache)

        # Many fixers are the same even though pep8 categorizes them
        # differently.
        self.fix_e115 = self.fix_e112
        self.fix_e116 = self.fix_e113
        self.fix_e121 = self._fix_reindent
        self.fix_e122 = self._fix_reindent
        self.fix_e123 = self._fix_reindent
        self.fix_e124 = self._fix_reindent
        self.fix_e126 = self._fix_reindent
        self.fix_e127 = self._fix_reindent
        self.fix_e128 = self._fix_reindent
        self.fix_e129 = self._fix_reindent
        self.fix_e202 = self.fix_e201
        self.fix_e203 = self.fix_e201
        self.fix_e211 = self.fix_e201
        self.fix_e221 = self.fix_e271
        self.fix_e222 = self.fix_e271
        self.fix_e223 = self.fix_e271
        self.fix_e226 = self.fix_e225
        self.fix_e227 = self.fix_e225
        self.fix_e228 = self.fix_e225
        self.fix_e241 = self.fix_e271
        self.fix_e242 = self.fix_e224
        self.fix_e261 = self.fix_e262
        self.fix_e272 = self.fix_e271
        self.fix_e273 = self.fix_e271
        self.fix_e274 = self.fix_e271
        self.fix_e309 = self.fix_e301
        self.fix_e501 = (
            self.fix_long_line_logically if
            options and (options.aggressive >= 2 or options.experimental) else
            self.fix_long_line_physically)
        self.fix_e703 = self.fix_e702

        self._ws_comma_done = False

    def _fix_source(self, results):
        try:
            (logical_start, logical_end) = _find_logical(self.source)
            logical_support = True
        except (SyntaxError, tokenize.TokenError):  # pragma: no cover
            logical_support = False

        completed_lines = set()
        for result in sorted(results, key=_priority_key):
            if result['line'] in completed_lines:
                continue

            fixed_methodname = 'fix_' + result['id'].lower()
            if hasattr(self, fixed_methodname):
                fix = getattr(self, fixed_methodname)

                line_index = result['line'] - 1
                original_line = self.source[line_index]

                is_logical_fix = len(inspect.getargspec(fix).args) > 2
                if is_logical_fix:
                    logical = None
                    if logical_support:
                        logical = _get_logical(self.source,
                                               result,
                                               logical_start,
                                               logical_end)
                        if logical and set(range(
                            logical[0][0] + 1,
                            logical[1][0] + 1)).intersection(
                                completed_lines):
                            continue

                    modified_lines = fix(result, logical)
                else:
                    modified_lines = fix(result)

                if modified_lines is None:
                    # Force logical fixes to report what they modified.
                    assert not is_logical_fix

                    if self.source[line_index] == original_line:
                        modified_lines = []

                if modified_lines:
                    completed_lines.update(modified_lines)
                elif modified_lines == []:  # Empty list means no fix
                    if self.options.verbose >= 2:
                        print(
                            '--->  Not fixing {f} on line {l}'.format(
                                f=result['id'], l=result['line']),
                            file=sys.stderr)
                else:  # We assume one-line fix when None.
                    completed_lines.add(result['line'])
            else:
                if self.options.verbose >= 3:
                    print(
                        "--->  '{0}' is not defined.".format(fixed_methodname),
                        file=sys.stderr)

                    info = result['info'].strip()
                    print('--->  {0}:{1}:{2}:{3}'.format(self.filename,
                                                         result['line'],
                                                         result['column'],
                                                         info),
                          file=sys.stderr)

    def fix(self):
        """Return a version of the source code with PEP 8 violations fixed."""
        pep8_options = {
            'ignore': self.options.ignore,
            'select': self.options.select,
            'max_line_length': self.options.max_line_length,
        }
        results = _execute_pep8(pep8_options, self.source)

        if self.options.verbose:
            progress = {}
            for r in results:
                if r['id'] not in progress:
                    progress[r['id']] = set()
                progress[r['id']].add(r['line'])
            print('--->  {n} issue(s) to fix {progress}'.format(
                n=len(results), progress=progress), file=sys.stderr)

        if self.options.line_range:
            start, end = self.options.line_range
            results = [r for r in results
                       if start <= r['line'] <= end]

        self._fix_source(filter_results(source=''.join(self.source),
                                        results=results,
                                        aggressive=self.options.aggressive))

        if self.options.line_range:
            # If number of lines has changed then change line_range.
            count = sum(sline.count('\n')
                        for sline in self.source[start - 1:end])
            self.options.line_range[1] = start + count - 1

        return ''.join(self.source)

    def _fix_reindent(self, result):
        """Fix a badly indented line.

        This is done by adding or removing from its initial indent only.

        """
        num_indent_spaces = int(result['info'].split()[1])
        line_index = result['line'] - 1
        target = self.source[line_index]

        self.source[line_index] = ' ' * num_indent_spaces + target.lstrip()

    def fix_e112(self, result):
        """Fix under-indented comments."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        if not target.lstrip().startswith('#'):
            # Don't screw with invalid syntax.
            return []

        self.source[line_index] = self.indent_word + target

    def fix_e113(self, result):
        """Fix over-indented comments."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        indent = _get_indentation(target)
        stripped = target.lstrip()

        if not stripped.startswith('#'):
            # Don't screw with invalid syntax.
            return []

        self.source[line_index] = indent[1:] + stripped

    def fix_e125(self, result):
        """Fix indentation indistinguishable from the next logical line."""
        num_indent_spaces = int(result['info'].split()[1])
        line_index = result['line'] - 1
        target = self.source[line_index]

        spaces_to_add = num_indent_spaces - len(_get_indentation(target))
        indent = len(_get_indentation(target))
        modified_lines = []

        while len(_get_indentation(self.source[line_index])) >= indent:
            self.source[line_index] = (' ' * spaces_to_add +
                                       self.source[line_index])
            modified_lines.append(1 + line_index)  # Line indexed at 1.
            line_index -= 1

        return modified_lines

    def fix_e201(self, result):
        """Remove extraneous whitespace."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        offset = result['column'] - 1

        if is_probably_part_of_multiline(target):
            return []

        fixed = fix_whitespace(target,
                               offset=offset,
                               replacement='')

        self.source[line_index] = fixed

    def fix_e224(self, result):
        """Remove extraneous whitespace around operator."""
        target = self.source[result['line'] - 1]
        offset = result['column'] - 1
        fixed = target[:offset] + target[offset:].replace('\t', ' ')
        self.source[result['line'] - 1] = fixed

    def fix_e225(self, result):
        """Fix missing whitespace around operator."""
        target = self.source[result['line'] - 1]
        offset = result['column'] - 1
        fixed = target[:offset] + ' ' + target[offset:]

        # Only proceed if non-whitespace characters match.
        # And make sure we don't break the indentation.
        if (
            fixed.replace(' ', '') == target.replace(' ', '') and
            _get_indentation(fixed) == _get_indentation(target)
        ):
            self.source[result['line'] - 1] = fixed
        else:
            return []

    def fix_e231(self, result):
        """Add missing whitespace."""
        # Optimize for comma case. This will fix all commas in the full source
        # code in one pass. Don't do this more than once. If it fails the first
        # time, there is no point in trying again.
        if ',' in result['info'] and not self._ws_comma_done:
            self._ws_comma_done = True
            original = ''.join(self.source)
            new = refactor(original, ['ws_comma'])
            if original.strip() != new.strip():
                self.source = [new]
                return range(1, 1 + len(original))

        line_index = result['line'] - 1
        target = self.source[line_index]
        offset = result['column']
        fixed = target[:offset] + ' ' + target[offset:]
        self.source[line_index] = fixed

    def fix_e251(self, result):
        """Remove whitespace around parameter '=' sign."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        # This is necessary since pep8 sometimes reports columns that go
        # past the end of the physical line. This happens in cases like:
        # foo(bar\n=None)
        c = min(result['column'] - 1,
                len(target) - 1)

        if target[c].strip():
            fixed = target
        else:
            fixed = target[:c].rstrip() + target[c:].lstrip()

        # There could be an escaped newline
        #
        #     def foo(a=\
        #             1)
        if fixed.endswith(('=\\\n', '=\\\r\n', '=\\\r')):
            self.source[line_index] = fixed.rstrip('\n\r \t\\')
            self.source[line_index + 1] = self.source[line_index + 1].lstrip()
            return [line_index + 1, line_index + 2]  # Line indexed at 1

        self.source[result['line'] - 1] = fixed

    def fix_e262(self, result):
        """Fix spacing after comment hash."""
        target = self.source[result['line'] - 1]
        offset = result['column']

        code = target[:offset].rstrip(' \t#')
        comment = target[offset:].lstrip(' \t#')

        fixed = code + ('  # ' + comment if comment.strip() else '\n')

        self.source[result['line'] - 1] = fixed

    def fix_e271(self, result):
        """Fix extraneous whitespace around keywords."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        offset = result['column'] - 1

        if is_probably_part_of_multiline(target):
            return []

        fixed = fix_whitespace(target,
                               offset=offset,
                               replacement=' ')

        if fixed == target:
            return []
        else:
            self.source[line_index] = fixed

    def fix_e301(self, result):
        """Add missing blank line."""
        cr = '\n'
        self.source[result['line'] - 1] = cr + self.source[result['line'] - 1]

    def fix_e302(self, result):
        """Add missing 2 blank lines."""
        add_linenum = 2 - int(result['info'].split()[-1])
        cr = '\n' * add_linenum
        self.source[result['line'] - 1] = cr + self.source[result['line'] - 1]

    def fix_e303(self, result):
        """Remove extra blank lines."""
        delete_linenum = int(result['info'].split('(')[1].split(')')[0]) - 2
        delete_linenum = max(1, delete_linenum)

        # We need to count because pep8 reports an offset line number if there
        # are comments.
        cnt = 0
        line = result['line'] - 2
        modified_lines = []
        while cnt < delete_linenum and line >= 0:
            if not self.source[line].strip():
                self.source[line] = ''
                modified_lines.append(1 + line)  # Line indexed at 1
                cnt += 1
            line -= 1

        return modified_lines

    def fix_e304(self, result):
        """Remove blank line following function decorator."""
        line = result['line'] - 2
        if not self.source[line].strip():
            self.source[line] = ''

    def fix_e401(self, result):
        """Put imports on separate lines."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        offset = result['column'] - 1

        if not target.lstrip().startswith('import'):
            return []

        indentation = re.split(pattern=r'\bimport\b',
                               string=target, maxsplit=1)[0]
        fixed = (target[:offset].rstrip('\t ,') + '\n' +
                 indentation + 'import ' + target[offset:].lstrip('\t ,'))
        self.source[line_index] = fixed

    def fix_long_line_logically(self, result, logical):
        """Try to make lines fit within --max-line-length characters."""
        if (
            not logical or
            len(logical[2]) == 1 or
            self.source[result['line'] - 1].lstrip().startswith('#')
        ):
            return self.fix_long_line_physically(result)

        start_line_index = logical[0][0]
        end_line_index = logical[1][0]
        logical_lines = logical[2]

        previous_line = get_item(self.source, start_line_index - 1, default='')
        next_line = get_item(self.source, end_line_index + 1, default='')

        single_line = join_logical_line(''.join(logical_lines))

        try:
            fixed = self.fix_long_line(
                target=single_line,
                previous_line=previous_line,
                next_line=next_line,
                original=''.join(logical_lines))
        except (SyntaxError, tokenize.TokenError):
            return self.fix_long_line_physically(result)

        if fixed:
            for line_index in range(start_line_index, end_line_index + 1):
                self.source[line_index] = ''
            self.source[start_line_index] = fixed
            return range(start_line_index + 1, end_line_index + 1)
        else:
            return []

    def fix_long_line_physically(self, result):
        """Try to make lines fit within --max-line-length characters."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        previous_line = get_item(self.source, line_index - 1, default='')
        next_line = get_item(self.source, line_index + 1, default='')

        try:
            fixed = self.fix_long_line(
                target=target,
                previous_line=previous_line,
                next_line=next_line,
                original=target)
        except (SyntaxError, tokenize.TokenError):
            return []

        if fixed:
            self.source[line_index] = fixed
            return [line_index + 1]
        else:
            return []

    def fix_long_line(self, target, previous_line,
                      next_line, original):
        cache_entry = (target, previous_line, next_line)
        if cache_entry in self.long_line_ignore_cache:
            return []

        if target.lstrip().startswith('#'):
            # Wrap commented lines.
            return shorten_comment(
                line=target,
                max_line_length=self.options.max_line_length,
                last_comment=not next_line.lstrip().startswith('#'))

        fixed = get_fixed_long_line(
            target=target,
            previous_line=previous_line,
            original=original,
            indent_word=self.indent_word,
            max_line_length=self.options.max_line_length,
            aggressive=self.options.aggressive,
            experimental=self.options.experimental,
            verbose=self.options.verbose)
        if fixed and not code_almost_equal(original, fixed):
            return fixed
        else:
            self.long_line_ignore_cache.add(cache_entry)
            return None

    def fix_e502(self, result):
        """Remove extraneous escape of newline."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        self.source[line_index] = target.rstrip('\n\r \t\\') + '\n'

    def fix_e701(self, result):
        """Put colon-separated compound statement on separate lines."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        c = result['column']

        fixed_source = (target[:c] + '\n' +
                        _get_indentation(target) + self.indent_word +
                        target[c:].lstrip('\n\r \t\\'))
        self.source[result['line'] - 1] = fixed_source
        return [result['line'], result['line'] + 1]

    def fix_e702(self, result, logical):
        """Put semicolon-separated compound statement on separate lines."""
        if not logical:
            return []  # pragma: no cover
        logical_lines = logical[2]

        line_index = result['line'] - 1
        target = self.source[line_index]

        if target.rstrip().endswith('\\'):
            # Normalize '1; \\\n2' into '1; 2'.
            self.source[line_index] = target.rstrip('\n \r\t\\')
            self.source[line_index + 1] = self.source[line_index + 1].lstrip()
            return [line_index + 1, line_index + 2]

        if target.rstrip().endswith(';'):
            self.source[line_index] = target.rstrip('\n \r\t;') + '\n'
            return [line_index + 1]

        offset = result['column'] - 1
        first = target[:offset].rstrip(';').rstrip()
        second = (_get_indentation(logical_lines[0]) +
                  target[offset:].lstrip(';').lstrip())

        self.source[line_index] = first + '\n' + second
        return [line_index + 1]

    def fix_e711(self, result):
        """Fix comparison with None."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        offset = result['column'] - 1

        right_offset = offset + 2
        if right_offset >= len(target):
            return []

        left = target[:offset].rstrip()
        center = target[offset:right_offset]
        right = target[right_offset:].lstrip()

        if not right.startswith('None'):
            return []

        if center.strip() == '==':
            new_center = 'is'
        elif center.strip() == '!=':
            new_center = 'is not'
        else:
            return []

        self.source[line_index] = ' '.join([left, new_center, right])

    def fix_e712(self, result):
        """Fix comparison with boolean."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        offset = result['column'] - 1

        # Handle very easy "not" special cases.
        if re.match(r'^\s*if \w+ == False:$', target):
            self.source[line_index] = re.sub(r'if (\w+) == False:',
                                             r'if not \1:', target, count=1)
        elif re.match(r'^\s*if \w+ != True:$', target):
            self.source[line_index] = re.sub(r'if (\w+) != True:',
                                             r'if not \1:', target, count=1)
        else:
            right_offset = offset + 2
            if right_offset >= len(target):
                return []

            left = target[:offset].rstrip()
            center = target[offset:right_offset]
            right = target[right_offset:].lstrip()

            # Handle simple cases only.
            new_right = None
            if center.strip() == '==':
                if re.match(r'\bTrue\b', right):
                    new_right = re.sub(r'\bTrue\b *', '', right, count=1)
            elif center.strip() == '!=':
                if re.match(r'\bFalse\b', right):
                    new_right = re.sub(r'\bFalse\b *', '', right, count=1)

            if new_right is None:
                return []

            if new_right[0].isalnum():
                new_right = ' ' + new_right

            self.source[line_index] = left + new_right

    def fix_e713(self, result):
        """Fix non-membership check."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        # Handle very easy case only.
        if re.match(r'^\s*if not \w+ in \w+:$', target):
            self.source[line_index] = re.sub(r'if not (\w+) in (\w+):',
                                             r'if \1 not in \2:',
                                             target,
                                             count=1)

    def fix_w291(self, result):
        """Remove trailing whitespace."""
        fixed_line = self.source[result['line'] - 1].rstrip()
        self.source[result['line'] - 1] = fixed_line + '\n'


def get_fixed_long_line(target, previous_line, original,
                        indent_word='    ', max_line_length=79,
                        aggressive=False, experimental=False, verbose=False):
    """Break up long line and return result.

    Do this by generating multiple reformatted candidates and then
    ranking the candidates to heuristically select the best option.

    """
    indent = _get_indentation(target)
    source = target[len(indent):]
    assert source.lstrip() == source

    # Check for partial multiline.
    tokens = list(generate_tokens(source))

    candidates = shorten_line(
        tokens, source, indent,
        indent_word,
        max_line_length,
        aggressive=aggressive,
        experimental=experimental,
        previous_line=previous_line)

    # Also sort alphabetically as a tie breaker (for determinism).
    candidates = sorted(
        sorted(set(candidates).union([target, original])),
        key=lambda x: line_shortening_rank(x,
                                           indent_word,
                                           max_line_length,
                                           experimental))

    if verbose >= 4:
        print(('-' * 79 + '\n').join([''] + candidates + ['']),
              file=codecs.getwriter('utf-8')(sys.stderr.buffer
                                             if hasattr(sys.stderr,
                                                        'buffer')
                                             else sys.stderr))

    if candidates:
        return candidates[0]


def join_logical_line(logical_line):
    """Return single line based on logical line input."""
    indentation = _get_indentation(logical_line)

    return indentation + untokenize_without_newlines(
        generate_tokens(logical_line.lstrip())) + '\n'


def untokenize_without_newlines(tokens):
    """Return source code based on tokens."""
    text = ''
    last_row = 0
    last_column = -1

    for t in tokens:
        token_string = t[1]
        (start_row, start_column) = t[2]
        (end_row, end_column) = t[3]

        if start_row > last_row:
            last_column = 0
        if (
            (start_column > last_column or token_string == '\n') and
            not text.endswith(' ')
        ):
            text += ' '

        if token_string != '\n':
            text += token_string

        last_row = end_row
        last_column = end_column

    return text


def _find_logical(source_lines):
    # Record the indexes at which logical lines start and end.
    logical_start = []
    logical_end = []
    last_newline = True
    parens = 0
    for t in generate_tokens(''.join(source_lines)):
        if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
                    tokenize.INDENT, tokenize.NL,
                    tokenize.ENDMARKER]:
            continue
        if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
            last_newline = True
            logical_end.append((t[3][0] - 1, t[2][1]))
            continue
        if last_newline and not parens:
            logical_start.append((t[2][0] - 1, t[2][1]))
            last_newline = False
        if t[0] == tokenize.OP:
            if t[1] in '([{':
                parens += 1
            elif t[1] in '}])':
                parens -= 1
    return (logical_start, logical_end)


def _get_logical(source_lines, result, logical_start, logical_end):
    """Return the logical line corresponding to the result.

    Assumes input is already E702-clean.

    """
    row = result['line'] - 1
    col = result['column'] - 1
    ls = None
    le = None
    for i in range(0, len(logical_start), 1):
        assert logical_end
        x = logical_end[i]
        if x[0] > row or (x[0] == row and x[1] > col):
            le = x
            ls = logical_start[i]
            break
    if ls is None:
        return None
    original = source_lines[ls[0]:le[0] + 1]
    return ls, le, original


def get_item(items, index, default=None):
    if 0 <= index < len(items):
        return items[index]
    else:
        return default
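# For example, get_item(['a', 'b'], 1) returns 'b', while
# get_item(['a', 'b'], 9, default='') returns ''.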


def reindent(source, indent_size):
    """Reindent all lines."""
    reindenter = Reindenter(source)
    return reindenter.run(indent_size)


def code_almost_equal(a, b):
    """Return True if code is similar.

    Ignore whitespace when comparing specific lines.

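    For example:

    >>> code_almost_equal('1 + 1', '1  +  1')
    True
    >>> code_almost_equal('1 + 1', '1 + 2')
    False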
    """
    split_a = split_and_strip_non_empty_lines(a)
    split_b = split_and_strip_non_empty_lines(b)

    if len(split_a) != len(split_b):
        return False

    for index in range(len(split_a)):
        if ''.join(split_a[index].split()) != ''.join(split_b[index].split()):
            return False

    return True


def split_and_strip_non_empty_lines(text):
    """Return lines split by newline.

    Ignore empty lines.

    """
    return [line.strip() for line in text.splitlines() if line.strip()]
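# For example, split_and_strip_non_empty_lines('a\n\n  b \n') returns
# ['a', 'b'].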


def fix_e265(source, aggressive=False):  # pylint: disable=unused-argument
    """Format block comments."""
    if '#' not in source:
        # Optimization.
        return source

    ignored_line_numbers = multiline_string_lines(
        source,
        include_docstrings=True) | set(commented_out_code_lines(source))

    fixed_lines = []
    sio = io.StringIO(source)
    for (line_number, line) in enumerate(sio.readlines(), start=1):
        if (
            line.lstrip().startswith('#') and
            line_number not in ignored_line_numbers
        ):
            indentation = _get_indentation(line)
            line = line.lstrip()

            # Normalize beginning if not a shebang.
            if len(line) > 1:
                if (
                    # Leave multiple spaces like '#    ' alone.
                    (line.count('#') > 1 or line[1].isalnum())
                    # Leave stylistic outlined blocks alone.
                    and not line.rstrip().endswith('#')
                ):
                    line = '# ' + line.lstrip('# \t')

            fixed_lines.append(indentation + line)
        else:
            fixed_lines.append(line)

    return ''.join(fixed_lines)
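# For example, fix_e265('#comment here\n') should yield '# comment here\n';
# lines that look like commented-out code are left alone (handled by
# commented_out_code_lines(), defined elsewhere in this module).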


def refactor(source, fixer_names, ignore=None):
    """Return refactored code using lib2to3.

    Skip if ignore string is produced in the refactored code.

    """
    from lib2to3 import pgen2
    try:
        new_text = refactor_with_2to3(source,
                                      fixer_names=fixer_names)
    except (pgen2.parse.ParseError,
            SyntaxError,
            UnicodeDecodeError,
            UnicodeEncodeError):
        return source

    if ignore:
        if ignore in new_text and ignore not in source:
            return source

    return new_text
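# For example, refactor('foo(1,2)\n', ['ws_comma']) should yield
# 'foo(1, 2)\n'; unparsable input is returned unchanged.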


def code_to_2to3(select, ignore):
    fixes = set()
    for code, fix in CODE_TO_2TO3.items():
        if code_match(code, select=select, ignore=ignore):
            fixes |= set(fix)
    return fixes


def fix_2to3(source, aggressive=True, select=None, ignore=None):
    """Fix various deprecated code (via lib2to3)."""
    if not aggressive:
        return source

    select = select or []
    ignore = ignore or []

    return refactor(source,
                    code_to_2to3(select=select,
                                 ignore=ignore))


def fix_w602(source, aggressive=True):
    """Fix deprecated form of raising exception."""
    if not aggressive:
        return source

    return refactor(source, ['raise'],
                    ignore='with_traceback')


def find_newline(source):
    """Return type of newline used in source.

    Input is a list of lines.

    """
    assert not isinstance(source, unicode)

    counter = collections.defaultdict(int)
    for line in source:
        if line.endswith(CRLF):
            counter[CRLF] += 1
        elif line.endswith(CR):
            counter[CR] += 1
        elif line.endswith(LF):
            counter[LF] += 1

    return (sorted(counter, key=counter.get, reverse=True) or [LF])[0]
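# For example, find_newline(['a\r\n', 'b\r\n', 'c\n']) returns CRLF, the most
# common ending; LF is the fallback when no line endings are found.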


def _get_indentword(source):
    """Return indentation type."""
    indent_word = '    '  # Default in case source has no indentation
    try:
        for t in generate_tokens(source):
            if t[0] == token.INDENT:
                indent_word = t[1]
                break
    except (SyntaxError, tokenize.TokenError):
        pass
    return indent_word


def _get_indentation(line):
    """Return leading whitespace."""
    if line.strip():
        non_whitespace_index = len(line) - len(line.lstrip())
        return line[:non_whitespace_index]
    else:
        return ''
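# For example, _get_indentation('    x = 1\n') returns '    ', and a blank
# line yields ''.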


def get_diff_text(old, new, filename):
    """Return text of unified diff between old and new."""
    newline = '\n'
    diff = difflib.unified_diff(
        old, new,
        'original/' + filename,
        'fixed/' + filename,
        lineterm=newline)

    text = ''
    for line in diff:
        text += line

        # Work around missing newline (http://bugs.python.org/issue2142).
        if text and not line.endswith(newline):
            text += newline + r'\ No newline at end of file' + newline

    return text


def _priority_key(pep8_result):
    """Key for sorting PEP8 results.

    Global fixes should be done first. This is important for things like
    indentation.

    """
    priority = [
        # Fix multiline colon-based statements before semicolon-based ones.
        'e701',
        # Break multiline statements early.
        'e702',
        # Things that make lines longer.
        'e225', 'e231',
        # Remove extraneous whitespace before breaking lines.
        'e201',
        # Shorten whitespace in comment before resorting to wrapping.
        'e262'
    ]
    middle_index = 10000
    lowest_priority = [
        # We need to shorten lines last since the logical fixer can get in a
        # loop, which causes us to exit early.
        'e501'
    ]
    key = pep8_result['id'].lower()
    try:
        return priority.index(key)
    except ValueError:
        try:
            return middle_index + lowest_priority.index(key) + 1
        except ValueError:
            return middle_index
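# For example, _priority_key({'id': 'E701'}) returns 0 (highest priority),
# unknown codes sort to the middle, and 'E501' sorts after everything else.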


def shorten_line(tokens, source, indentation, indent_word, max_line_length,
                 aggressive=False, experimental=False, previous_line=''):
    """Separate line at OPERATOR.

    Multiple candidates will be yielded.

    """
    for candidate in _shorten_line(tokens=tokens,
                                   source=source,
                                   indentation=indentation,
                                   indent_word=indent_word,
                                   aggressive=aggressive,
                                   previous_line=previous_line):
        yield candidate

    if aggressive:
        for key_token_strings in SHORTEN_OPERATOR_GROUPS:
            shortened = _shorten_line_at_tokens(
                tokens=tokens,
                source=source,
                indentation=indentation,
                indent_word=indent_word,
                key_token_strings=key_token_strings,
                aggressive=aggressive)

            if shortened is not None and shortened != source:
                yield shortened

    if experimental:
        for shortened in _shorten_line_at_tokens_new(
                tokens=tokens,
                source=source,
                indentation=indentation,
                max_line_length=max_line_length):

            yield shortened


def _shorten_line(tokens, source, indentation, indent_word,
                  aggressive=False, previous_line=''):
    """Separate line at OPERATOR.

    The input is expected to be free of newlines except for inside multiline
    strings and at the end.

    Multiple candidates will be yielded.

    """
    for (token_type,
         token_string,
         start_offset,
         end_offset) in token_offsets(tokens):

        if (
            token_type == tokenize.COMMENT and
            not is_probably_part_of_multiline(previous_line) and
            not is_probably_part_of_multiline(source) and
            not source[start_offset + 1:].strip().lower().startswith(
                ('noqa', 'pragma:', 'pylint:'))
        ):
            # Move inline comments to previous line.
            first = source[:start_offset]
            second = source[start_offset:]
            yield (indentation + second.strip() + '\n' +
                   indentation + first.strip() + '\n')
        elif token_type == token.OP and token_string != '=':
            # Don't break on '=' after keyword as this violates PEP 8.

            assert token_type != token.INDENT

            first = source[:end_offset]

            second_indent = indentation
            if first.rstrip().endswith('('):
                second_indent += indent_word
            elif '(' in first:
                second_indent += ' ' * (1 + first.find('('))
            else:
                second_indent += indent_word

            second = (second_indent + source[end_offset:].lstrip())
            if (
                not second.strip() or
                second.lstrip().startswith('#')
            ):
                continue

            # Do not begin a line with a comma
            if second.lstrip().startswith(','):
                continue
    1427             # Do not end a line with a dot
   1428             if first.rstrip().endswith('.'):
   1429                 continue
   1430             if token_string in '+-*/':
   1431                 fixed = first + ' \\' + '\n' + second
   1432             else:
   1433                 fixed = first + '\n' + second
   1434 
   1435             # Only fix if syntax is okay.
   1436             if check_syntax(normalize_multiline(fixed)
   1437                             if aggressive else fixed):
   1438                 yield indentation + fixed
   1439 
   1440 
   1441 # A convenient way to handle tokens.
   1442 Token = collections.namedtuple('Token', ['token_type', 'token_string',
   1443                                          'spos', 'epos', 'line'])
   1444 
   1445 
   1446 class ReformattedLines(object):
   1447 
   1448     """The reflowed lines of atoms.
   1449 
    1450     Each part of the line is represented as an "atom." Atoms can be
    1451     moved around as needed to get the optimal formatting.
   1452 
   1453     """
   1454 
   1455     ###########################################################################
   1456     # Private Classes
   1457 
   1458     class _Indent(object):
   1459 
   1460         """Represent an indentation in the atom stream."""
   1461 
   1462         def __init__(self, indent_amt):
   1463             self._indent_amt = indent_amt
   1464 
   1465         def emit(self):
   1466             return ' ' * self._indent_amt
   1467 
   1468         @property
   1469         def size(self):
   1470             return self._indent_amt
   1471 
   1472     class _Space(object):
   1473 
   1474         """Represent a space in the atom stream."""
   1475 
   1476         def emit(self):
   1477             return ' '
   1478 
   1479         @property
   1480         def size(self):
   1481             return 1
   1482 
   1483     class _LineBreak(object):
   1484 
   1485         """Represent a line break in the atom stream."""
   1486 
   1487         def emit(self):
   1488             return '\n'
   1489 
   1490         @property
   1491         def size(self):
   1492             return 0
   1493 
   1494     def __init__(self, max_line_length):
   1495         self._max_line_length = max_line_length
   1496         self._lines = []
   1497         self._bracket_depth = 0
   1498         self._prev_item = None
   1499         self._prev_prev_item = None
   1500 
   1501     def __repr__(self):
   1502         return self.emit()
   1503 
   1504     ###########################################################################
   1505     # Public Methods
   1506 
   1507     def add(self, obj, indent_amt, break_after_open_bracket):
   1508         if isinstance(obj, Atom):
   1509             self._add_item(obj, indent_amt)
   1510             return
   1511 
   1512         self._add_container(obj, indent_amt, break_after_open_bracket)
   1513 
   1514     def add_comment(self, item):
   1515         num_spaces = 2
   1516         if len(self._lines) > 1:
   1517             if isinstance(self._lines[-1], self._Space):
   1518                 num_spaces -= 1
   1519             if len(self._lines) > 2:
   1520                 if isinstance(self._lines[-2], self._Space):
   1521                     num_spaces -= 1
   1522 
   1523         while num_spaces > 0:
   1524             self._lines.append(self._Space())
   1525             num_spaces -= 1
   1526         self._lines.append(item)
   1527 
   1528     def add_indent(self, indent_amt):
   1529         self._lines.append(self._Indent(indent_amt))
   1530 
   1531     def add_line_break(self, indent):
   1532         self._lines.append(self._LineBreak())
   1533         self.add_indent(len(indent))
   1534 
   1535     def add_line_break_at(self, index, indent_amt):
   1536         self._lines.insert(index, self._LineBreak())
   1537         self._lines.insert(index + 1, self._Indent(indent_amt))
   1538 
   1539     def add_space_if_needed(self, curr_text, equal=False):
   1540         if (
   1541             not self._lines or isinstance(
   1542                 self._lines[-1], (self._LineBreak, self._Indent, self._Space))
   1543         ):
   1544             return
   1545 
   1546         prev_text = unicode(self._prev_item)
   1547         prev_prev_text = (
   1548             unicode(self._prev_prev_item) if self._prev_prev_item else '')
   1549 
   1550         if (
   1551             # The previous item was a keyword or identifier and the current
   1552             # item isn't an operator that doesn't require a space.
   1553             ((self._prev_item.is_keyword or self._prev_item.is_string or
   1554               self._prev_item.is_name or self._prev_item.is_number) and
   1555              (curr_text[0] not in '([{.,:}])' or
   1556               (curr_text[0] == '=' and equal))) or
   1557 
   1558             # Don't place spaces around a '.', unless it's in an 'import'
   1559             # statement.
   1560             ((prev_prev_text != 'from' and prev_text[-1] != '.' and
   1561               curr_text != 'import') and
   1562 
   1563              # Don't place a space before a colon.
   1564              curr_text[0] != ':' and
   1565 
   1566              # Don't split up ending brackets by spaces.
   1567              ((prev_text[-1] in '}])' and curr_text[0] not in '.,}])') or
   1568 
   1569               # Put a space after a colon or comma.
   1570               prev_text[-1] in ':,' or
   1571 
   1572               # Put space around '=' if asked to.
   1573               (equal and prev_text == '=') or
   1574 
   1575               # Put spaces around non-unary arithmetic operators.
   1576               ((self._prev_prev_item and
   1577                 (prev_text not in '+-' and
   1578                  (self._prev_prev_item.is_name or
   1579                   self._prev_prev_item.is_number or
   1580                   self._prev_prev_item.is_string)) and
   1581                 prev_text in ('+', '-', '%', '*', '/', '//', '**')))))
   1582         ):
   1583             self._lines.append(self._Space())
   1584 
   1585     def previous_item(self):
   1586         """Return the previous non-whitespace item."""
   1587         return self._prev_item
   1588 
   1589     def fits_on_current_line(self, item_extent):
   1590         return self.current_size() + item_extent <= self._max_line_length
   1591 
   1592     def current_size(self):
    1593         """The size of the current line, including its indentation."""
   1594         size = 0
   1595         for item in reversed(self._lines):
   1596             size += item.size
   1597             if isinstance(item, self._LineBreak):
   1598                 break
   1599 
   1600         return size
   1601 
   1602     def line_empty(self):
   1603         return (self._lines and
   1604                 isinstance(self._lines[-1],
   1605                            (self._LineBreak, self._Indent)))
   1606 
   1607     def emit(self):
   1608         string = ''
   1609         for item in self._lines:
   1610             if isinstance(item, self._LineBreak):
   1611                 string = string.rstrip()
   1612             string += item.emit()
   1613 
   1614         return string.rstrip() + '\n'
   1615 
   1616     ###########################################################################
   1617     # Private Methods
   1618 
   1619     def _add_item(self, item, indent_amt):
   1620         """Add an item to the line.
   1621 
   1622         Reflow the line to get the best formatting after the item is
   1623         inserted. The bracket depth indicates if the item is being
   1624         inserted inside of a container or not.
   1625 
   1626         """
   1627         if self._prev_item and self._prev_item.is_string and item.is_string:
   1628             # Place consecutive string literals on separate lines.
   1629             self._lines.append(self._LineBreak())
   1630             self._lines.append(self._Indent(indent_amt))
   1631 
   1632         item_text = unicode(item)
   1633         if self._lines and self._bracket_depth:
   1634             # Adding the item into a container.
   1635             self._prevent_default_initializer_splitting(item, indent_amt)
   1636 
   1637             if item_text in '.,)]}':
   1638                 self._split_after_delimiter(item, indent_amt)
   1639 
   1640         elif self._lines and not self.line_empty():
   1641             # Adding the item outside of a container.
   1642             if self.fits_on_current_line(len(item_text)):
   1643                 self._enforce_space(item)
   1644 
   1645             else:
   1646                 # Line break for the new item.
   1647                 self._lines.append(self._LineBreak())
   1648                 self._lines.append(self._Indent(indent_amt))
   1649 
   1650         self._lines.append(item)
   1651         self._prev_item, self._prev_prev_item = item, self._prev_item
   1652 
   1653         if item_text in '([{':
   1654             self._bracket_depth += 1
   1655 
   1656         elif item_text in '}])':
   1657             self._bracket_depth -= 1
   1658             assert self._bracket_depth >= 0
   1659 
   1660     def _add_container(self, container, indent_amt, break_after_open_bracket):
   1661         actual_indent = indent_amt + 1
   1662 
   1663         if (
   1664             unicode(self._prev_item) != '=' and
   1665             not self.line_empty() and
   1666             not self.fits_on_current_line(
   1667                 container.size + self._bracket_depth + 2)
   1668         ):
   1669 
   1670             if unicode(container)[0] == '(' and self._prev_item.is_name:
   1671                 # Don't split before the opening bracket of a call.
   1672                 break_after_open_bracket = True
   1673                 actual_indent = indent_amt + 4
   1674             elif (
   1675                 break_after_open_bracket or
   1676                 unicode(self._prev_item) not in '([{'
   1677             ):
   1678                 # If the container doesn't fit on the current line and the
   1679                 # current line isn't empty, place the container on the next
   1680                 # line.
   1681                 self._lines.append(self._LineBreak())
   1682                 self._lines.append(self._Indent(indent_amt))
   1683                 break_after_open_bracket = False
   1684         else:
   1685             actual_indent = self.current_size() + 1
   1686             break_after_open_bracket = False
   1687 
   1688         if isinstance(container, (ListComprehension, IfExpression)):
   1689             actual_indent = indent_amt
   1690 
   1691         # Increase the continued indentation only if recursing on a
   1692         # container.
   1693         container.reflow(self, ' ' * actual_indent,
   1694                          break_after_open_bracket=break_after_open_bracket)
   1695 
   1696     def _prevent_default_initializer_splitting(self, item, indent_amt):
    1697         """Prevent splitting in the middle of a default initializer.
    1698 
    1699         When there is a default initializer, it's best to keep it all on
    1700         the same line. It's nicer and more readable, even if it goes
    1701         over the maximum allowable line length. This walks back along the
    1702         current line to determine whether we have a default initializer
    1703         and, if so, removes extraneous whitespace and adds a line
    1704         break/indent before it if needed.
   1705 
   1706         """
   1707         if unicode(item) == '=':
   1708             # This is the assignment in the initializer. Just remove spaces for
   1709             # now.
   1710             self._delete_whitespace()
   1711             return
   1712 
   1713         if (not self._prev_item or not self._prev_prev_item or
   1714                 unicode(self._prev_item) != '='):
   1715             return
   1716 
   1717         self._delete_whitespace()
   1718         prev_prev_index = self._lines.index(self._prev_prev_item)
   1719 
   1720         if (
   1721             isinstance(self._lines[prev_prev_index - 1], self._Indent) or
   1722             self.fits_on_current_line(item.size + 1)
   1723         ):
   1724             # The default initializer is already the only item on this line.
   1725             # Don't insert a newline here.
   1726             return
   1727 
   1728         # Replace the space with a newline/indent combo.
   1729         if isinstance(self._lines[prev_prev_index - 1], self._Space):
   1730             del self._lines[prev_prev_index - 1]
   1731 
   1732         self.add_line_break_at(self._lines.index(self._prev_prev_item),
   1733                                indent_amt)
   1734 
   1735     def _split_after_delimiter(self, item, indent_amt):
   1736         """Split the line only after a delimiter."""
   1737         self._delete_whitespace()
   1738 
   1739         if self.fits_on_current_line(item.size):
   1740             return
   1741 
   1742         last_space = None
   1743         for item in reversed(self._lines):
   1744             if (
   1745                 last_space and
   1746                 (not isinstance(item, Atom) or not item.is_colon)
   1747             ):
   1748                 break
   1749             else:
   1750                 last_space = None
   1751             if isinstance(item, self._Space):
   1752                 last_space = item
   1753             if isinstance(item, (self._LineBreak, self._Indent)):
   1754                 return
   1755 
   1756         if not last_space:
   1757             return
   1758 
   1759         self.add_line_break_at(self._lines.index(last_space), indent_amt)
   1760 
   1761     def _enforce_space(self, item):
   1762         """Enforce a space in certain situations.
   1763 
   1764         There are cases where we will want a space where normally we
   1765         wouldn't put one. This just enforces the addition of a space.
   1766 
   1767         """
   1768         if isinstance(self._lines[-1],
   1769                       (self._Space, self._LineBreak, self._Indent)):
   1770             return
   1771 
   1772         if not self._prev_item:
   1773             return
   1774 
   1775         item_text = unicode(item)
   1776         prev_text = unicode(self._prev_item)
   1777 
   1778         # Prefer a space around a '.' in an import statement, and between the
   1779         # 'import' and '('.
   1780         if (
   1781             (item_text == '.' and prev_text == 'from') or
   1782             (item_text == 'import' and prev_text == '.') or
   1783             (item_text == '(' and prev_text == 'import')
   1784         ):
   1785             self._lines.append(self._Space())
   1786 
   1787     def _delete_whitespace(self):
   1788         """Delete all whitespace from the end of the line."""
   1789         while isinstance(self._lines[-1], (self._Space, self._LineBreak,
   1790                                            self._Indent)):
   1791             del self._lines[-1]
   1792 
   1793 
   1794 class Atom(object):
   1795 
   1796     """The smallest unbreakable unit that can be reflowed."""
   1797 
   1798     def __init__(self, atom):
   1799         self._atom = atom
   1800 
   1801     def __repr__(self):
   1802         return self._atom.token_string
   1803 
   1804     def __len__(self):
   1805         return self.size
   1806 
   1807     def reflow(
   1808         self, reflowed_lines, continued_indent, extent,
   1809         break_after_open_bracket=False,
   1810         is_list_comp_or_if_expr=False,
   1811         next_is_dot=False
   1812     ):
   1813         if self._atom.token_type == tokenize.COMMENT:
   1814             reflowed_lines.add_comment(self)
   1815             return
   1816 
   1817         total_size = extent if extent else self.size
   1818 
   1819         if self._atom.token_string not in ',:([{}])':
   1820             # Some atoms will need an extra 1-sized space token after them.
   1821             total_size += 1
   1822 
   1823         prev_item = reflowed_lines.previous_item()
   1824         if (
   1825             not is_list_comp_or_if_expr and
   1826             not reflowed_lines.fits_on_current_line(total_size) and
   1827             not (next_is_dot and
   1828                  reflowed_lines.fits_on_current_line(self.size + 1)) and
   1829             not reflowed_lines.line_empty() and
   1830             not self.is_colon and
   1831             not (prev_item and prev_item.is_name and
   1832                  unicode(self) == '(')
   1833         ):
   1834             # Start a new line if there is already something on the line and
   1835             # adding this atom would make it go over the max line length.
   1836             reflowed_lines.add_line_break(continued_indent)
   1837         else:
   1838             reflowed_lines.add_space_if_needed(unicode(self))
   1839 
   1840         reflowed_lines.add(self, len(continued_indent),
   1841                            break_after_open_bracket)
   1842 
   1843     def emit(self):
   1844         return self.__repr__()
   1845 
   1846     @property
   1847     def is_keyword(self):
   1848         return keyword.iskeyword(self._atom.token_string)
   1849 
   1850     @property
   1851     def is_string(self):
   1852         return self._atom.token_type == tokenize.STRING
   1853 
   1854     @property
   1855     def is_name(self):
   1856         return self._atom.token_type == tokenize.NAME
   1857 
   1858     @property
   1859     def is_number(self):
   1860         return self._atom.token_type == tokenize.NUMBER
   1861 
   1862     @property
   1863     def is_comma(self):
   1864         return self._atom.token_string == ','
   1865 
   1866     @property
   1867     def is_colon(self):
   1868         return self._atom.token_string == ':'
   1869 
   1870     @property
   1871     def size(self):
   1872         return len(self._atom.token_string)
   1873 
   1874 
   1875 class Container(object):
   1876 
   1877     """Base class for all container types."""
   1878 
   1879     def __init__(self, items):
   1880         self._items = items
   1881 
   1882     def __repr__(self):
   1883         string = ''
   1884         last_was_keyword = False
   1885 
   1886         for item in self._items:
   1887             if item.is_comma:
   1888                 string += ', '
   1889             elif item.is_colon:
   1890                 string += ': '
   1891             else:
   1892                 item_string = unicode(item)
   1893                 if (
   1894                     string and
   1895                     (last_was_keyword or
   1896                      (not string.endswith(tuple('([{,.:}]) ')) and
   1897                       not item_string.startswith(tuple('([{,.:}])'))))
   1898                 ):
   1899                     string += ' '
   1900                 string += item_string
   1901 
   1902             last_was_keyword = item.is_keyword
   1903         return string
   1904 
   1905     def __iter__(self):
   1906         for element in self._items:
   1907             yield element
   1908 
   1909     def __getitem__(self, idx):
   1910         return self._items[idx]
   1911 
   1912     def reflow(self, reflowed_lines, continued_indent,
   1913                break_after_open_bracket=False):
   1914         last_was_container = False
   1915         for (index, item) in enumerate(self._items):
   1916             next_item = get_item(self._items, index + 1)
   1917 
   1918             if isinstance(item, Atom):
   1919                 is_list_comp_or_if_expr = (
   1920                     isinstance(self, (ListComprehension, IfExpression)))
   1921                 item.reflow(reflowed_lines, continued_indent,
   1922                             self._get_extent(index),
   1923                             is_list_comp_or_if_expr=is_list_comp_or_if_expr,
   1924                             next_is_dot=(next_item and
   1925                                          unicode(next_item) == '.'))
   1926                 if last_was_container and item.is_comma:
   1927                     reflowed_lines.add_line_break(continued_indent)
   1928                 last_was_container = False
   1929             else:  # isinstance(item, Container)
   1930                 reflowed_lines.add(item, len(continued_indent),
   1931                                    break_after_open_bracket)
   1932                 last_was_container = not isinstance(item, (ListComprehension,
   1933                                                            IfExpression))
   1934 
   1935             if (
   1936                 break_after_open_bracket and index == 0 and
   1937                 # Prefer to keep empty containers together instead of
   1938                 # separating them.
   1939                 unicode(item) == self.open_bracket and
   1940                 (not next_item or unicode(next_item) != self.close_bracket) and
   1941                 (len(self._items) != 3 or not isinstance(next_item, Atom))
   1942             ):
   1943                 reflowed_lines.add_line_break(continued_indent)
   1944                 break_after_open_bracket = False
   1945             else:
   1946                 next_next_item = get_item(self._items, index + 2)
   1947                 if (
   1948                     unicode(item) not in ['.', '%', 'in'] and
   1949                     next_item and not isinstance(next_item, Container) and
   1950                     unicode(next_item) != ':' and
   1951                     next_next_item and (not isinstance(next_next_item, Atom) or
   1952                                         unicode(next_item) == 'not') and
   1953                     not reflowed_lines.line_empty() and
   1954                     not reflowed_lines.fits_on_current_line(
   1955                         self._get_extent(index + 1) + 2)
   1956                 ):
   1957                     reflowed_lines.add_line_break(continued_indent)
   1958 
   1959     def _get_extent(self, index):
   1960         """The extent of the full element.
   1961 
   1962         E.g., the length of a function call or keyword.
   1963 
   1964         """
   1965         extent = 0
   1966         prev_item = get_item(self._items, index - 1)
   1967         seen_dot = prev_item and unicode(prev_item) == '.'
   1968         while index < len(self._items):
   1969             item = get_item(self._items, index)
   1970             index += 1
   1971 
   1972             if isinstance(item, (ListComprehension, IfExpression)):
   1973                 break
   1974 
   1975             if isinstance(item, Container):
   1976                 if prev_item and prev_item.is_name:
   1977                     if seen_dot:
   1978                         extent += 1
   1979                     else:
   1980                         extent += item.size
   1981 
   1982                     prev_item = item
   1983                     continue
   1984             elif (unicode(item) not in ['.', '=', ':', 'not'] and
   1985                   not item.is_name and not item.is_string):
   1986                 break
   1987 
   1988             if unicode(item) == '.':
   1989                 seen_dot = True
   1990 
   1991             extent += item.size
   1992             prev_item = item
   1993 
   1994         return extent
   1995 
   1996     @property
   1997     def is_string(self):
   1998         return False
   1999 
   2000     @property
   2001     def size(self):
   2002         return len(self.__repr__())
   2003 
   2004     @property
   2005     def is_keyword(self):
   2006         return False
   2007 
   2008     @property
   2009     def is_name(self):
   2010         return False
   2011 
   2012     @property
   2013     def is_comma(self):
   2014         return False
   2015 
   2016     @property
   2017     def is_colon(self):
   2018         return False
   2019 
   2020     @property
   2021     def open_bracket(self):
   2022         return None
   2023 
   2024     @property
   2025     def close_bracket(self):
   2026         return None
   2027 
   2028 
   2029 class Tuple(Container):
   2030 
   2031     """A high-level representation of a tuple."""
   2032 
   2033     @property
   2034     def open_bracket(self):
   2035         return '('
   2036 
   2037     @property
   2038     def close_bracket(self):
   2039         return ')'
   2040 
   2041 
   2042 class List(Container):
   2043 
   2044     """A high-level representation of a list."""
   2045 
   2046     @property
   2047     def open_bracket(self):
   2048         return '['
   2049 
   2050     @property
   2051     def close_bracket(self):
   2052         return ']'
   2053 
   2054 
   2055 class DictOrSet(Container):
   2056 
   2057     """A high-level representation of a dictionary or set."""
   2058 
   2059     @property
   2060     def open_bracket(self):
   2061         return '{'
   2062 
   2063     @property
   2064     def close_bracket(self):
   2065         return '}'
   2066 
   2067 
   2068 class ListComprehension(Container):
   2069 
   2070     """A high-level representation of a list comprehension."""
   2071 
   2072     @property
   2073     def size(self):
   2074         length = 0
   2075         for item in self._items:
   2076             if isinstance(item, IfExpression):
   2077                 break
   2078             length += item.size
   2079         return length
   2080 
   2081 
   2082 class IfExpression(Container):
   2083 
   2084     """A high-level representation of an if-expression."""
   2085 
   2086 
   2087 def _parse_container(tokens, index, for_or_if=None):
   2088     """Parse a high-level container, such as a list, tuple, etc."""
   2089 
   2090     # Store the opening bracket.
   2091     items = [Atom(Token(*tokens[index]))]
   2092     index += 1
   2093 
   2094     num_tokens = len(tokens)
   2095     while index < num_tokens:
   2096         tok = Token(*tokens[index])
   2097 
   2098         if tok.token_string in ',)]}':
   2099             # First check if we're at the end of a list comprehension or
   2100             # if-expression. Don't add the ending token as part of the list
   2101             # comprehension or if-expression, because they aren't part of those
   2102             # constructs.
   2103             if for_or_if == 'for':
   2104                 return (ListComprehension(items), index - 1)
   2105 
   2106             elif for_or_if == 'if':
   2107                 return (IfExpression(items), index - 1)
   2108 
    2109             # Append the delimiter (',') or closing bracket itself.
    2110             items.append(Atom(tok))
    2111 
    2112             # A closing bracket marks the end of this container.
   2113             if tok.token_string == ')':
   2114                 # The end of a tuple.
   2115                 return (Tuple(items), index)
   2116 
   2117             elif tok.token_string == ']':
   2118                 # The end of a list.
   2119                 return (List(items), index)
   2120 
   2121             elif tok.token_string == '}':
   2122                 # The end of a dictionary or set.
   2123                 return (DictOrSet(items), index)
   2124 
   2125         elif tok.token_string in '([{':
   2126             # A sub-container is being defined.
   2127             (container, index) = _parse_container(tokens, index)
   2128             items.append(container)
   2129 
   2130         elif tok.token_string == 'for':
   2131             (container, index) = _parse_container(tokens, index, 'for')
   2132             items.append(container)
   2133 
   2134         elif tok.token_string == 'if':
   2135             (container, index) = _parse_container(tokens, index, 'if')
   2136             items.append(container)
   2137 
   2138         else:
   2139             items.append(Atom(tok))
   2140 
   2141         index += 1
   2142 
   2143     return (None, None)
   2144 
   2145 
   2146 def _parse_tokens(tokens):
   2147     """Parse the tokens.
   2148 
   2149     This converts the tokens into a form where we can manipulate them
   2150     more easily.
   2151 
   2152     """
   2153 
   2154     index = 0
   2155     parsed_tokens = []
   2156 
   2157     num_tokens = len(tokens)
   2158     while index < num_tokens:
   2159         tok = Token(*tokens[index])
   2160 
   2161         assert tok.token_type != token.INDENT
   2162         if tok.token_type == tokenize.NEWLINE:
   2163             # There's only one newline and it's at the end.
   2164             break
   2165 
   2166         if tok.token_string in '([{':
   2167             (container, index) = _parse_container(tokens, index)
   2168             if not container:
   2169                 return None
   2170             parsed_tokens.append(container)
   2171         else:
   2172             parsed_tokens.append(Atom(tok))
   2173 
   2174         index += 1
   2175 
   2176     return parsed_tokens
   2177 
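             # Illustrative sketch, not part of upstream autopep8: a hypothetical
             # example of _parse_tokens(), fed by this module's generate_tokens()
             # helper. Atoms and containers round-trip through unicode().
             def _example_parse_tokens():
                 parsed = _parse_tokens(list(generate_tokens('x = [1, 2]\n')))
                 # 'x' and '=' stay atoms; the bracketed part becomes a List.
                 assert [unicode(item) for item in parsed] == ['x', '=', '[1, 2]']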
   2178 
   2179 def _reflow_lines(parsed_tokens, indentation, max_line_length,
   2180                   start_on_prefix_line):
    2181     """Reflow the lines so that they look nice."""
   2182 
   2183     if unicode(parsed_tokens[0]) == 'def':
   2184         # A function definition gets indented a bit more.
   2185         continued_indent = indentation + ' ' * 2 * DEFAULT_INDENT_SIZE
   2186     else:
   2187         continued_indent = indentation + ' ' * DEFAULT_INDENT_SIZE
   2188 
   2189     break_after_open_bracket = not start_on_prefix_line
   2190 
   2191     lines = ReformattedLines(max_line_length)
   2192     lines.add_indent(len(indentation.lstrip('\r\n')))
   2193 
   2194     if not start_on_prefix_line:
   2195         # If splitting after the opening bracket will cause the first element
   2196         # to be aligned weirdly, don't try it.
   2197         first_token = get_item(parsed_tokens, 0)
   2198         second_token = get_item(parsed_tokens, 1)
   2199 
   2200         if (
   2201             first_token and second_token and
   2202             unicode(second_token)[0] == '(' and
   2203             len(indentation) + len(first_token) + 1 == len(continued_indent)
   2204         ):
   2205             return None
   2206 
   2207     for item in parsed_tokens:
   2208         lines.add_space_if_needed(unicode(item), equal=True)
   2209 
   2210         save_continued_indent = continued_indent
   2211         if start_on_prefix_line and isinstance(item, Container):
   2212             start_on_prefix_line = False
   2213             continued_indent = ' ' * (lines.current_size() + 1)
   2214 
   2215         item.reflow(lines, continued_indent, break_after_open_bracket)
   2216         continued_indent = save_continued_indent
   2217 
   2218     return lines.emit()
   2219 
   2220 
   2221 def _shorten_line_at_tokens_new(tokens, source, indentation,
   2222                                 max_line_length):
   2223     """Shorten the line taking its length into account.
   2224 
   2225     The input is expected to be free of newlines except for inside
   2226     multiline strings and at the end.
   2227 
   2228     """
    2229     # Yield the original source so we can see if it's a better choice than
    2230     # the shortened candidate lines we generate here.
   2231     yield indentation + source
   2232 
   2233     parsed_tokens = _parse_tokens(tokens)
   2234 
   2235     if parsed_tokens:
   2236         # Perform two reflows. The first one starts on the same line as the
   2237         # prefix. The second starts on the line after the prefix.
   2238         fixed = _reflow_lines(parsed_tokens, indentation, max_line_length,
   2239                               start_on_prefix_line=True)
   2240         if fixed and check_syntax(normalize_multiline(fixed.lstrip())):
   2241             yield fixed
   2242 
   2243         fixed = _reflow_lines(parsed_tokens, indentation, max_line_length,
   2244                               start_on_prefix_line=False)
   2245         if fixed and check_syntax(normalize_multiline(fixed.lstrip())):
   2246             yield fixed
   2247 
   2248 
   2249 def _shorten_line_at_tokens(tokens, source, indentation, indent_word,
   2250                             key_token_strings, aggressive):
   2251     """Separate line by breaking at tokens in key_token_strings.
   2252 
   2253     The input is expected to be free of newlines except for inside
   2254     multiline strings and at the end.
   2255 
   2256     """
   2257     offsets = []
   2258     for (index, _t) in enumerate(token_offsets(tokens)):
   2259         (token_type,
   2260          token_string,
   2261          start_offset,
   2262          end_offset) = _t
   2263 
   2264         assert token_type != token.INDENT
   2265 
   2266         if token_string in key_token_strings:
   2267             # Do not break in containers with zero or one items.
   2268             unwanted_next_token = {
   2269                 '(': ')',
   2270                 '[': ']',
   2271                 '{': '}'}.get(token_string)
   2272             if unwanted_next_token:
   2273                 if (
   2274                     get_item(tokens,
   2275                              index + 1,
   2276                              default=[None, None])[1] == unwanted_next_token or
   2277                     get_item(tokens,
   2278                              index + 2,
   2279                              default=[None, None])[1] == unwanted_next_token
   2280                 ):
   2281                     continue
   2282 
   2283             if (
   2284                 index > 2 and token_string == '(' and
   2285                 tokens[index - 1][1] in ',(%['
   2286             ):
   2287                 # Don't split after a tuple start, or before a tuple start if
   2288                 # the tuple is in a list.
   2289                 continue
   2290 
   2291             if end_offset < len(source) - 1:
   2292                 # Don't split right before newline.
   2293                 offsets.append(end_offset)
   2294         else:
   2295             # Break at adjacent strings. These were probably meant to be on
   2296             # separate lines in the first place.
   2297             previous_token = get_item(tokens, index - 1)
   2298             if (
   2299                 token_type == tokenize.STRING and
   2300                 previous_token and previous_token[0] == tokenize.STRING
   2301             ):
   2302                 offsets.append(start_offset)
   2303 
   2304     current_indent = None
   2305     fixed = None
   2306     for line in split_at_offsets(source, offsets):
   2307         if fixed:
   2308             fixed += '\n' + current_indent + line
   2309 
   2310             for symbol in '([{':
   2311                 if line.endswith(symbol):
   2312                     current_indent += indent_word
   2313         else:
   2314             # First line.
   2315             fixed = line
   2316             assert not current_indent
   2317             current_indent = indent_word
   2318 
   2319     assert fixed is not None
   2320 
   2321     if check_syntax(normalize_multiline(fixed)
   2322                     if aggressive > 1 else fixed):
   2323         return indentation + fixed
   2324     else:
   2325         return None
   2326 
   2327 
   2328 def token_offsets(tokens):
   2329     """Yield tokens and offsets."""
   2330     end_offset = 0
   2331     previous_end_row = 0
   2332     previous_end_column = 0
   2333     for t in tokens:
   2334         token_type = t[0]
   2335         token_string = t[1]
   2336         (start_row, start_column) = t[2]
   2337         (end_row, end_column) = t[3]
   2338 
   2339         # Account for the whitespace between tokens.
   2340         end_offset += start_column
   2341         if previous_end_row == start_row:
   2342             end_offset -= previous_end_column
   2343 
   2344         # Record the start offset of the token.
   2345         start_offset = end_offset
   2346 
   2347         # Account for the length of the token itself.
   2348         end_offset += len(token_string)
   2349 
   2350         yield (token_type,
   2351                token_string,
   2352                start_offset,
   2353                end_offset)
   2354 
   2355         previous_end_row = end_row
   2356         previous_end_column = end_column
   2357 
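             # Illustrative sketch, not part of upstream autopep8: a hypothetical
             # example of token_offsets(), using this module's generate_tokens()
             # helper on a single logical line.
             def _example_token_offsets():
                 offsets = [(string, start, end)
                            for (_, string, start, end)
                            in token_offsets(generate_tokens('x = 1\n'))]
                 # Each (start, end) pair brackets the token's text in the line.
                 assert ('x', 0, 1) in offsets
                 assert ('=', 2, 3) in offsets
                 assert ('1', 4, 5) in offsets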
   2358 
   2359 def normalize_multiline(line):
    2360     """Normalize multiline-related code that would cause a syntax error.
   2361 
   2362     This is for purposes of checking syntax.
   2363 
   2364     """
   2365     if line.startswith('def ') and line.rstrip().endswith(':'):
   2366         return line + ' pass'
   2367     elif line.startswith('return '):
   2368         return 'def _(): ' + line
   2369     elif line.startswith('@'):
   2370         return line + 'def _(): pass'
   2371     elif line.startswith('class '):
   2372         return line + ' pass'
   2373     elif line.startswith('if '):
   2374         return line + ' pass'
   2375     else:
   2376         return line
   2377 
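             # Illustrative sketch, not part of upstream autopep8: hypothetical
             # examples of normalize_multiline() padding fragments into
             # compilable statements.
             def _example_normalize_multiline():
                 assert normalize_multiline('def foo():') == 'def foo(): pass'
                 assert normalize_multiline('return x') == 'def _(): return x'
                 # Already-complete statements pass through unchanged.
                 assert normalize_multiline('x = 1') == 'x = 1'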
   2378 
   2379 def fix_whitespace(line, offset, replacement):
   2380     """Replace whitespace at offset and return fixed line."""
   2381     # Replace escaped newlines too
   2382     left = line[:offset].rstrip('\n\r \t\\')
   2383     right = line[offset:].lstrip('\n\r \t\\')
   2384     if right.startswith('#'):
   2385         return line
   2386     else:
   2387         return left + replacement + right
   2388 
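             # Illustrative sketch, not part of upstream autopep8: a hypothetical
             # example of fix_whitespace() collapsing a run of whitespace into a
             # single space.
             def _example_fix_whitespace():
                 assert fix_whitespace('x     = 1\n',
                                       offset=1,
                                       replacement=' ') == 'x = 1\n'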
   2389 
   2390 def _execute_pep8(pep8_options, source):
   2391     """Execute pep8 via python method calls."""
   2392     class QuietReport(pep8.BaseReport):
   2393 
   2394         """Version of checker that does not print."""
   2395 
   2396         def __init__(self, options):
   2397             super(QuietReport, self).__init__(options)
   2398             self.__full_error_results = []
   2399 
   2400         def error(self, line_number, offset, text, _):
   2401             """Collect errors."""
   2402             code = super(QuietReport, self).error(line_number, offset, text, _)
   2403             if code:
   2404                 self.__full_error_results.append(
   2405                     {'id': code,
   2406                      'line': line_number,
   2407                      'column': offset + 1,
   2408                      'info': text})
   2409 
   2410         def full_error_results(self):
   2411             """Return error results in detail.
   2412 
   2413             Results are in the form of a list of dictionaries. Each
   2414             dictionary contains 'id', 'line', 'column', and 'info'.
   2415 
   2416             """
   2417             return self.__full_error_results
   2418 
   2419     checker = pep8.Checker('', lines=source,
   2420                            reporter=QuietReport, **pep8_options)
   2421     checker.check_all()
   2422     return checker.report.full_error_results()
   2423 
   2424 
   2425 def _remove_leading_and_normalize(line):
   2426     return line.lstrip().rstrip(CR + LF) + '\n'
   2427 
   2428 
   2429 class Reindenter(object):
   2430 
   2431     """Reindents badly-indented code to uniformly use four-space indentation.
   2432 
   2433     Released to the public domain, by Tim Peters, 03 October 2000.
   2434 
   2435     """
   2436 
   2437     def __init__(self, input_text):
   2438         sio = io.StringIO(input_text)
   2439         source_lines = sio.readlines()
   2440 
   2441         self.string_content_line_numbers = multiline_string_lines(input_text)
   2442 
   2443         # File lines, rstripped & tab-expanded. Dummy at start is so
   2444         # that we can use tokenize's 1-based line numbering easily.
   2445         # Note that a line is all-blank iff it is a newline.
   2446         self.lines = []
   2447         for line_number, line in enumerate(source_lines, start=1):
   2448             # Do not modify if inside a multiline string.
   2449             if line_number in self.string_content_line_numbers:
   2450                 self.lines.append(line)
   2451             else:
   2452                 # Only expand leading tabs.
   2453                 self.lines.append(_get_indentation(line).expandtabs() +
   2454                                   _remove_leading_and_normalize(line))
   2455 
   2456         self.lines.insert(0, None)
   2457         self.index = 1  # index into self.lines of next line
   2458         self.input_text = input_text
   2459 
   2460     def run(self, indent_size=DEFAULT_INDENT_SIZE):
    2461         """Fix indentation and return the reindented text.
    2462 
    2463         If the source cannot be tokenized, return it unchanged.
   2464 
   2465         """
   2466         if indent_size < 1:
   2467             return self.input_text
   2468 
   2469         try:
   2470             stats = _reindent_stats(tokenize.generate_tokens(self.getline))
   2471         except (SyntaxError, tokenize.TokenError):
   2472             return self.input_text
   2473         # Remove trailing empty lines.
   2474         lines = self.lines
   2475         while lines and lines[-1] == '\n':
   2476             lines.pop()
   2477         # Sentinel.
   2478         stats.append((len(lines), 0))
    2479         # Map count of leading spaces to the number we want.
   2480         have2want = {}
   2481         # Program after transformation.
   2482         after = []
   2483         # Copy over initial empty lines -- there's nothing to do until
   2484         # we see a line with *something* on it.
   2485         i = stats[0][0]
   2486         after.extend(lines[1:i])
   2487         for i in range(len(stats) - 1):
   2488             thisstmt, thislevel = stats[i]
   2489             nextstmt = stats[i + 1][0]
   2490             have = _leading_space_count(lines[thisstmt])
   2491             want = thislevel * indent_size
   2492             if want < 0:
   2493                 # A comment line.
   2494                 if have:
   2495                     # An indented comment line. If we saw the same
   2496                     # indentation before, reuse what it most recently
   2497                     # mapped to.
   2498                     want = have2want.get(have, -1)
   2499                     if want < 0:
   2500                         # Then it probably belongs to the next real stmt.
   2501                         for j in range(i + 1, len(stats) - 1):
   2502                             jline, jlevel = stats[j]
   2503                             if jlevel >= 0:
   2504                                 if have == _leading_space_count(lines[jline]):
   2505                                     want = jlevel * indent_size
   2506                                 break
   2507                     if want < 0:            # Maybe it's a hanging
   2508                                             # comment like this one,
   2509                         # in which case we should shift it like its base
   2510                         # line got shifted.
   2511                         for j in range(i - 1, -1, -1):
   2512                             jline, jlevel = stats[j]
   2513                             if jlevel >= 0:
   2514                                 want = (have + _leading_space_count(
   2515                                         after[jline - 1]) -
   2516                                         _leading_space_count(lines[jline]))
   2517                                 break
   2518                     if want < 0:
   2519                         # Still no luck -- leave it alone.
   2520                         want = have
   2521                 else:
   2522                     want = 0
   2523             assert want >= 0
   2524             have2want[have] = want
   2525             diff = want - have
   2526             if diff == 0 or have == 0:
   2527                 after.extend(lines[thisstmt:nextstmt])
   2528             else:
   2529                 for line_number, line in enumerate(lines[thisstmt:nextstmt],
   2530                                                    start=thisstmt):
   2531                     if line_number in self.string_content_line_numbers:
   2532                         after.append(line)
   2533                     elif diff > 0:
   2534                         if line == '\n':
   2535                             after.append(line)
   2536                         else:
   2537                             after.append(' ' * diff + line)
   2538                     else:
   2539                         remove = min(_leading_space_count(line), -diff)
   2540                         after.append(line[remove:])
   2541 
   2542         return ''.join(after)
   2543 
   2544     def getline(self):
   2545         """Line-getter for tokenize."""
   2546         if self.index >= len(self.lines):
   2547             line = ''
   2548         else:
   2549             line = self.lines[self.index]
   2550             self.index += 1
   2551         return line
   2552 
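             # Illustrative sketch, not part of upstream autopep8: a hypothetical
             # example of Reindenter normalizing two-space indentation to the
             # default four spaces.
             def _example_reindenter():
                 fixed = Reindenter('if True:\n  x = 1\n').run()
                 assert fixed == 'if True:\n    x = 1\n'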
   2553 
   2554 def _reindent_stats(tokens):
   2555     """Return list of (lineno, indentlevel) pairs.
   2556 
   2557     One for each stmt and comment line. indentlevel is -1 for comment lines, as
   2558     a signal that tokenize doesn't know what to do about them; indeed, they're
   2559     our headache!
   2560 
   2561     """
   2562     find_stmt = 1  # Next token begins a fresh stmt?
   2563     level = 0  # Current indent level.
   2564     stats = []
   2565 
   2566     for t in tokens:
   2567         token_type = t[0]
   2568         sline = t[2][0]
   2569         line = t[4]
   2570 
   2571         if token_type == tokenize.NEWLINE:
   2572             # A program statement, or ENDMARKER, will eventually follow,
   2573             # after some (possibly empty) run of tokens of the form
   2574             #     (NL | COMMENT)* (INDENT | DEDENT+)?
   2575             find_stmt = 1
   2576 
   2577         elif token_type == tokenize.INDENT:
   2578             find_stmt = 1
   2579             level += 1
   2580 
   2581         elif token_type == tokenize.DEDENT:
   2582             find_stmt = 1
   2583             level -= 1
   2584 
   2585         elif token_type == tokenize.COMMENT:
   2586             if find_stmt:
   2587                 stats.append((sline, -1))
   2588                 # But we're still looking for a new stmt, so leave
   2589                 # find_stmt alone.
   2590 
   2591         elif token_type == tokenize.NL:
   2592             pass
   2593 
   2594         elif find_stmt:
   2595             # This is the first "real token" following a NEWLINE, so it
   2596             # must be the first token of the next program statement, or an
   2597             # ENDMARKER.
   2598             find_stmt = 0
   2599             if line:   # Not endmarker.
   2600                 stats.append((sline, level))
   2601 
   2602     return stats
   2603 
   2604 
   2605 def _leading_space_count(line):
   2606     """Return number of leading spaces in line."""
   2607     i = 0
   2608     while i < len(line) and line[i] == ' ':
   2609         i += 1
   2610     return i
   2611 
   2612 
   2613 def refactor_with_2to3(source_text, fixer_names):
   2614     """Use lib2to3 to refactor the source.
   2615 
   2616     Return the refactored source code.
   2617 
   2618     """
   2619     from lib2to3.refactor import RefactoringTool
   2620     fixers = ['lib2to3.fixes.fix_' + name for name in fixer_names]
   2621     tool = RefactoringTool(fixer_names=fixers, explicit=fixers)
   2622 
   2623     from lib2to3.pgen2 import tokenize as lib2to3_tokenize
   2624     try:
   2625         return unicode(tool.refactor_string(source_text, name=''))
   2626     except lib2to3_tokenize.TokenError:
   2627         return source_text
   2628 
   2629 
   2630 def check_syntax(code):
   2631     """Return True if syntax is okay."""
   2632     try:
   2633         return compile(code, '<string>', 'exec')
   2634     except (SyntaxError, TypeError, UnicodeDecodeError):
   2635         return False
   2636 
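             # Illustrative sketch, not part of upstream autopep8: check_syntax()
             # returns the compiled code object (truthy) for valid code and False
             # otherwise.
             def _example_check_syntax():
                 assert check_syntax('x = 1\n')
                 assert not check_syntax('x = = 1\n')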
   2637 
   2638 def filter_results(source, results, aggressive):
   2639     """Filter out spurious reports from pep8.
   2640 
    2641     Higher values of aggressive allow possibly unsafe fixes (E711, E712).
   2642 
   2643     """
   2644     non_docstring_string_line_numbers = multiline_string_lines(
   2645         source, include_docstrings=False)
   2646     all_string_line_numbers = multiline_string_lines(
   2647         source, include_docstrings=True)
   2648 
   2649     commented_out_code_line_numbers = commented_out_code_lines(source)
   2650 
   2651     for r in results:
   2652         issue_id = r['id'].lower()
   2653 
   2654         if r['line'] in non_docstring_string_line_numbers:
   2655             if issue_id.startswith(('e1', 'e501', 'w191')):
   2656                 continue
   2657 
   2658         if r['line'] in all_string_line_numbers:
   2659             if issue_id in ['e501']:
   2660                 continue
   2661 
   2662         # We must offset by 1 for lines that contain the trailing contents of
   2663         # multiline strings.
   2664         if not aggressive and (r['line'] + 1) in all_string_line_numbers:
    2665             # Do not modify multiline strings in non-aggressive mode. Removing
    2666             # trailing whitespace could break doctests.
   2667             if issue_id.startswith(('w29', 'w39')):
   2668                 continue
   2669 
   2670         if aggressive <= 0:
   2671             if issue_id.startswith(('e711', 'w6')):
   2672                 continue
   2673 
   2674         if aggressive <= 1:
   2675             if issue_id.startswith(('e712', 'e713')):
   2676                 continue
   2677 
   2678         if r['line'] in commented_out_code_line_numbers:
   2679             if issue_id.startswith(('e26', 'e501')):
   2680                 continue
   2681 
   2682         yield r
   2683 
   2684 
   2685 def multiline_string_lines(source, include_docstrings=False):
   2686     """Return line numbers that are within multiline strings.
   2687 
   2688     The line numbers are indexed at 1.
   2689 
    2690     Docstrings are ignored unless include_docstrings is True.
   2691 
   2692     """
   2693     line_numbers = set()
   2694     previous_token_type = ''
   2695     try:
   2696         for t in generate_tokens(source):
   2697             token_type = t[0]
   2698             start_row = t[2][0]
   2699             end_row = t[3][0]
   2700 
   2701             if token_type == tokenize.STRING and start_row != end_row:
   2702                 if (
   2703                     include_docstrings or
   2704                     previous_token_type != tokenize.INDENT
   2705                 ):
   2706                     # We increment by one since we want the contents of the
   2707                     # string.
   2708                     line_numbers |= set(range(1 + start_row, 1 + end_row))
   2709 
   2710             previous_token_type = token_type
   2711     except (SyntaxError, tokenize.TokenError):
   2712         pass
   2713 
   2714     return line_numbers
   2715 
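             # Illustrative sketch, not part of upstream autopep8: a hypothetical
             # example of multiline_string_lines(). The reported numbers cover the
             # contents and closing quotes of the string, not its opening line.
             def _example_multiline_string_lines():
                 source = 'x = """\nhello\n"""\n'
                 assert multiline_string_lines(source) == set([2, 3])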
   2716 
   2717 def commented_out_code_lines(source):
   2718     """Return line numbers of comments that are likely code.
   2719 
   2720     Commented-out code is bad practice, but modifying it just adds even more
   2721     clutter.
   2722 
   2723     """
   2724     line_numbers = []
   2725     try:
   2726         for t in generate_tokens(source):
   2727             token_type = t[0]
   2728             token_string = t[1]
   2729             start_row = t[2][0]
   2730             line = t[4]
   2731 
   2732             # Ignore inline comments.
   2733             if not line.lstrip().startswith('#'):
   2734                 continue
   2735 
   2736             if token_type == tokenize.COMMENT:
   2737                 stripped_line = token_string.lstrip('#').strip()
   2738                 if (
   2739                     ' ' in stripped_line and
   2740                     '#' not in stripped_line and
   2741                     check_syntax(stripped_line)
   2742                 ):
   2743                     line_numbers.append(start_row)
   2744     except (SyntaxError, tokenize.TokenError):
   2745         pass
   2746 
   2747     return line_numbers
   2748 
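             # Illustrative sketch, not part of upstream autopep8: only the comment
             # that parses as code (and contains a space) is reported as
             # commented-out code.
             def _example_commented_out_code_lines():
                 source = '# x = 1\n# A real comment.\nprint(1)\n'
                 assert commented_out_code_lines(source) == [1]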
   2749 
   2750 def shorten_comment(line, max_line_length, last_comment=False):
   2751     """Return trimmed or split long comment line.
   2752 
   2753     If there are no comments immediately following it, do a text wrap.
   2754     Doing this wrapping on all comments in general would lead to jagged
   2755     comment text.
   2756 
   2757     """
   2758     assert len(line) > max_line_length
   2759     line = line.rstrip()
   2760 
   2761     # PEP 8 recommends 72 characters for comment text.
   2762     indentation = _get_indentation(line) + '# '
   2763     max_line_length = min(max_line_length,
   2764                           len(indentation) + 72)
   2765 
   2766     MIN_CHARACTER_REPEAT = 5
   2767     if (
   2768         len(line) - len(line.rstrip(line[-1])) >= MIN_CHARACTER_REPEAT and
   2769         not line[-1].isalnum()
   2770     ):
   2771         # Trim comments that end with things like ---------
   2772         return line[:max_line_length] + '\n'
   2773     elif last_comment and re.match(r'\s*#+\s*\w+', line):
   2774         import textwrap
   2775         split_lines = textwrap.wrap(line.lstrip(' \t#'),
   2776                                     initial_indent=indentation,
   2777                                     subsequent_indent=indentation,
   2778                                     width=max_line_length,
   2779                                     break_long_words=False,
   2780                                     break_on_hyphens=False)
   2781         return '\n'.join(split_lines) + '\n'
   2782     else:
   2783         return line + '\n'
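
# For instance (with the default limit of 79): a decorative line such as
# '# ' + '-' * 100 is trimmed rather than wrapped, giving
# '# ' + '-' * 72 + '\n' (74 characters: the indent plus the 72-column
# PEP 8 comment width).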


def normalize_line_endings(lines, newline):
    """Return fixed line endings.

    All lines will be converted to use the given line ending.

    """
    return [line.rstrip('\n\r') + newline for line in lines]


def mutual_startswith(a, b):
    return b.startswith(a) or a.startswith(b)


def code_match(code, select, ignore):
    if ignore:
        assert not isinstance(ignore, unicode)
        for ignored_code in [c.strip() for c in ignore]:
            if mutual_startswith(code.lower(), ignored_code.lower()):
                return False

    if select:
        assert not isinstance(select, unicode)
        for selected_code in [c.strip() for c in select]:
            if mutual_startswith(code.lower(), selected_code.lower()):
                return True
        return False

    return True
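
# The match is by mutual prefix; a few illustrative cases:
#
#     code_match('E501', select=['E5'], ignore=[])    # True
#     code_match('E501', select=[], ignore=['E501'])  # False
#     code_match('E5', select=[], ignore=['E501'])    # False (mutual)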


def fix_code(source, options=None):
    """Return fixed source code."""
    if not options:
        options = parse_args([''])

    if not isinstance(source, unicode):
        source = source.decode(locale.getpreferredencoding())

    sio = io.StringIO(source)
    return fix_lines(sio.readlines(), options=options)
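
# This is the main library entry point. Typical use (a sketch mirroring the
# project README):
#
#     import autopep8
#     fixed = autopep8.fix_code('x=       123\n')
#     # fixed == 'x = 123\n'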


def fix_lines(source_lines, options, filename=''):
    """Return fixed source code."""
    # Transform everything to line feeds, then change the line endings back
    # to the original before returning the fixed source code.
    original_newline = find_newline(source_lines)
    tmp_source = ''.join(normalize_line_endings(source_lines, '\n'))

    # Keep a history to break out of cycles.
    previous_hashes = set()

    if options.line_range:
        fixed_source = apply_local_fixes(tmp_source, options)
    else:
        # Apply global fixes only once (for efficiency).
        fixed_source = apply_global_fixes(tmp_source, options)

    passes = 0
    long_line_ignore_cache = set()
    while hash(fixed_source) not in previous_hashes:
        if options.pep8_passes >= 0 and passes > options.pep8_passes:
            break
        passes += 1

        previous_hashes.add(hash(fixed_source))

        tmp_source = copy.copy(fixed_source)

        fix = FixPEP8(
            filename,
            options,
            contents=tmp_source,
            long_line_ignore_cache=long_line_ignore_cache)

        fixed_source = fix.fix()

    sio = io.StringIO(fixed_source)
    return ''.join(normalize_line_endings(sio.readlines(), original_newline))


def fix_file(filename, options=None, output=None):
    if not options:
        options = parse_args([filename])

    original_source = readlines_from_file(filename)

    fixed_source = original_source

    if options.in_place or output:
        encoding = detect_encoding(filename)

    if output:
        output = codecs.getwriter(encoding)(output.buffer
                                            if hasattr(output, 'buffer')
                                            else output)

        output = LineEndingWrapper(output)

    fixed_source = fix_lines(fixed_source, options, filename=filename)

    if options.diff:
        new = io.StringIO(fixed_source)
        new = new.readlines()
        diff = get_diff_text(original_source, new, filename)
        if output:
            output.write(diff)
            output.flush()
        else:
            return diff
    elif options.in_place:
        fp = open_with_encoding(filename, encoding=encoding,
                                mode='w')
        fp.write(fixed_source)
        fp.close()
    else:
        if output:
            output.write(fixed_source)
            output.flush()
        else:
            return fixed_source


def global_fixes():
    """Yield multiple (code, function) tuples."""
    for function in globals().values():
        if inspect.isfunction(function):
            arguments = inspect.getargspec(function)[0]
            if arguments[:1] != ['source']:
                continue

            code = extract_code_from_function(function)
            if code:
                yield (code, function)
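
# Per the module docstring, a global fix is any module-level function whose
# name is 'fix_' plus a pep8 code and whose first argument is named
# 'source'. A hypothetical new fix would therefore look like:
#
#     def fix_e999(source, aggressive=False):
#         """Fix the (made-up) issue E999."""
#         return source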


def apply_global_fixes(source, options, where='global'):
    """Run global fixes on source code.

    These are fixes that only need be done once (unlike those in
    FixPEP8, which are dependent on pep8).

    """
    if code_match('E101', select=options.select, ignore=options.ignore):
        source = reindent(source,
                          indent_size=options.indent_size)

    for (code, function) in global_fixes():
        if code_match(code, select=options.select, ignore=options.ignore):
            if options.verbose:
                print('--->  Applying {0} fix for {1}'.format(where,
                                                              code.upper()),
                      file=sys.stderr)
            source = function(source,
                              aggressive=options.aggressive)

    source = fix_2to3(source,
                      aggressive=options.aggressive,
                      select=options.select,
                      ignore=options.ignore)

    return source


def apply_local_fixes(source, options):
    """Analogous to apply_global_fixes(), but run only the fixes that
    make sense for the given line_range.

    Do as much as we can without breaking code.

    """
    def find_ge(a, x):
        """Find leftmost item greater than or equal to x."""
        i = bisect.bisect_left(a, x)
        if i != len(a):
            return i, a[i]
        return len(a) - 1, a[-1]

    def find_le(a, x):
        """Find rightmost value less than or equal to x."""
        i = bisect.bisect_right(a, x)
        if i:
            return i - 1, a[i - 1]
        return 0, a[0]

    def local_fix(source, start_log, end_log,
                  start_lines, end_lines, indents, last_line):
        """Apply global fixes to the source between start_log and end_log.

        The subsource must be syntactically valid as a complete Python
        program (though all of its lines may share an extra indent). That
        shared indent is removed, the fixes are applied, and the indent is
        prepended back, taking care not to reindent strings.

        last_line is the strict cut-off (options.line_range[1]), so that
        lines after last_line are not modified.

        """
        if end_log < start_log:
            return source

        ind = indents[start_log]
        indent = _get_indentation(source[start_lines[start_log]])

        sl = slice(start_lines[start_log], end_lines[end_log] + 1)

        subsource = source[sl]
        # Remove the shared indent from the subsource.
        if ind:
            for line_no in start_lines[start_log:end_log + 1]:
                pos = line_no - start_lines[start_log]
                subsource[pos] = subsource[pos][ind:]

        # Apply the global fixes to the dedented subsource.
        fixed_subsource = apply_global_fixes(''.join(subsource),
                                             options,
                                             where='local')
        fixed_subsource = fixed_subsource.splitlines(True)

        # Add the indent back to lines that are not in multiline strings.
        msl = multiline_string_lines(''.join(fixed_subsource),
                                     include_docstrings=False)
        for i, line in enumerate(fixed_subsource):
            if i + 1 not in msl:
                fixed_subsource[i] = indent + line if line != '\n' else line

        # Special-case the final line: if it is a multiline statement *and*
        # the cut-off falls somewhere inside it, keep the fixed subset only
        # up to last_line. This assumes that the number of lines in the
        # multiline statement does not change.
        changed_lines = len(fixed_subsource)
        if (start_lines[end_log] != end_lines[end_log]
                and end_lines[end_log] > last_line):
            after_end = end_lines[end_log] - last_line
            fixed_subsource = (fixed_subsource[:-after_end] +
                               source[sl][-after_end:])
            changed_lines -= after_end

            options.line_range[1] = (options.line_range[0] +
                                     changed_lines - 1)

        return (source[:start_lines[start_log]] +
                fixed_subsource +
                source[end_lines[end_log] + 1:])

    def is_continued_stmt(line,
                          continued_stmts=frozenset(['else', 'elif',
                                                     'finally', 'except'])):
        return re.split('[ :]', line.strip(), 1)[0] in continued_stmts

    assert options.line_range
    start, end = options.line_range
    start -= 1
    end -= 1
    last_line = end  # We shouldn't modify lines after this cut-off.

    try:
        logical = _find_logical(source)
    except (SyntaxError, tokenize.TokenError):
        return ''.join(source)

    if not logical[0]:
        # Just blank lines; presumably the fixed result is simply '\n'.
        return apply_global_fixes(source, options)

    start_lines, indents = zip(*logical[0])
    end_lines, _ = zip(*logical[1])

    source = source.splitlines(True)

    start_log, start = find_ge(start_lines, start)
    end_log, end = find_le(start_lines, end)

    # Look back one line; if it is indented less than the current indent,
    # we can move to that previous line, knowing that its indentation
    # level will not be changed.
    if (start_log > 0
            and indents[start_log - 1] < indents[start_log]
            and not is_continued_stmt(source[start_log - 1])):
        start_log -= 1
        start = start_lines[start_log]

    while start < end:

        if is_continued_stmt(source[start]):
            start_log += 1
            start = start_lines[start_log]
            continue

        ind = indents[start_log]
        for t in itertools.takewhile(lambda t: t[1][1] >= ind,
                                     enumerate(logical[0][start_log:])):
            n_log, n = start_log + t[0], t[1][0]
        # start shares its indent with everything up to n.

        if n <= end:
            source = local_fix(source, start_log, n_log,
                               start_lines, end_lines,
                               indents, last_line)
            start_log = n_log if n == end else n_log + 1
            start = start_lines[start_log]
            continue

        else:
            # Look at the line after end and see if it allows us to reindent.
            after_end_log, after_end = find_ge(start_lines, end + 1)

            if indents[after_end_log] > indents[start_log]:
                start_log, start = find_ge(start_lines, start + 1)
                continue

            if (indents[after_end_log] == indents[start_log]
                    and is_continued_stmt(source[after_end])):
                # Find n, the beginning of the last continued statement.
                # Apply the fix to the previous block if there is one.
                only_block = True
                for n, n_ind in logical[0][start_log:end_log + 1][::-1]:
                    if n_ind == ind and not is_continued_stmt(source[n]):
                        n_log = start_lines.index(n)
                        source = local_fix(source, start_log, n_log - 1,
                                           start_lines, end_lines,
                                           indents, last_line)
                        start_log = n_log + 1
                        start = start_lines[start_log]
                        only_block = False
                        break
                if only_block:
                    end_log, end = find_le(start_lines, end - 1)
                continue

            source = local_fix(source, start_log, end_log,
                               start_lines, end_lines,
                               indents, last_line)
            break

    return ''.join(source)


def extract_code_from_function(function):
    """Return code handled by function."""
    if not function.__name__.startswith('fix_'):
        return None

    code = re.sub('^fix_', '', function.__name__)
    if not code:
        return None

    try:
        int(code[1:])
    except ValueError:
        return None

    return code
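
# For example, this module's fix_w602 maps to 'w602', while helper
# functions whose names lack the 'fix_<code>' shape map to None.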


def create_parser():
    """Return command-line parser."""
    # Do import locally to be friendly to those who use autopep8 as a library
    # and are supporting Python 2.6.
    import argparse

    parser = argparse.ArgumentParser(description=docstring_summary(__doc__),
                                     prog='autopep8')
    parser.add_argument('--version', action='version',
                        version='%(prog)s ' + __version__)
    parser.add_argument('-v', '--verbose', action='count', dest='verbose',
                        default=0,
                        help='print verbose messages; '
                        'multiple -v options result in more verbose messages')
    parser.add_argument('-d', '--diff', action='store_true', dest='diff',
                        help='print the diff for the fixed source')
    parser.add_argument('-i', '--in-place', action='store_true',
                        help='make changes to files in place')
    parser.add_argument('-r', '--recursive', action='store_true',
                        help='run recursively over directories; '
                        'must be used with --in-place or --diff')
    parser.add_argument('-j', '--jobs', type=int, metavar='n', default=1,
                        help='number of parallel jobs; '
                        'match CPU count if value is less than 1')
    parser.add_argument('-p', '--pep8-passes', metavar='n',
                        default=-1, type=int,
                        help='maximum number of additional pep8 passes '
                        '(default: infinite)')
    parser.add_argument('-a', '--aggressive', action='count', default=0,
                        help='enable non-whitespace changes; '
                        'multiple -a options result in more aggressive '
                        'changes')
    parser.add_argument('--experimental', action='store_true',
                        help='enable experimental fixes')
    parser.add_argument('--exclude', metavar='globs',
                        help='exclude file/directory names that match these '
                        'comma-separated globs')
    parser.add_argument('--list-fixes', action='store_true',
                        help='list codes for fixes; '
                        'used by --ignore and --select')
    parser.add_argument('--ignore', metavar='errors', default='',
                        help='do not fix these errors/warnings '
                        '(default: {0})'.format(DEFAULT_IGNORE))
    parser.add_argument('--select', metavar='errors', default='',
                        help='fix only these errors/warnings (e.g. E4,W)')
    parser.add_argument('--max-line-length', metavar='n', default=79, type=int,
                        help='set maximum allowed line length '
                        '(default: %(default)s)')
    parser.add_argument('--range', metavar='line', dest='line_range',
                        default=None, type=int, nargs=2,
                        help='only fix errors found within this inclusive '
                        'range of line numbers (e.g. 1 99); '
                        'line numbers are indexed at 1')
    parser.add_argument('--indent-size', default=DEFAULT_INDENT_SIZE,
                        type=int, metavar='n',
                        help='number of spaces per indent level '
                             '(default: %(default)s)')
    parser.add_argument('files', nargs='*',
                        help="files to format or '-' for standard in")

    return parser


def parse_args(arguments):
    """Parse command-line options."""
    parser = create_parser()
    args = parser.parse_args(arguments)

    if not args.files and not args.list_fixes:
        parser.error('incorrect number of arguments')

    args.files = [decode_filename(name) for name in args.files]

    if '-' in args.files:
        if len(args.files) > 1:
            parser.error('cannot mix stdin and regular files')

        if args.diff:
            parser.error('--diff cannot be used with standard input')

        if args.in_place:
            parser.error('--in-place cannot be used with standard input')

        if args.recursive:
            parser.error('--recursive cannot be used with standard input')

    if len(args.files) > 1 and not (args.in_place or args.diff):
        parser.error('autopep8 only takes one filename as argument '
                     'unless the "--in-place" or "--diff" args are '
                     'used')

    if args.recursive and not (args.in_place or args.diff):
        parser.error('--recursive must be used with --in-place or --diff')

    if args.exclude and not args.recursive:
        parser.error('--exclude is only relevant when used with --recursive')

    if args.in_place and args.diff:
        parser.error('--in-place and --diff are mutually exclusive')

    if args.max_line_length <= 0:
        parser.error('--max-line-length must be greater than 0')

    if args.select:
        args.select = args.select.split(',')

    if args.ignore:
        args.ignore = args.ignore.split(',')
    elif not args.select:
        if args.aggressive:
            # Enable everything by default if aggressive.
            args.select = ['E', 'W']
        else:
            args.ignore = DEFAULT_IGNORE.split(',')

    if args.exclude:
        args.exclude = args.exclude.split(',')
    else:
        args.exclude = []

    if args.jobs < 1:
        # Do not import multiprocessing globally in case it is not supported
        # on the platform.
        import multiprocessing
        args.jobs = multiprocessing.cpu_count()

    if args.jobs > 1 and not args.in_place:
        parser.error('parallel jobs requires --in-place')

    if args.line_range:
        if args.line_range[0] <= 0:
            parser.error('--range values must be positive')
        if args.line_range[0] > args.line_range[1]:
            parser.error('First value of --range should be less than or equal '
                         'to the second')

    return args
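
# A rough sketch of the result: parse_args(['example.py']) returns a
# namespace with files=['example.py'], select='', ignore=['E24'] (the
# default), max_line_length=79, and so on; fix_code() above relies on
# parse_args(['']) for these defaults.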


def decode_filename(filename):
    """Return Unicode filename."""
    if isinstance(filename, unicode):
        return filename
    else:
        return filename.decode(sys.getfilesystemencoding())


def supported_fixes():
    """Yield pep8 error codes that autopep8 fixes.

    Each item we yield is a tuple of the code followed by its
    description.

    """
    yield ('E101', docstring_summary(reindent.__doc__))

    instance = FixPEP8(filename=None, options=None, contents='')
    for attribute in dir(instance):
        code = re.match('fix_([ew][0-9][0-9][0-9])', attribute)
        if code:
            yield (
                code.group(1).upper(),
                re.sub(r'\s+', ' ',
                       docstring_summary(getattr(instance, attribute).__doc__))
            )

    for (code, function) in sorted(global_fixes()):
        yield (code.upper() + (4 - len(code)) * ' ',
               re.sub(r'\s+', ' ', docstring_summary(function.__doc__)))

    for code in sorted(CODE_TO_2TO3):
        yield (code.upper() + (4 - len(code)) * ' ',
               re.sub(r'\s+', ' ', docstring_summary(fix_2to3.__doc__)))


def docstring_summary(docstring):
    """Return summary of docstring."""
    return docstring.split('\n')[0]


def line_shortening_rank(candidate, indent_word, max_line_length,
                         experimental=False):
    """Return rank of candidate.

    This is for sorting candidates.

    """
    if not candidate.strip():
        return 0

    rank = 0
    lines = candidate.split('\n')

    offset = 0
    if (
        not lines[0].lstrip().startswith('#') and
        lines[0].rstrip()[-1] not in '([{'
    ):
        for (opening, closing) in ('()', '[]', '{}'):
            # Don't penalize empty containers that aren't split up. Things
            # like "foo(\n    )" aren't particularly good.
            opening_loc = lines[0].find(opening)
            closing_loc = lines[0].find(closing)
            if opening_loc >= 0:
                if closing_loc < 0 or closing_loc != opening_loc + 1:
                    offset = max(offset, 1 + opening_loc)

    current_longest = max(offset + len(x.strip()) for x in lines)

    rank += 4 * max(0, current_longest - max_line_length)

    rank += len(lines)

    # Too much variation in line length is ugly.
    rank += 2 * standard_deviation(len(line) for line in lines)

    bad_starting_symbol = {
        '(': ')',
        '[': ']',
        '{': '}'}.get(lines[0][-1])

    if len(lines) > 1:
        if (
            bad_starting_symbol and
            lines[1].lstrip().startswith(bad_starting_symbol)
        ):
            rank += 20

    for lineno, current_line in enumerate(lines):
        current_line = current_line.strip()

        if current_line.startswith('#'):
            continue

        for bad_start in ['.', '%', '+', '-', '/']:
            if current_line.startswith(bad_start):
                rank += 100

            # Do not tolerate operators on their own line.
            if current_line == bad_start:
                rank += 1000

        if current_line.endswith(('(', '[', '{', '.')):
            # Avoid lonely openings. They result in longer lines.
            if len(current_line) <= len(indent_word):
                rank += 100

            # Avoid the ugliness of ", (\n".
            if (
                current_line.endswith('(') and
                current_line[:-1].rstrip().endswith(',')
            ):
                rank += 100

            # Also avoid the ugliness of "foo.\nbar".
            if current_line.endswith('.'):
                rank += 100

            if has_arithmetic_operator(current_line):
                rank += 100

        if current_line.endswith(('%', '(', '[', '{')):
            rank -= 20

        # Try to break list comprehensions at the "for".
        if current_line.startswith('for '):
            rank -= 50

        if current_line.endswith('\\'):
            # If a line ends in \-newline, it may be part of a
            # multiline string. In that case, we would like to know
            # how long that line is without the \-newline. If it's
            # longer than the maximum, or has comments, then we assume
            # that the \-newline is an okay candidate and only
            # penalize it a bit.
            total_len = len(current_line)
            lineno += 1
            while lineno < len(lines):
                total_len += len(lines[lineno])

                if lines[lineno].lstrip().startswith('#'):
                    total_len = max_line_length
                    break

                if not lines[lineno].endswith('\\'):
                    break

                lineno += 1

            if total_len < max_line_length:
                rank += 10
            else:
                rank += 100 if experimental else 1

        # Prefer breaking at commas rather than at a colon.
        if ',' in current_line and current_line.endswith(':'):
            rank += 10

        rank += 10 * count_unbalanced_brackets(current_line)

    return max(0, rank)


def standard_deviation(numbers):
    """Return standard deviation."""
    numbers = list(numbers)
    if not numbers:
        return 0
    mean = sum(numbers) / len(numbers)
    return (sum((n - mean) ** 2 for n in numbers) /
            len(numbers)) ** .5
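
# This is the population standard deviation (it divides by n); for
# example, standard_deviation([2, 4, 4, 4, 5, 5, 7, 9]) evaluates to 2.0.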


def has_arithmetic_operator(line):
    """Return True if line contains any arithmetic operators."""
    for operator in pep8.ARITHMETIC_OP:
        if operator in line:
            return True

    return False


def count_unbalanced_brackets(line):
    """Return number of unmatched open/close brackets."""
    count = 0
    for opening, closing in ['()', '[]', '{}']:
        count += abs(line.count(opening) - line.count(closing))

    return count
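
# Brackets are counted per type; e.g. count_unbalanced_brackets('foo(bar[0]')
# is 1, since only the parentheses are unbalanced.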


def split_at_offsets(line, offsets):
    """Split line at offsets.

    Return list of strings.

    """
    result = []

    previous_offset = 0
    current_offset = 0
    for current_offset in sorted(offsets):
        if current_offset < len(line) and previous_offset != current_offset:
            result.append(line[previous_offset:current_offset].strip())
        previous_offset = current_offset

    result.append(line[current_offset:])

    return result
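
# For example, split_at_offsets('abcdef', [2, 4]) returns
# ['ab', 'cd', 'ef']. Note that every piece except the last is stripped.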


class LineEndingWrapper(object):

    r"""Replace line endings to work with sys.stdout.

    It seems that sys.stdout expects only '\n' as the line ending, no matter
    the platform. Otherwise, we get repeated line endings.

    """

    def __init__(self, output):
        self.__output = output

    def write(self, s):
        self.__output.write(s.replace('\r\n', '\n').replace('\r', '\n'))

    def flush(self):
        self.__output.flush()


def match_file(filename, exclude):
    """Return True if file is okay for modifying/recursing."""
    base_name = os.path.basename(filename)

    if base_name.startswith('.'):
        return False

    for pattern in exclude:
        if fnmatch.fnmatch(base_name, pattern):
            return False

    if not os.path.isdir(filename) and not is_python_file(filename):
        return False

    return True


def find_files(filenames, recursive, exclude):
    """Yield filenames."""
    while filenames:
        name = filenames.pop(0)
        if recursive and os.path.isdir(name):
            for root, directories, children in os.walk(name):
                filenames += [os.path.join(root, f) for f in children
                              if match_file(os.path.join(root, f),
                                            exclude)]
                # Modify the list in place so that os.walk() skips the
                # excluded directories.
                directories[:] = [d for d in directories
                                  if match_file(os.path.join(root, d),
                                                exclude)]
        else:
            yield name


def _fix_file(parameters):
    """Helper function for optionally running fix_file() in parallel."""
    if parameters[1].verbose:
        print('[file:{0}]'.format(parameters[0]), file=sys.stderr)
    try:
        fix_file(*parameters)
    except IOError as error:
        print(unicode(error), file=sys.stderr)


def fix_multiple_files(filenames, options, output=None):
    """Fix list of files.

    Optionally fix files recursively.

    """
    filenames = find_files(filenames, options.recursive, options.exclude)
    if options.jobs > 1:
        import multiprocessing
        pool = multiprocessing.Pool(options.jobs)
        # output is not passed here; parse_args() requires --in-place for
        # parallel jobs, so there is nothing to write to output.
        pool.map(_fix_file,
                 [(name, options) for name in filenames])
    else:
        for name in filenames:
            _fix_file((name, options, output))


def is_python_file(filename):
    """Return True if filename is Python file."""
    if filename.endswith('.py'):
        return True

    try:
        with open_with_encoding(filename) as f:
            first_line = f.readlines(1)[0]
    except (IOError, IndexError):
        return False

    if not PYTHON_SHEBANG_REGEX.match(first_line):
        return False

    return True


def is_probably_part_of_multiline(line):
    """Return True if line is likely part of a multiline string.

    When multiline strings are involved, pep8 reports the error as being
    at the start of the multiline string, which doesn't work for us.

    """
    return (
        '"""' in line or
        "'''" in line or
        line.rstrip().endswith('\\')
    )
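
# For example, lines such as  x = """start  or  y = 1 + \  trigger this
# heuristic, while a plain  z = 1  does not.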


def main():
    """Tool main."""
    try:
        # Exit on broken pipe.
        signal.signal(signal.SIGPIPE, signal.SIG_DFL)
    except AttributeError:  # pragma: no cover
        # SIGPIPE is not available on Windows.
        pass

    try:
        args = parse_args(sys.argv[1:])

        if args.list_fixes:
            for code, description in sorted(supported_fixes()):
                print('{code} - {description}'.format(
                    code=code, description=description))
            return 0

        if args.files == ['-']:
            assert not args.in_place

            # LineEndingWrapper is unnecessary here due to the symmetry between
            # standard in and standard out.
            sys.stdout.write(fix_code(sys.stdin.read(), args))
        else:
            if args.in_place or args.diff:
                args.files = list(set(args.files))
            else:
                assert len(args.files) == 1
                assert not args.recursive

            fix_multiple_files(args.files, args, sys.stdout)
    except KeyboardInterrupt:
        return 1  # pragma: no cover


class CachedTokenizer(object):

    """A one-element cache around tokenize.generate_tokens().

    Original code written by Ned Batchelder, in coverage.py.

    """

    def __init__(self):
        self.last_text = None
        self.last_tokens = None

    def generate_tokens(self, text):
        """A stand-in for tokenize.generate_tokens()."""
        if text != self.last_text:
            string_io = io.StringIO(text)
            self.last_tokens = list(
                tokenize.generate_tokens(string_io.readline)
            )
            self.last_text = text
        return self.last_tokens

_cached_tokenizer = CachedTokenizer()
generate_tokens = _cached_tokenizer.generate_tokens


if __name__ == '__main__':
    sys.exit(main())