# Copyright (C) 2010-2011 Hideo Hattori
# Copyright (C) 2011-2013 Hideo Hattori, Steven Myint
# Copyright (C) 2013-2014 Hideo Hattori, Steven Myint, Bill Wendling
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""Automatically formats Python code to conform to the PEP 8 style guide.

Fixes that only need be done once can be added by adding a function of the form
"fix_<code>(source)" to this module. They should return the fixed source code.
These fixes are picked up by apply_global_fixes().

Fixes that depend on pep8 should be added as methods to FixPEP8. See the class
documentation for more information.

"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import bisect
import codecs
import collections
import copy
import difflib
import fnmatch
import inspect
import io
import itertools
import keyword
import locale
import os
import re
import signal
import sys
import token
import tokenize

import pep8


# Python 2/3 compatibility: make "unicode" a valid name on Python 3.
try:
    unicode
except NameError:
    unicode = str


__version__ = '1.0.3'


# Line-ending styles preserved when rewriting files.
CR = '\r'
LF = '\n'
CRLF = '\r\n'


PYTHON_SHEBANG_REGEX = re.compile(r'^#!.*\bpython[23]?\b\s*$')


# For generating line shortening candidates.
SHORTEN_OPERATOR_GROUPS = frozenset([
    frozenset([',']),
    frozenset(['%']),
    frozenset([',', '(', '[', '{']),
    frozenset(['%', '(', '[', '{']),
    frozenset([',', '(', '[', '{', '%', '+', '-', '*', '/', '//']),
    frozenset(['%', '+', '-', '*', '/', '//']),
])


DEFAULT_IGNORE = 'E24'
DEFAULT_INDENT_SIZE = 4


# W602 is handled separately due to the need to avoid "with_traceback".
95 CODE_TO_2TO3 = { 96 'E721': ['idioms'], 97 'W601': ['has_key'], 98 'W603': ['ne'], 99 'W604': ['repr'], 100 'W690': ['apply', 101 'except', 102 'exitfunc', 103 'import', 104 'numliterals', 105 'operator', 106 'paren', 107 'reduce', 108 'renames', 109 'standarderror', 110 'sys_exc', 111 'throw', 112 'tuple_params', 113 'xreadlines']} 114 115 116 def open_with_encoding(filename, encoding=None, mode='r'): 117 """Return opened file with a specific encoding.""" 118 if not encoding: 119 encoding = detect_encoding(filename) 120 121 return io.open(filename, mode=mode, encoding=encoding, 122 newline='') # Preserve line endings 123 124 125 def detect_encoding(filename): 126 """Return file encoding.""" 127 try: 128 with open(filename, 'rb') as input_file: 129 from lib2to3.pgen2 import tokenize as lib2to3_tokenize 130 encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0] 131 132 # Check for correctness of encoding 133 with open_with_encoding(filename, encoding) as test_file: 134 test_file.read() 135 136 return encoding 137 except (LookupError, SyntaxError, UnicodeDecodeError): 138 return 'latin-1' 139 140 141 def readlines_from_file(filename): 142 """Return contents of file.""" 143 with open_with_encoding(filename) as input_file: 144 return input_file.readlines() 145 146 147 def extended_blank_lines(logical_line, 148 blank_lines, 149 indent_level, 150 previous_logical): 151 """Check for missing blank lines after class declaration.""" 152 if previous_logical.startswith('class '): 153 if ( 154 logical_line.startswith(('def ', 'class ', '@')) or 155 pep8.DOCSTRING_REGEX.match(logical_line) 156 ): 157 if indent_level and not blank_lines: 158 yield (0, 'E309 expected 1 blank line after class declaration') 159 elif previous_logical.startswith('def '): 160 if blank_lines and pep8.DOCSTRING_REGEX.match(logical_line): 161 yield (0, 'E303 too many blank lines ({0})'.format(blank_lines)) 162 elif pep8.DOCSTRING_REGEX.match(previous_logical): 163 # Missing blank line 
between class docstring and method declaration. 164 if ( 165 indent_level and 166 not blank_lines and 167 logical_line.startswith(('def ')) and 168 '(self' in logical_line 169 ): 170 yield (0, 'E301 expected 1 blank line, found 0') 171 pep8.register_check(extended_blank_lines) 172 173 174 def continued_indentation(logical_line, tokens, indent_level, indent_char, 175 noqa): 176 """Override pep8's function to provide indentation information.""" 177 first_row = tokens[0][2][0] 178 nrows = 1 + tokens[-1][2][0] - first_row 179 if noqa or nrows == 1: 180 return 181 182 # indent_next tells us whether the next block is indented. Assuming 183 # that it is indented by 4 spaces, then we should not allow 4-space 184 # indents on the final continuation line. In turn, some other 185 # indents are allowed to have an extra 4 spaces. 186 indent_next = logical_line.endswith(':') 187 188 row = depth = 0 189 valid_hangs = ( 190 (DEFAULT_INDENT_SIZE,) 191 if indent_char != '\t' else (DEFAULT_INDENT_SIZE, 192 2 * DEFAULT_INDENT_SIZE) 193 ) 194 195 # Remember how many brackets were opened on each line. 196 parens = [0] * nrows 197 198 # Relative indents of physical lines. 199 rel_indent = [0] * nrows 200 201 # For each depth, collect a list of opening rows. 202 open_rows = [[0]] 203 # For each depth, memorize the hanging indentation. 204 hangs = [None] 205 206 # Visual indents. 207 indent_chances = {} 208 last_indent = tokens[0][2] 209 indent = [last_indent[1]] 210 211 last_token_multiline = None 212 line = None 213 last_line = '' 214 last_line_begins_with_multiline = False 215 for token_type, text, start, end, line in tokens: 216 217 newline = row < start[0] - first_row 218 if newline: 219 row = start[0] - first_row 220 newline = (not last_token_multiline and 221 token_type not in (tokenize.NL, tokenize.NEWLINE)) 222 last_line_begins_with_multiline = last_token_multiline 223 224 if newline: 225 # This is the beginning of a continuation line. 
226 last_indent = start 227 228 # Record the initial indent. 229 rel_indent[row] = pep8.expand_indent(line) - indent_level 230 231 # Identify closing bracket. 232 close_bracket = (token_type == tokenize.OP and text in ']})') 233 234 # Is the indent relative to an opening bracket line? 235 for open_row in reversed(open_rows[depth]): 236 hang = rel_indent[row] - rel_indent[open_row] 237 hanging_indent = hang in valid_hangs 238 if hanging_indent: 239 break 240 if hangs[depth]: 241 hanging_indent = (hang == hangs[depth]) 242 243 visual_indent = (not close_bracket and hang > 0 and 244 indent_chances.get(start[1])) 245 246 if close_bracket and indent[depth]: 247 # Closing bracket for visual indent. 248 if start[1] != indent[depth]: 249 yield (start, 'E124 {0}'.format(indent[depth])) 250 elif close_bracket and not hang: 251 pass 252 elif indent[depth] and start[1] < indent[depth]: 253 # Visual indent is broken. 254 yield (start, 'E128 {0}'.format(indent[depth])) 255 elif (hanging_indent or 256 (indent_next and 257 rel_indent[row] == 2 * DEFAULT_INDENT_SIZE)): 258 # Hanging indent is verified. 259 if close_bracket: 260 yield (start, 'E123 {0}'.format(indent_level + 261 rel_indent[open_row])) 262 hangs[depth] = hang 263 elif visual_indent is True: 264 # Visual indent is verified. 265 indent[depth] = start[1] 266 elif visual_indent in (text, unicode): 267 # Ignore token lined up with matching one from a previous line. 268 pass 269 else: 270 one_indented = (indent_level + rel_indent[open_row] + 271 DEFAULT_INDENT_SIZE) 272 # Indent is broken. 273 if hang <= 0: 274 error = ('E122', one_indented) 275 elif indent[depth]: 276 error = ('E127', indent[depth]) 277 elif hang > DEFAULT_INDENT_SIZE: 278 error = ('E126', one_indented) 279 else: 280 hangs[depth] = hang 281 error = ('E121', one_indented) 282 283 yield (start, '{0} {1}'.format(*error)) 284 285 # Look for visual indenting. 
286 if (parens[row] and token_type not in (tokenize.NL, tokenize.COMMENT) 287 and not indent[depth]): 288 indent[depth] = start[1] 289 indent_chances[start[1]] = True 290 # Deal with implicit string concatenation. 291 elif (token_type in (tokenize.STRING, tokenize.COMMENT) or 292 text in ('u', 'ur', 'b', 'br')): 293 indent_chances[start[1]] = unicode 294 # Special case for the "if" statement because len("if (") is equal to 295 # 4. 296 elif not indent_chances and not row and not depth and text == 'if': 297 indent_chances[end[1] + 1] = True 298 elif text == ':' and line[end[1]:].isspace(): 299 open_rows[depth].append(row) 300 301 # Keep track of bracket depth. 302 if token_type == tokenize.OP: 303 if text in '([{': 304 depth += 1 305 indent.append(0) 306 hangs.append(None) 307 if len(open_rows) == depth: 308 open_rows.append([]) 309 open_rows[depth].append(row) 310 parens[row] += 1 311 elif text in ')]}' and depth > 0: 312 # Parent indents should not be more than this one. 313 prev_indent = indent.pop() or last_indent[1] 314 hangs.pop() 315 for d in range(depth): 316 if indent[d] > prev_indent: 317 indent[d] = 0 318 for ind in list(indent_chances): 319 if ind >= prev_indent: 320 del indent_chances[ind] 321 del open_rows[depth + 1:] 322 depth -= 1 323 if depth: 324 indent_chances[indent[depth]] = True 325 for idx in range(row, -1, -1): 326 if parens[idx]: 327 parens[idx] -= 1 328 break 329 assert len(indent) == depth + 1 330 if ( 331 start[1] not in indent_chances and 332 # This is for purposes of speeding up E121 (GitHub #90). 333 not last_line.rstrip().endswith(',') 334 ): 335 # Allow to line up tokens. 
336 indent_chances[start[1]] = text 337 338 last_token_multiline = (start[0] != end[0]) 339 if last_token_multiline: 340 rel_indent[end[0] - first_row] = rel_indent[row] 341 342 last_line = line 343 344 if ( 345 indent_next and 346 not last_line_begins_with_multiline and 347 pep8.expand_indent(line) == indent_level + DEFAULT_INDENT_SIZE 348 ): 349 pos = (start[0], indent[0] + 4) 350 yield (pos, 'E125 {0}'.format(indent_level + 351 2 * DEFAULT_INDENT_SIZE)) 352 del pep8._checks['logical_line'][pep8.continued_indentation] 353 pep8.register_check(continued_indentation) 354 355 356 class FixPEP8(object): 357 358 """Fix invalid code. 359 360 Fixer methods are prefixed "fix_". The _fix_source() method looks for these 361 automatically. 362 363 The fixer method can take either one or two arguments (in addition to 364 self). The first argument is "result", which is the error information from 365 pep8. The second argument, "logical", is required only for logical-line 366 fixes. 367 368 The fixer method can return the list of modified lines or None. An empty 369 list would mean that no changes were made. None would mean that only the 370 line reported in the pep8 error was modified. Note that the modified line 371 numbers that are returned are indexed at 1. This typically would correspond 372 with the line number reported in the pep8 error information. 
373 374 [fixed method list] 375 - e121,e122,e123,e124,e125,e126,e127,e128,e129 376 - e201,e202,e203 377 - e211 378 - e221,e222,e223,e224,e225 379 - e231 380 - e251 381 - e261,e262 382 - e271,e272,e273,e274 383 - e301,e302,e303 384 - e401 385 - e502 386 - e701,e702 387 - e711 388 - w291 389 390 """ 391 392 def __init__(self, filename, 393 options, 394 contents=None, 395 long_line_ignore_cache=None): 396 self.filename = filename 397 if contents is None: 398 self.source = readlines_from_file(filename) 399 else: 400 sio = io.StringIO(contents) 401 self.source = sio.readlines() 402 self.options = options 403 self.indent_word = _get_indentword(''.join(self.source)) 404 405 self.long_line_ignore_cache = ( 406 set() if long_line_ignore_cache is None 407 else long_line_ignore_cache) 408 409 # Many fixers are the same even though pep8 categorizes them 410 # differently. 411 self.fix_e115 = self.fix_e112 412 self.fix_e116 = self.fix_e113 413 self.fix_e121 = self._fix_reindent 414 self.fix_e122 = self._fix_reindent 415 self.fix_e123 = self._fix_reindent 416 self.fix_e124 = self._fix_reindent 417 self.fix_e126 = self._fix_reindent 418 self.fix_e127 = self._fix_reindent 419 self.fix_e128 = self._fix_reindent 420 self.fix_e129 = self._fix_reindent 421 self.fix_e202 = self.fix_e201 422 self.fix_e203 = self.fix_e201 423 self.fix_e211 = self.fix_e201 424 self.fix_e221 = self.fix_e271 425 self.fix_e222 = self.fix_e271 426 self.fix_e223 = self.fix_e271 427 self.fix_e226 = self.fix_e225 428 self.fix_e227 = self.fix_e225 429 self.fix_e228 = self.fix_e225 430 self.fix_e241 = self.fix_e271 431 self.fix_e242 = self.fix_e224 432 self.fix_e261 = self.fix_e262 433 self.fix_e272 = self.fix_e271 434 self.fix_e273 = self.fix_e271 435 self.fix_e274 = self.fix_e271 436 self.fix_e309 = self.fix_e301 437 self.fix_e501 = ( 438 self.fix_long_line_logically if 439 options and (options.aggressive >= 2 or options.experimental) else 440 self.fix_long_line_physically) 441 self.fix_e703 = self.fix_e702 442 
443 self._ws_comma_done = False 444 445 def _fix_source(self, results): 446 try: 447 (logical_start, logical_end) = _find_logical(self.source) 448 logical_support = True 449 except (SyntaxError, tokenize.TokenError): # pragma: no cover 450 logical_support = False 451 452 completed_lines = set() 453 for result in sorted(results, key=_priority_key): 454 if result['line'] in completed_lines: 455 continue 456 457 fixed_methodname = 'fix_' + result['id'].lower() 458 if hasattr(self, fixed_methodname): 459 fix = getattr(self, fixed_methodname) 460 461 line_index = result['line'] - 1 462 original_line = self.source[line_index] 463 464 is_logical_fix = len(inspect.getargspec(fix).args) > 2 465 if is_logical_fix: 466 logical = None 467 if logical_support: 468 logical = _get_logical(self.source, 469 result, 470 logical_start, 471 logical_end) 472 if logical and set(range( 473 logical[0][0] + 1, 474 logical[1][0] + 1)).intersection( 475 completed_lines): 476 continue 477 478 modified_lines = fix(result, logical) 479 else: 480 modified_lines = fix(result) 481 482 if modified_lines is None: 483 # Force logical fixes to report what they modified. 484 assert not is_logical_fix 485 486 if self.source[line_index] == original_line: 487 modified_lines = [] 488 489 if modified_lines: 490 completed_lines.update(modified_lines) 491 elif modified_lines == []: # Empty list means no fix 492 if self.options.verbose >= 2: 493 print( 494 '---> Not fixing {f} on line {l}'.format( 495 f=result['id'], l=result['line']), 496 file=sys.stderr) 497 else: # We assume one-line fix when None. 
498 completed_lines.add(result['line']) 499 else: 500 if self.options.verbose >= 3: 501 print( 502 "---> '{0}' is not defined.".format(fixed_methodname), 503 file=sys.stderr) 504 505 info = result['info'].strip() 506 print('---> {0}:{1}:{2}:{3}'.format(self.filename, 507 result['line'], 508 result['column'], 509 info), 510 file=sys.stderr) 511 512 def fix(self): 513 """Return a version of the source code with PEP 8 violations fixed.""" 514 pep8_options = { 515 'ignore': self.options.ignore, 516 'select': self.options.select, 517 'max_line_length': self.options.max_line_length, 518 } 519 results = _execute_pep8(pep8_options, self.source) 520 521 if self.options.verbose: 522 progress = {} 523 for r in results: 524 if r['id'] not in progress: 525 progress[r['id']] = set() 526 progress[r['id']].add(r['line']) 527 print('---> {n} issue(s) to fix {progress}'.format( 528 n=len(results), progress=progress), file=sys.stderr) 529 530 if self.options.line_range: 531 start, end = self.options.line_range 532 results = [r for r in results 533 if start <= r['line'] <= end] 534 535 self._fix_source(filter_results(source=''.join(self.source), 536 results=results, 537 aggressive=self.options.aggressive)) 538 539 if self.options.line_range: 540 # If number of lines has changed then change line_range. 541 count = sum(sline.count('\n') 542 for sline in self.source[start - 1:end]) 543 self.options.line_range[1] = start + count - 1 544 545 return ''.join(self.source) 546 547 def _fix_reindent(self, result): 548 """Fix a badly indented line. 549 550 This is done by adding or removing from its initial indent only. 
551 552 """ 553 num_indent_spaces = int(result['info'].split()[1]) 554 line_index = result['line'] - 1 555 target = self.source[line_index] 556 557 self.source[line_index] = ' ' * num_indent_spaces + target.lstrip() 558 559 def fix_e112(self, result): 560 """Fix under-indented comments.""" 561 line_index = result['line'] - 1 562 target = self.source[line_index] 563 564 if not target.lstrip().startswith('#'): 565 # Don't screw with invalid syntax. 566 return [] 567 568 self.source[line_index] = self.indent_word + target 569 570 def fix_e113(self, result): 571 """Fix over-indented comments.""" 572 line_index = result['line'] - 1 573 target = self.source[line_index] 574 575 indent = _get_indentation(target) 576 stripped = target.lstrip() 577 578 if not stripped.startswith('#'): 579 # Don't screw with invalid syntax. 580 return [] 581 582 self.source[line_index] = indent[1:] + stripped 583 584 def fix_e125(self, result): 585 """Fix indentation undistinguish from the next logical line.""" 586 num_indent_spaces = int(result['info'].split()[1]) 587 line_index = result['line'] - 1 588 target = self.source[line_index] 589 590 spaces_to_add = num_indent_spaces - len(_get_indentation(target)) 591 indent = len(_get_indentation(target)) 592 modified_lines = [] 593 594 while len(_get_indentation(self.source[line_index])) >= indent: 595 self.source[line_index] = (' ' * spaces_to_add + 596 self.source[line_index]) 597 modified_lines.append(1 + line_index) # Line indexed at 1. 
598 line_index -= 1 599 600 return modified_lines 601 602 def fix_e201(self, result): 603 """Remove extraneous whitespace.""" 604 line_index = result['line'] - 1 605 target = self.source[line_index] 606 offset = result['column'] - 1 607 608 if is_probably_part_of_multiline(target): 609 return [] 610 611 fixed = fix_whitespace(target, 612 offset=offset, 613 replacement='') 614 615 self.source[line_index] = fixed 616 617 def fix_e224(self, result): 618 """Remove extraneous whitespace around operator.""" 619 target = self.source[result['line'] - 1] 620 offset = result['column'] - 1 621 fixed = target[:offset] + target[offset:].replace('\t', ' ') 622 self.source[result['line'] - 1] = fixed 623 624 def fix_e225(self, result): 625 """Fix missing whitespace around operator.""" 626 target = self.source[result['line'] - 1] 627 offset = result['column'] - 1 628 fixed = target[:offset] + ' ' + target[offset:] 629 630 # Only proceed if non-whitespace characters match. 631 # And make sure we don't break the indentation. 632 if ( 633 fixed.replace(' ', '') == target.replace(' ', '') and 634 _get_indentation(fixed) == _get_indentation(target) 635 ): 636 self.source[result['line'] - 1] = fixed 637 else: 638 return [] 639 640 def fix_e231(self, result): 641 """Add missing whitespace.""" 642 # Optimize for comma case. This will fix all commas in the full source 643 # code in one pass. Don't do this more than once. If it fails the first 644 # time, there is no point in trying again. 
645 if ',' in result['info'] and not self._ws_comma_done: 646 self._ws_comma_done = True 647 original = ''.join(self.source) 648 new = refactor(original, ['ws_comma']) 649 if original.strip() != new.strip(): 650 self.source = [new] 651 return range(1, 1 + len(original)) 652 653 line_index = result['line'] - 1 654 target = self.source[line_index] 655 offset = result['column'] 656 fixed = target[:offset] + ' ' + target[offset:] 657 self.source[line_index] = fixed 658 659 def fix_e251(self, result): 660 """Remove whitespace around parameter '=' sign.""" 661 line_index = result['line'] - 1 662 target = self.source[line_index] 663 664 # This is necessary since pep8 sometimes reports columns that goes 665 # past the end of the physical line. This happens in cases like, 666 # foo(bar\n=None) 667 c = min(result['column'] - 1, 668 len(target) - 1) 669 670 if target[c].strip(): 671 fixed = target 672 else: 673 fixed = target[:c].rstrip() + target[c:].lstrip() 674 675 # There could be an escaped newline 676 # 677 # def foo(a=\ 678 # 1) 679 if fixed.endswith(('=\\\n', '=\\\r\n', '=\\\r')): 680 self.source[line_index] = fixed.rstrip('\n\r \t\\') 681 self.source[line_index + 1] = self.source[line_index + 1].lstrip() 682 return [line_index + 1, line_index + 2] # Line indexed at 1 683 684 self.source[result['line'] - 1] = fixed 685 686 def fix_e262(self, result): 687 """Fix spacing after comment hash.""" 688 target = self.source[result['line'] - 1] 689 offset = result['column'] 690 691 code = target[:offset].rstrip(' \t#') 692 comment = target[offset:].lstrip(' \t#') 693 694 fixed = code + (' # ' + comment if comment.strip() else '\n') 695 696 self.source[result['line'] - 1] = fixed 697 698 def fix_e271(self, result): 699 """Fix extraneous whitespace around keywords.""" 700 line_index = result['line'] - 1 701 target = self.source[line_index] 702 offset = result['column'] - 1 703 704 if is_probably_part_of_multiline(target): 705 return [] 706 707 fixed = fix_whitespace(target, 708 
offset=offset, 709 replacement=' ') 710 711 if fixed == target: 712 return [] 713 else: 714 self.source[line_index] = fixed 715 716 def fix_e301(self, result): 717 """Add missing blank line.""" 718 cr = '\n' 719 self.source[result['line'] - 1] = cr + self.source[result['line'] - 1] 720 721 def fix_e302(self, result): 722 """Add missing 2 blank lines.""" 723 add_linenum = 2 - int(result['info'].split()[-1]) 724 cr = '\n' * add_linenum 725 self.source[result['line'] - 1] = cr + self.source[result['line'] - 1] 726 727 def fix_e303(self, result): 728 """Remove extra blank lines.""" 729 delete_linenum = int(result['info'].split('(')[1].split(')')[0]) - 2 730 delete_linenum = max(1, delete_linenum) 731 732 # We need to count because pep8 reports an offset line number if there 733 # are comments. 734 cnt = 0 735 line = result['line'] - 2 736 modified_lines = [] 737 while cnt < delete_linenum and line >= 0: 738 if not self.source[line].strip(): 739 self.source[line] = '' 740 modified_lines.append(1 + line) # Line indexed at 1 741 cnt += 1 742 line -= 1 743 744 return modified_lines 745 746 def fix_e304(self, result): 747 """Remove blank line following function decorator.""" 748 line = result['line'] - 2 749 if not self.source[line].strip(): 750 self.source[line] = '' 751 752 def fix_e401(self, result): 753 """Put imports on separate lines.""" 754 line_index = result['line'] - 1 755 target = self.source[line_index] 756 offset = result['column'] - 1 757 758 if not target.lstrip().startswith('import'): 759 return [] 760 761 indentation = re.split(pattern=r'\bimport\b', 762 string=target, maxsplit=1)[0] 763 fixed = (target[:offset].rstrip('\t ,') + '\n' + 764 indentation + 'import ' + target[offset:].lstrip('\t ,')) 765 self.source[line_index] = fixed 766 767 def fix_long_line_logically(self, result, logical): 768 """Try to make lines fit within --max-line-length characters.""" 769 if ( 770 not logical or 771 len(logical[2]) == 1 or 772 self.source[result['line'] - 
1].lstrip().startswith('#') 773 ): 774 return self.fix_long_line_physically(result) 775 776 start_line_index = logical[0][0] 777 end_line_index = logical[1][0] 778 logical_lines = logical[2] 779 780 previous_line = get_item(self.source, start_line_index - 1, default='') 781 next_line = get_item(self.source, end_line_index + 1, default='') 782 783 single_line = join_logical_line(''.join(logical_lines)) 784 785 try: 786 fixed = self.fix_long_line( 787 target=single_line, 788 previous_line=previous_line, 789 next_line=next_line, 790 original=''.join(logical_lines)) 791 except (SyntaxError, tokenize.TokenError): 792 return self.fix_long_line_physically(result) 793 794 if fixed: 795 for line_index in range(start_line_index, end_line_index + 1): 796 self.source[line_index] = '' 797 self.source[start_line_index] = fixed 798 return range(start_line_index + 1, end_line_index + 1) 799 else: 800 return [] 801 802 def fix_long_line_physically(self, result): 803 """Try to make lines fit within --max-line-length characters.""" 804 line_index = result['line'] - 1 805 target = self.source[line_index] 806 807 previous_line = get_item(self.source, line_index - 1, default='') 808 next_line = get_item(self.source, line_index + 1, default='') 809 810 try: 811 fixed = self.fix_long_line( 812 target=target, 813 previous_line=previous_line, 814 next_line=next_line, 815 original=target) 816 except (SyntaxError, tokenize.TokenError): 817 return [] 818 819 if fixed: 820 self.source[line_index] = fixed 821 return [line_index + 1] 822 else: 823 return [] 824 825 def fix_long_line(self, target, previous_line, 826 next_line, original): 827 cache_entry = (target, previous_line, next_line) 828 if cache_entry in self.long_line_ignore_cache: 829 return [] 830 831 if target.lstrip().startswith('#'): 832 # Wrap commented lines. 
833 return shorten_comment( 834 line=target, 835 max_line_length=self.options.max_line_length, 836 last_comment=not next_line.lstrip().startswith('#')) 837 838 fixed = get_fixed_long_line( 839 target=target, 840 previous_line=previous_line, 841 original=original, 842 indent_word=self.indent_word, 843 max_line_length=self.options.max_line_length, 844 aggressive=self.options.aggressive, 845 experimental=self.options.experimental, 846 verbose=self.options.verbose) 847 if fixed and not code_almost_equal(original, fixed): 848 return fixed 849 else: 850 self.long_line_ignore_cache.add(cache_entry) 851 return None 852 853 def fix_e502(self, result): 854 """Remove extraneous escape of newline.""" 855 line_index = result['line'] - 1 856 target = self.source[line_index] 857 self.source[line_index] = target.rstrip('\n\r \t\\') + '\n' 858 859 def fix_e701(self, result): 860 """Put colon-separated compound statement on separate lines.""" 861 line_index = result['line'] - 1 862 target = self.source[line_index] 863 c = result['column'] 864 865 fixed_source = (target[:c] + '\n' + 866 _get_indentation(target) + self.indent_word + 867 target[c:].lstrip('\n\r \t\\')) 868 self.source[result['line'] - 1] = fixed_source 869 return [result['line'], result['line'] + 1] 870 871 def fix_e702(self, result, logical): 872 """Put semicolon-separated compound statement on separate lines.""" 873 if not logical: 874 return [] # pragma: no cover 875 logical_lines = logical[2] 876 877 line_index = result['line'] - 1 878 target = self.source[line_index] 879 880 if target.rstrip().endswith('\\'): 881 # Normalize '1; \\\n2' into '1; 2'. 
882 self.source[line_index] = target.rstrip('\n \r\t\\') 883 self.source[line_index + 1] = self.source[line_index + 1].lstrip() 884 return [line_index + 1, line_index + 2] 885 886 if target.rstrip().endswith(';'): 887 self.source[line_index] = target.rstrip('\n \r\t;') + '\n' 888 return [line_index + 1] 889 890 offset = result['column'] - 1 891 first = target[:offset].rstrip(';').rstrip() 892 second = (_get_indentation(logical_lines[0]) + 893 target[offset:].lstrip(';').lstrip()) 894 895 self.source[line_index] = first + '\n' + second 896 return [line_index + 1] 897 898 def fix_e711(self, result): 899 """Fix comparison with None.""" 900 line_index = result['line'] - 1 901 target = self.source[line_index] 902 offset = result['column'] - 1 903 904 right_offset = offset + 2 905 if right_offset >= len(target): 906 return [] 907 908 left = target[:offset].rstrip() 909 center = target[offset:right_offset] 910 right = target[right_offset:].lstrip() 911 912 if not right.startswith('None'): 913 return [] 914 915 if center.strip() == '==': 916 new_center = 'is' 917 elif center.strip() == '!=': 918 new_center = 'is not' 919 else: 920 return [] 921 922 self.source[line_index] = ' '.join([left, new_center, right]) 923 924 def fix_e712(self, result): 925 """Fix comparison with boolean.""" 926 line_index = result['line'] - 1 927 target = self.source[line_index] 928 offset = result['column'] - 1 929 930 # Handle very easy "not" special cases. 931 if re.match(r'^\s*if \w+ == False:$', target): 932 self.source[line_index] = re.sub(r'if (\w+) == False:', 933 r'if not \1:', target, count=1) 934 elif re.match(r'^\s*if \w+ != True:$', target): 935 self.source[line_index] = re.sub(r'if (\w+) != True:', 936 r'if not \1:', target, count=1) 937 else: 938 right_offset = offset + 2 939 if right_offset >= len(target): 940 return [] 941 942 left = target[:offset].rstrip() 943 center = target[offset:right_offset] 944 right = target[right_offset:].lstrip() 945 946 # Handle simple cases only. 
947 new_right = None 948 if center.strip() == '==': 949 if re.match(r'\bTrue\b', right): 950 new_right = re.sub(r'\bTrue\b *', '', right, count=1) 951 elif center.strip() == '!=': 952 if re.match(r'\bFalse\b', right): 953 new_right = re.sub(r'\bFalse\b *', '', right, count=1) 954 955 if new_right is None: 956 return [] 957 958 if new_right[0].isalnum(): 959 new_right = ' ' + new_right 960 961 self.source[line_index] = left + new_right 962 963 def fix_e713(self, result): 964 """Fix non-membership check.""" 965 line_index = result['line'] - 1 966 target = self.source[line_index] 967 968 # Handle very easy case only. 969 if re.match(r'^\s*if not \w+ in \w+:$', target): 970 self.source[line_index] = re.sub(r'if not (\w+) in (\w+):', 971 r'if \1 not in \2:', 972 target, 973 count=1) 974 975 def fix_w291(self, result): 976 """Remove trailing whitespace.""" 977 fixed_line = self.source[result['line'] - 1].rstrip() 978 self.source[result['line'] - 1] = fixed_line + '\n' 979 980 981 def get_fixed_long_line(target, previous_line, original, 982 indent_word=' ', max_line_length=79, 983 aggressive=False, experimental=False, verbose=False): 984 """Break up long line and return result. 985 986 Do this by generating multiple reformatted candidates and then 987 ranking the candidates to heuristically select the best option. 988 989 """ 990 indent = _get_indentation(target) 991 source = target[len(indent):] 992 assert source.lstrip() == source 993 994 # Check for partial multiline. 995 tokens = list(generate_tokens(source)) 996 997 candidates = shorten_line( 998 tokens, source, indent, 999 indent_word, 1000 max_line_length, 1001 aggressive=aggressive, 1002 experimental=experimental, 1003 previous_line=previous_line) 1004 1005 # Also sort alphabetically as a tie breaker (for determinism). 
1006 candidates = sorted( 1007 sorted(set(candidates).union([target, original])), 1008 key=lambda x: line_shortening_rank(x, 1009 indent_word, 1010 max_line_length, 1011 experimental)) 1012 1013 if verbose >= 4: 1014 print(('-' * 79 + '\n').join([''] + candidates + ['']), 1015 file=codecs.getwriter('utf-8')(sys.stderr.buffer 1016 if hasattr(sys.stderr, 1017 'buffer') 1018 else sys.stderr)) 1019 1020 if candidates: 1021 return candidates[0] 1022 1023 1024 def join_logical_line(logical_line): 1025 """Return single line based on logical line input.""" 1026 indentation = _get_indentation(logical_line) 1027 1028 return indentation + untokenize_without_newlines( 1029 generate_tokens(logical_line.lstrip())) + '\n' 1030 1031 1032 def untokenize_without_newlines(tokens): 1033 """Return source code based on tokens.""" 1034 text = '' 1035 last_row = 0 1036 last_column = -1 1037 1038 for t in tokens: 1039 token_string = t[1] 1040 (start_row, start_column) = t[2] 1041 (end_row, end_column) = t[3] 1042 1043 if start_row > last_row: 1044 last_column = 0 1045 if ( 1046 (start_column > last_column or token_string == '\n') and 1047 not text.endswith(' ') 1048 ): 1049 text += ' ' 1050 1051 if token_string != '\n': 1052 text += token_string 1053 1054 last_row = end_row 1055 last_column = end_column 1056 1057 return text 1058 1059 1060 def _find_logical(source_lines): 1061 # Make a variable which is the index of all the starts of lines. 
def _find_logical(source_lines):
    """Return (logical_start, logical_end) coordinate lists.

    Each entry is a (row, column) pair (row is 0-indexed) marking where a
    logical line starts or ends in the joined source.

    """
    logical_start = []
    logical_end = []
    last_newline = True
    parens = 0
    for t in generate_tokens(''.join(source_lines)):
        if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
                    tokenize.INDENT, tokenize.NL,
                    tokenize.ENDMARKER]:
            continue
        if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
            last_newline = True
            logical_end.append((t[3][0] - 1, t[2][1]))
            continue
        if last_newline and not parens:
            logical_start.append((t[2][0] - 1, t[2][1]))
            last_newline = False
        # Track bracket depth so continuation lines inside brackets are not
        # treated as new logical lines.
        if t[0] == tokenize.OP:
            if t[1] in '([{':
                parens += 1
            elif t[1] in '}])':
                parens -= 1
    return (logical_start, logical_end)


def _get_logical(source_lines, result, logical_start, logical_end):
    """Return the logical line corresponding to the result.

    Assumes input is already E702-clean.

    Returns (start, end, original_lines) or None when no logical line
    contains the reported position.

    """
    row = result['line'] - 1
    col = result['column'] - 1
    ls = None
    le = None
    # Find the first logical line ending strictly after (row, col).
    for i in range(len(logical_start)):
        assert logical_end
        x = logical_end[i]
        if x[0] > row or (x[0] == row and x[1] > col):
            le = x
            ls = logical_start[i]
            break
    if ls is None:
        return None
    original = source_lines[ls[0]:le[0] + 1]
    return ls, le, original


def get_item(items, index, default=None):
    """Return items[index], or default when index is out of range."""
    if 0 <= index < len(items):
        return items[index]

    return default


def reindent(source, indent_size):
    """Reindent all lines."""
    reindenter = Reindenter(source)
    return reindenter.run(indent_size)


def code_almost_equal(a, b):
    """Return True if code is similar.

    Ignore whitespace when comparing specific line.

    """
    split_a = split_and_strip_non_empty_lines(a)
    split_b = split_and_strip_non_empty_lines(b)

    if len(split_a) != len(split_b):
        return False

    # Compare line by line with all internal whitespace removed.
    for (line_a, line_b) in zip(split_a, split_b):
        if ''.join(line_a.split()) != ''.join(line_b.split()):
            return False

    return True


def split_and_strip_non_empty_lines(text):
    """Return lines split by newline.

    Ignore empty lines.

    """
    return [line.strip() for line in text.splitlines() if line.strip()]


def fix_e265(source, aggressive=False):  # pylint: disable=unused-argument
    """Format block comments (ensure '# ' prefix)."""
    if '#' not in source:
        # Optimization.
        return source

    # Lines inside multiline strings, and lines that look like commented-out
    # code, are left untouched.
    ignored_line_numbers = multiline_string_lines(
        source,
        include_docstrings=True) | set(commented_out_code_lines(source))

    fixed_lines = []
    sio = io.StringIO(source)
    for (line_number, line) in enumerate(sio.readlines(), start=1):
        if (
            line.lstrip().startswith('#') and
            line_number not in ignored_line_numbers
        ):
            indentation = _get_indentation(line)
            line = line.lstrip()

            # Normalize beginning if not a shebang.
            if len(line) > 1:
                if (
                    # Leave multiple spaces like '#    ' alone.
                    (line.count('#') > 1 or line[1].isalnum())
                    # Leave stylistic outlined blocks alone.
                    and not line.rstrip().endswith('#')
                ):
                    line = '# ' + line.lstrip('# \t')

            fixed_lines.append(indentation + line)
        else:
            fixed_lines.append(line)

    return ''.join(fixed_lines)
def refactor(source, fixer_names, ignore=None):
    """Return refactored code using lib2to3.

    Skip if ignore string is produced in the refactored code.

    """
    from lib2to3 import pgen2
    try:
        new_text = refactor_with_2to3(source,
                                      fixer_names=fixer_names)
    except (pgen2.parse.ParseError,
            SyntaxError,
            UnicodeDecodeError,
            UnicodeEncodeError):
        # On any parse or encoding failure, fall back to the original source.
        return source

    if ignore:
        # Reject the result only when the ignore string was *introduced* by
        # the refactoring (it was not already present in the input).
        if ignore in new_text and ignore not in source:
            return source

    return new_text


def code_to_2to3(select, ignore):
    # Map selected pep8 codes to the union of their lib2to3 fixer names.
    fixes = set()
    for code, fix in CODE_TO_2TO3.items():
        if code_match(code, select=select, ignore=ignore):
            fixes |= set(fix)
    return fixes


def fix_2to3(source, aggressive=True, select=None, ignore=None):
    """Fix various deprecated code (via lib2to3)."""
    if not aggressive:
        return source

    select = select or []
    ignore = ignore or []

    return refactor(source,
                    code_to_2to3(select=select,
                                 ignore=ignore))


def fix_w602(source, aggressive=True):
    """Fix deprecated form of raising exception."""
    if not aggressive:
        return source

    # W602 is handled separately (not via CODE_TO_2TO3) to avoid rewriting
    # legitimate "with_traceback" uses.
    return refactor(source, ['raise'],
                    ignore='with_traceback')
1244 1245 """ 1246 assert not isinstance(source, unicode) 1247 1248 counter = collections.defaultdict(int) 1249 for line in source: 1250 if line.endswith(CRLF): 1251 counter[CRLF] += 1 1252 elif line.endswith(CR): 1253 counter[CR] += 1 1254 elif line.endswith(LF): 1255 counter[LF] += 1 1256 1257 return (sorted(counter, key=counter.get, reverse=True) or [LF])[0] 1258 1259 1260 def _get_indentword(source): 1261 """Return indentation type.""" 1262 indent_word = ' ' # Default in case source has no indentation 1263 try: 1264 for t in generate_tokens(source): 1265 if t[0] == token.INDENT: 1266 indent_word = t[1] 1267 break 1268 except (SyntaxError, tokenize.TokenError): 1269 pass 1270 return indent_word 1271 1272 1273 def _get_indentation(line): 1274 """Return leading whitespace.""" 1275 if line.strip(): 1276 non_whitespace_index = len(line) - len(line.lstrip()) 1277 return line[:non_whitespace_index] 1278 else: 1279 return '' 1280 1281 1282 def get_diff_text(old, new, filename): 1283 """Return text of unified diff between old and new.""" 1284 newline = '\n' 1285 diff = difflib.unified_diff( 1286 old, new, 1287 'original/' + filename, 1288 'fixed/' + filename, 1289 lineterm=newline) 1290 1291 text = '' 1292 for line in diff: 1293 text += line 1294 1295 # Work around missing newline (http://bugs.python.org/issue2142). 1296 if text and not line.endswith(newline): 1297 text += newline + r'\ No newline at end of file' + newline 1298 1299 return text 1300 1301 1302 def _priority_key(pep8_result): 1303 """Key for sorting PEP8 results. 1304 1305 Global fixes should be done first. This is important for things like 1306 indentation. 1307 1308 """ 1309 priority = [ 1310 # Fix multiline colon-based before semicolon based. 1311 'e701', 1312 # Break multiline statements early. 1313 'e702', 1314 # Things that make lines longer. 1315 'e225', 'e231', 1316 # Remove extraneous whitespace before breaking lines. 1317 'e201', 1318 # Shorten whitespace in comment before resorting to wrapping. 
1319 'e262' 1320 ] 1321 middle_index = 10000 1322 lowest_priority = [ 1323 # We need to shorten lines last since the logical fixer can get in a 1324 # loop, which causes us to exit early. 1325 'e501' 1326 ] 1327 key = pep8_result['id'].lower() 1328 try: 1329 return priority.index(key) 1330 except ValueError: 1331 try: 1332 return middle_index + lowest_priority.index(key) + 1 1333 except ValueError: 1334 return middle_index 1335 1336 1337 def shorten_line(tokens, source, indentation, indent_word, max_line_length, 1338 aggressive=False, experimental=False, previous_line=''): 1339 """Separate line at OPERATOR. 1340 1341 Multiple candidates will be yielded. 1342 1343 """ 1344 for candidate in _shorten_line(tokens=tokens, 1345 source=source, 1346 indentation=indentation, 1347 indent_word=indent_word, 1348 aggressive=aggressive, 1349 previous_line=previous_line): 1350 yield candidate 1351 1352 if aggressive: 1353 for key_token_strings in SHORTEN_OPERATOR_GROUPS: 1354 shortened = _shorten_line_at_tokens( 1355 tokens=tokens, 1356 source=source, 1357 indentation=indentation, 1358 indent_word=indent_word, 1359 key_token_strings=key_token_strings, 1360 aggressive=aggressive) 1361 1362 if shortened is not None and shortened != source: 1363 yield shortened 1364 1365 if experimental: 1366 for shortened in _shorten_line_at_tokens_new( 1367 tokens=tokens, 1368 source=source, 1369 indentation=indentation, 1370 max_line_length=max_line_length): 1371 1372 yield shortened 1373 1374 1375 def _shorten_line(tokens, source, indentation, indent_word, 1376 aggressive=False, previous_line=''): 1377 """Separate line at OPERATOR. 1378 1379 The input is expected to be free of newlines except for inside multiline 1380 strings and at the end. 1381 1382 Multiple candidates will be yielded. 
def _shorten_line(tokens, source, indentation, indent_word,
                  aggressive=False, previous_line=''):
    """Separate line at OPERATOR.

    The input is expected to be free of newlines except for inside multiline
    strings and at the end.

    Multiple candidates will be yielded.

    """
    for (token_type,
         token_string,
         start_offset,
         end_offset) in token_offsets(tokens):

        if (
            token_type == tokenize.COMMENT and
            not is_probably_part_of_multiline(previous_line) and
            not is_probably_part_of_multiline(source) and
            not source[start_offset + 1:].strip().lower().startswith(
                ('noqa', 'pragma:', 'pylint:'))
        ):
            # Move inline comments to previous line.
            first = source[:start_offset]
            second = source[start_offset:]
            yield (indentation + second.strip() + '\n' +
                   indentation + first.strip() + '\n')
        elif token_type == token.OP and token_string != '=':
            # Don't break on '=' after keyword as this violates PEP 8.

            assert token_type != token.INDENT

            first = source[:end_offset]

            # Choose the continuation indent: align after an opening paren
            # when one exists, otherwise indent by one level.
            second_indent = indentation
            if first.rstrip().endswith('('):
                second_indent += indent_word
            elif '(' in first:
                second_indent += ' ' * (1 + first.find('('))
            else:
                second_indent += indent_word

            second = (second_indent + source[end_offset:].lstrip())
            if (
                not second.strip() or
                second.lstrip().startswith('#')
            ):
                continue

            # Do not begin a line with a comma.
            if second.lstrip().startswith(','):
                continue
            # Do not end a line with a dot.
            if first.rstrip().endswith('.'):
                continue
            # Arithmetic operators need an explicit backslash continuation.
            if token_string in '+-*/':
                fixed = first + ' \\' + '\n' + second
            else:
                fixed = first + '\n' + second

            # Only fix if syntax is okay.
            if check_syntax(normalize_multiline(fixed)
                            if aggressive else fixed):
                yield indentation + fixed


# A convenient way to handle tokens.
Token = collections.namedtuple('Token', ['token_type', 'token_string',
                                         'spos', 'epos', 'line'])
class ReformattedLines(object):

    """The reflowed lines of atoms.

    Each part of the line is represented as an "atom." They can be moved
    around when need be to get the optimal formatting.

    """

    ###########################################################################
    # Private Classes

    class _Indent(object):

        """Represent an indentation in the atom stream."""

        def __init__(self, indent_amt):
            self._indent_amt = indent_amt

        def emit(self):
            return ' ' * self._indent_amt

        @property
        def size(self):
            return self._indent_amt

    class _Space(object):

        """Represent a space in the atom stream."""

        def emit(self):
            return ' '

        @property
        def size(self):
            return 1

    class _LineBreak(object):

        """Represent a line break in the atom stream."""

        def emit(self):
            return '\n'

        @property
        def size(self):
            return 0

    def __init__(self, max_line_length):
        self._max_line_length = max_line_length
        self._lines = []               # stream of atoms/spaces/indents/breaks
        self._bracket_depth = 0
        self._prev_item = None         # previous non-whitespace item
        self._prev_prev_item = None

    def __repr__(self):
        return self.emit()

    ###########################################################################
    # Public Methods

    def add(self, obj, indent_amt, break_after_open_bracket):
        # Dispatch on atom vs. container.
        if isinstance(obj, Atom):
            self._add_item(obj, indent_amt)
            return

        self._add_container(obj, indent_amt, break_after_open_bracket)

    def add_comment(self, item):
        # Ensure exactly two spaces before an inline comment.
        num_spaces = 2
        if len(self._lines) > 1:
            if isinstance(self._lines[-1], self._Space):
                num_spaces -= 1
            if len(self._lines) > 2:
                if isinstance(self._lines[-2], self._Space):
                    num_spaces -= 1

        while num_spaces > 0:
            self._lines.append(self._Space())
            num_spaces -= 1
        self._lines.append(item)

    def add_indent(self, indent_amt):
        self._lines.append(self._Indent(indent_amt))

    def add_line_break(self, indent):
        self._lines.append(self._LineBreak())
        self.add_indent(len(indent))

    def add_line_break_at(self, index, indent_amt):
        self._lines.insert(index, self._LineBreak())
        self._lines.insert(index + 1, self._Indent(indent_amt))

    def add_space_if_needed(self, curr_text, equal=False):
        if (
            not self._lines or isinstance(
                self._lines[-1], (self._LineBreak, self._Indent, self._Space))
        ):
            return

        prev_text = unicode(self._prev_item)
        prev_prev_text = (
            unicode(self._prev_prev_item) if self._prev_prev_item else '')

        if (
            # The previous item was a keyword or identifier and the current
            # item isn't an operator that doesn't require a space.
            ((self._prev_item.is_keyword or self._prev_item.is_string or
              self._prev_item.is_name or self._prev_item.is_number) and
             (curr_text[0] not in '([{.,:}])' or
              (curr_text[0] == '=' and equal))) or

            # Don't place spaces around a '.', unless it's in an 'import'
            # statement.
            ((prev_prev_text != 'from' and prev_text[-1] != '.' and
              curr_text != 'import') and

             # Don't place a space before a colon.
             curr_text[0] != ':' and

             # Don't split up ending brackets by spaces.
             ((prev_text[-1] in '}])' and curr_text[0] not in '.,}])') or

              # Put a space after a colon or comma.
              prev_text[-1] in ':,' or

              # Put space around '=' if asked to.
              (equal and prev_text == '=') or

              # Put spaces around non-unary arithmetic operators.
              ((self._prev_prev_item and
                (prev_text not in '+-' and
                 (self._prev_prev_item.is_name or
                  self._prev_prev_item.is_number or
                  self._prev_prev_item.is_string)) and
                prev_text in ('+', '-', '%', '*', '/', '//', '**')))))
        ):
            self._lines.append(self._Space())

    def previous_item(self):
        """Return the previous non-whitespace item."""
        return self._prev_item

    def fits_on_current_line(self, item_extent):
        return self.current_size() + item_extent <= self._max_line_length

    def current_size(self):
        """The size of the current line minus the indentation."""
        # Walk backwards until the most recent line break.
        size = 0
        for item in reversed(self._lines):
            size += item.size
            if isinstance(item, self._LineBreak):
                break

        return size

    def line_empty(self):
        return (self._lines and
                isinstance(self._lines[-1],
                           (self._LineBreak, self._Indent)))

    def emit(self):
        # Render the atom stream, stripping trailing whitespace before each
        # line break and guaranteeing a final newline.
        string = ''
        for item in self._lines:
            if isinstance(item, self._LineBreak):
                string = string.rstrip()
            string += item.emit()

        return string.rstrip() + '\n'

    ###########################################################################
    # Private Methods

    def _add_item(self, item, indent_amt):
        """Add an item to the line.

        Reflow the line to get the best formatting after the item is
        inserted. The bracket depth indicates if the item is being
        inserted inside of a container or not.

        """
        if self._prev_item and self._prev_item.is_string and item.is_string:
            # Place consecutive string literals on separate lines.
            self._lines.append(self._LineBreak())
            self._lines.append(self._Indent(indent_amt))

        item_text = unicode(item)
        if self._lines and self._bracket_depth:
            # Adding the item into a container.
            self._prevent_default_initializer_splitting(item, indent_amt)

            if item_text in '.,)]}':
                self._split_after_delimiter(item, indent_amt)

        elif self._lines and not self.line_empty():
            # Adding the item outside of a container.
            if self.fits_on_current_line(len(item_text)):
                self._enforce_space(item)

            else:
                # Line break for the new item.
                self._lines.append(self._LineBreak())
                self._lines.append(self._Indent(indent_amt))

        self._lines.append(item)
        self._prev_item, self._prev_prev_item = item, self._prev_item

        if item_text in '([{':
            self._bracket_depth += 1

        elif item_text in '}])':
            self._bracket_depth -= 1
            assert self._bracket_depth >= 0

    def _add_container(self, container, indent_amt, break_after_open_bracket):
        actual_indent = indent_amt + 1

        if (
            unicode(self._prev_item) != '=' and
            not self.line_empty() and
            not self.fits_on_current_line(
                container.size + self._bracket_depth + 2)
        ):

            if unicode(container)[0] == '(' and self._prev_item.is_name:
                # Don't split before the opening bracket of a call.
                break_after_open_bracket = True
                actual_indent = indent_amt + 4
            elif (
                break_after_open_bracket or
                unicode(self._prev_item) not in '([{'
            ):
                # If the container doesn't fit on the current line and the
                # current line isn't empty, place the container on the next
                # line.
                self._lines.append(self._LineBreak())
                self._lines.append(self._Indent(indent_amt))
                break_after_open_bracket = False
        else:
            actual_indent = self.current_size() + 1
            break_after_open_bracket = False

        if isinstance(container, (ListComprehension, IfExpression)):
            actual_indent = indent_amt

        # Increase the continued indentation only if recursing on a
        # container.
        container.reflow(self, ' ' * actual_indent,
                         break_after_open_bracket=break_after_open_bracket)

    def _prevent_default_initializer_splitting(self, item, indent_amt):
        """Prevent splitting between a default initializer.

        When there is a default initializer, it's best to keep it all on
        the same line. It's nicer and more readable, even if it goes
        over the maximum allowable line length. This goes back along the
        current line to determine if we have a default initializer, and,
        if so, to remove extraneous whitespaces and add a line
        break/indent before it if needed.

        """
        if unicode(item) == '=':
            # This is the assignment in the initializer. Just remove spaces
            # for now.
            self._delete_whitespace()
            return

        if (not self._prev_item or not self._prev_prev_item or
                unicode(self._prev_item) != '='):
            return

        self._delete_whitespace()
        prev_prev_index = self._lines.index(self._prev_prev_item)

        if (
            isinstance(self._lines[prev_prev_index - 1], self._Indent) or
            self.fits_on_current_line(item.size + 1)
        ):
            # The default initializer is already the only item on this line.
            # Don't insert a newline here.
            return

        # Replace the space with a newline/indent combo.
        if isinstance(self._lines[prev_prev_index - 1], self._Space):
            del self._lines[prev_prev_index - 1]

        self.add_line_break_at(self._lines.index(self._prev_prev_item),
                               indent_amt)

    def _split_after_delimiter(self, item, indent_amt):
        """Split the line only after a delimiter."""
        self._delete_whitespace()

        if self.fits_on_current_line(item.size):
            return

        # Search backwards for the most recent breakable space on this line.
        # NOTE: the loop variable deliberately shadows ``item`` here, as in
        # the original implementation.
        last_space = None
        for item in reversed(self._lines):
            if (
                last_space and
                (not isinstance(item, Atom) or not item.is_colon)
            ):
                break
            else:
                last_space = None
                if isinstance(item, self._Space):
                    last_space = item
                if isinstance(item, (self._LineBreak, self._Indent)):
                    return

        if not last_space:
            return

        self.add_line_break_at(self._lines.index(last_space), indent_amt)

    def _enforce_space(self, item):
        """Enforce a space in certain situations.

        There are cases where we will want a space where normally we
        wouldn't put one. This just enforces the addition of a space.

        """
        if isinstance(self._lines[-1],
                      (self._Space, self._LineBreak, self._Indent)):
            return

        if not self._prev_item:
            return

        item_text = unicode(item)
        prev_text = unicode(self._prev_item)

        # Prefer a space around a '.' in an import statement, and between
        # the 'import' and '('.
        if (
            (item_text == '.' and prev_text == 'from') or
            (item_text == 'import' and prev_text == '.') or
            (item_text == '(' and prev_text == 'import')
        ):
            self._lines.append(self._Space())

    def _delete_whitespace(self):
        """Delete all whitespace from the end of the line."""
        while isinstance(self._lines[-1], (self._Space, self._LineBreak,
                                           self._Indent)):
            del self._lines[-1]
class Atom(object):

    """The smallest unbreakable unit that can be reflowed."""

    def __init__(self, atom):
        # ``atom`` is a Token-like object exposing token_type/token_string.
        self._atom = atom

    def __repr__(self):
        return self._atom.token_string

    def __len__(self):
        return self.size

    def reflow(
        self, reflowed_lines, continued_indent, extent,
        break_after_open_bracket=False,
        is_list_comp_or_if_expr=False,
        next_is_dot=False
    ):
        """Place this atom onto the reflowed lines."""
        if self._atom.token_type == tokenize.COMMENT:
            reflowed_lines.add_comment(self)
            return

        total_size = extent if extent else self.size

        if self._atom.token_string not in ',:([{}])':
            # Some atoms will need an extra 1-sized space token after them.
            total_size += 1

        prev_item = reflowed_lines.previous_item()

        # Decide whether this atom has to start a fresh line.
        starts_call = bool(prev_item and prev_item.is_name and
                           unicode(self) == '(')
        dot_fits = (next_is_dot and
                    reflowed_lines.fits_on_current_line(self.size + 1))
        needs_break = (
            not is_list_comp_or_if_expr and
            not reflowed_lines.fits_on_current_line(total_size) and
            not dot_fits and
            not reflowed_lines.line_empty() and
            not self.is_colon and
            not starts_call)

        if needs_break:
            # Start a new line if there is already something on the line and
            # adding this atom would make it go over the max line length.
            reflowed_lines.add_line_break(continued_indent)
        else:
            reflowed_lines.add_space_if_needed(unicode(self))

        reflowed_lines.add(self, len(continued_indent),
                           break_after_open_bracket)

    def emit(self):
        return repr(self)

    @property
    def is_keyword(self):
        return keyword.iskeyword(self._atom.token_string)

    @property
    def is_string(self):
        return self._atom.token_type == tokenize.STRING

    @property
    def is_name(self):
        return self._atom.token_type == tokenize.NAME

    @property
    def is_number(self):
        return self._atom.token_type == tokenize.NUMBER

    @property
    def is_comma(self):
        return self._atom.token_string == ','

    @property
    def is_colon(self):
        return self._atom.token_string == ':'

    @property
    def size(self):
        return len(self._atom.token_string)
class Container(object):

    """Base class for all container types."""

    def __init__(self, items):
        # ``items`` is a list of Atom/Container elements (opening bracket
        # through closing bracket, inclusive).
        self._items = items

    def __repr__(self):
        string = ''
        last_was_keyword = False

        for item in self._items:
            if item.is_comma:
                string += ', '
            elif item.is_colon:
                string += ': '
            else:
                item_string = unicode(item)
                if (
                    string and
                    (last_was_keyword or
                     (not string.endswith(tuple('([{,.:}]) ')) and
                      not item_string.startswith(tuple('([{,.:}])'))))
                ):
                    string += ' '
                string += item_string

            last_was_keyword = item.is_keyword
        return string

    def __iter__(self):
        for element in self._items:
            yield element

    def __getitem__(self, idx):
        return self._items[idx]

    def reflow(self, reflowed_lines, continued_indent,
               break_after_open_bracket=False):
        last_was_container = False
        for (index, item) in enumerate(self._items):
            next_item = get_item(self._items, index + 1)

            if isinstance(item, Atom):
                is_list_comp_or_if_expr = (
                    isinstance(self, (ListComprehension, IfExpression)))
                item.reflow(reflowed_lines, continued_indent,
                            self._get_extent(index),
                            is_list_comp_or_if_expr=is_list_comp_or_if_expr,
                            next_is_dot=(next_item and
                                         unicode(next_item) == '.'))
                if last_was_container and item.is_comma:
                    reflowed_lines.add_line_break(continued_indent)
                last_was_container = False
            else:  # isinstance(item, Container)
                reflowed_lines.add(item, len(continued_indent),
                                   break_after_open_bracket)
                last_was_container = not isinstance(item, (ListComprehension,
                                                           IfExpression))

            if (
                break_after_open_bracket and index == 0 and
                # Prefer to keep empty containers together instead of
                # separating them.
                unicode(item) == self.open_bracket and
                (not next_item or unicode(next_item) != self.close_bracket) and
                (len(self._items) != 3 or not isinstance(next_item, Atom))
            ):
                reflowed_lines.add_line_break(continued_indent)
                break_after_open_bracket = False
            else:
                next_next_item = get_item(self._items, index + 2)
                if (
                    unicode(item) not in ['.', '%', 'in'] and
                    next_item and not isinstance(next_item, Container) and
                    unicode(next_item) != ':' and
                    next_next_item and (not isinstance(next_next_item, Atom) or
                                        unicode(next_item) == 'not') and
                    not reflowed_lines.line_empty() and
                    not reflowed_lines.fits_on_current_line(
                        self._get_extent(index + 1) + 2)
                ):
                    reflowed_lines.add_line_break(continued_indent)

    def _get_extent(self, index):
        """The extent of the full element.

        E.g., the length of a function call or keyword.

        """
        extent = 0
        prev_item = get_item(self._items, index - 1)
        seen_dot = prev_item and unicode(prev_item) == '.'
        while index < len(self._items):
            item = get_item(self._items, index)
            index += 1

            if isinstance(item, (ListComprehension, IfExpression)):
                break

            if isinstance(item, Container):
                if prev_item and prev_item.is_name:
                    # An attribute chain ('.') counts as 1; a direct call
                    # counts the whole container.
                    if seen_dot:
                        extent += 1
                    else:
                        extent += item.size

                    prev_item = item
                    continue
            elif (unicode(item) not in ['.', '=', ':', 'not'] and
                  not item.is_name and not item.is_string):
                break

            if unicode(item) == '.':
                seen_dot = True

            extent += item.size
            prev_item = item

        return extent

    @property
    def is_string(self):
        return False

    @property
    def size(self):
        return len(self.__repr__())

    @property
    def is_keyword(self):
        return False

    @property
    def is_name(self):
        return False

    @property
    def is_comma(self):
        return False

    @property
    def is_colon(self):
        return False

    # NOTE: unlike Atom, Container defines no is_number property; callers in
    # this file only query is_number on items that may be Atoms after
    # checking name/number/string on _prev_prev_item (an Atom).

    @property
    def open_bracket(self):
        return None

    @property
    def close_bracket(self):
        return None


class Tuple(Container):

    """A high-level representation of a tuple."""

    @property
    def open_bracket(self):
        return '('

    @property
    def close_bracket(self):
        return ')'


class List(Container):

    """A high-level representation of a list."""

    @property
    def open_bracket(self):
        return '['

    @property
    def close_bracket(self):
        return ']'


class DictOrSet(Container):

    """A high-level representation of a dictionary or set."""

    @property
    def open_bracket(self):
        return '{'

    @property
    def close_bracket(self):
        return '}'
class ListComprehension(Container):

    """A high-level representation of a list comprehension."""

    @property
    def size(self):
        # The size counts only the elements up to (but excluding) any
        # trailing if-expression.
        total = 0
        for element in self._items:
            if isinstance(element, IfExpression):
                return total
            total += element.size
        return total


class IfExpression(Container):

    """A high-level representation of an if-expression."""
def _parse_container(tokens, index, for_or_if=None):
    """Parse a high-level container, such as a list, tuple, etc.

    Returns (container, index_of_last_consumed_token), or (None, None)
    when the container is never closed.

    """

    # Store the opening bracket.
    items = [Atom(Token(*tokens[index]))]
    index += 1

    num_tokens = len(tokens)
    while index < num_tokens:
        tok = Token(*tokens[index])

        if tok.token_string in ',)]}':
            # First check if we're at the end of a list comprehension or
            # if-expression. Don't add the ending token as part of the list
            # comprehension or if-expression, because they aren't part of
            # those constructs.
            if for_or_if == 'for':
                return (ListComprehension(items), index - 1)

            elif for_or_if == 'if':
                return (IfExpression(items), index - 1)

            # We've reached the end of a container.
            items.append(Atom(tok))

            # If not, then we are at the end of a container.
            if tok.token_string == ')':
                # The end of a tuple.
                return (Tuple(items), index)

            elif tok.token_string == ']':
                # The end of a list.
                return (List(items), index)

            elif tok.token_string == '}':
                # The end of a dictionary or set.
                return (DictOrSet(items), index)

            # A ',' falls through and is kept as a plain element.

        elif tok.token_string in '([{':
            # A sub-container is being defined.
            (container, index) = _parse_container(tokens, index)
            items.append(container)

        elif tok.token_string == 'for':
            (container, index) = _parse_container(tokens, index, 'for')
            items.append(container)

        elif tok.token_string == 'if':
            (container, index) = _parse_container(tokens, index, 'if')
            items.append(container)

        else:
            items.append(Atom(tok))

        index += 1

    return (None, None)


def _parse_tokens(tokens):
    """Parse the tokens.

    This converts the tokens into a form where we can manipulate them
    more easily.

    Returns a list of Atom/Container objects, or None on an unclosed
    container.

    """

    index = 0
    parsed_tokens = []

    num_tokens = len(tokens)
    while index < num_tokens:
        tok = Token(*tokens[index])

        assert tok.token_type != token.INDENT
        if tok.token_type == tokenize.NEWLINE:
            # There's only one newline and it's at the end.
            break

        if tok.token_string in '([{':
            (container, index) = _parse_container(tokens, index)
            if not container:
                return None
            parsed_tokens.append(container)
        else:
            parsed_tokens.append(Atom(tok))

        index += 1

    return parsed_tokens
def _reflow_lines(parsed_tokens, indentation, max_line_length,
                  start_on_prefix_line):
    """Reflow the lines so that it looks nice.

    Returns the reflowed text, or None when the requested layout would
    align the first element weirdly.

    """

    if unicode(parsed_tokens[0]) == 'def':
        # A function definition gets indented a bit more.
        continued_indent = indentation + ' ' * 2 * DEFAULT_INDENT_SIZE
    else:
        continued_indent = indentation + ' ' * DEFAULT_INDENT_SIZE

    break_after_open_bracket = not start_on_prefix_line

    lines = ReformattedLines(max_line_length)
    lines.add_indent(len(indentation.lstrip('\r\n')))

    if not start_on_prefix_line:
        # If splitting after the opening bracket will cause the first element
        # to be aligned weirdly, don't try it.
        first_token = get_item(parsed_tokens, 0)
        second_token = get_item(parsed_tokens, 1)

        if (
            first_token and second_token and
            unicode(second_token)[0] == '(' and
            len(indentation) + len(first_token) + 1 == len(continued_indent)
        ):
            return None

    for item in parsed_tokens:
        lines.add_space_if_needed(unicode(item), equal=True)

        # The continuation indent is narrowed to "after the prefix" only for
        # the first container, then restored for subsequent items.
        save_continued_indent = continued_indent
        if start_on_prefix_line and isinstance(item, Container):
            start_on_prefix_line = False
            continued_indent = ' ' * (lines.current_size() + 1)

        item.reflow(lines, continued_indent, break_after_open_bracket)
        continued_indent = save_continued_indent

    return lines.emit()


def _shorten_line_at_tokens_new(tokens, source, indentation,
                                max_line_length):
    """Shorten the line taking its length into account.

    The input is expected to be free of newlines except for inside
    multiline strings and at the end.

    """
    # Yield the original source so to see if it's a better choice than the
    # shortened candidate lines we generate here.
    yield indentation + source

    parsed_tokens = _parse_tokens(tokens)

    if parsed_tokens:
        # Perform two reflows. The first one starts on the same line as the
        # prefix. The second starts on the line after the prefix.
        fixed = _reflow_lines(parsed_tokens, indentation, max_line_length,
                              start_on_prefix_line=True)
        if fixed and check_syntax(normalize_multiline(fixed.lstrip())):
            yield fixed

        fixed = _reflow_lines(parsed_tokens, indentation, max_line_length,
                              start_on_prefix_line=False)
        if fixed and check_syntax(normalize_multiline(fixed.lstrip())):
            yield fixed
def token_offsets(tokens):
    """Yield (token_type, token_string, start_offset, end_offset) tuples.

    Offsets are character positions into the single logical line the
    tokens were generated from (newline-free except inside multiline
    strings), reconstructed from each token's (row, column) coordinates.

    """
    end_offset = 0
    previous_end_row = 0
    previous_end_column = 0
    for t in tokens:
        token_type = t[0]
        token_string = t[1]
        (start_row, start_column) = t[2]
        (end_row, end_column) = t[3]

        # Account for the whitespace between tokens. When we stayed on the
        # same physical row, only the gap since the previous token's end
        # column counts; otherwise the full start column does.
        end_offset += start_column
        if previous_end_row == start_row:
            end_offset -= previous_end_column

        # Record the start offset of the token.
        start_offset = end_offset

        # Account for the length of the token itself.
        end_offset += len(token_string)

        yield (token_type,
               token_string,
               start_offset,
               end_offset)

        previous_end_row = end_row
        previous_end_column = end_column


def normalize_multiline(line):
    """Wrap a bare statement fragment so that compile() can parse it.

    `line` is a shortened-line candidate that may be a fragment (e.g. a
    lone `def` header or a `return` outside a function) which would be a
    syntax error on its own. Wrap or pad it just enough to syntax-check
    it; the result is only ever fed to check_syntax(), never emitted.

    """
    if line.startswith('def ') and line.rstrip().endswith(':'):
        # A function header alone needs a body.
        return line + ' pass'
    elif line.startswith('return '):
        # `return` is only legal inside a function.
        return 'def _(): ' + line
    elif line.startswith('@'):
        # A decorator needs something to decorate. `line` is expected to
        # end with a newline here, separating it from the dummy function.
        return line + 'def _(): pass'
    elif line.startswith('class '):
        return line + ' pass'
    elif line.startswith('if '):
        return line + ' pass'
    else:
        return line
def fix_whitespace(line, offset, replacement):
    """Replace whitespace at offset and return fixed line."""
    # Replace escaped newlines too
    left = line[:offset].rstrip('\n\r \t\\')
    right = line[offset:].lstrip('\n\r \t\\')
    if right.startswith('#'):
        # Don't touch whitespace that precedes a comment.
        return line
    else:
        return left + replacement + right


def _execute_pep8(pep8_options, source):
    """Execute pep8 via python method calls."""
    class QuietReport(pep8.BaseReport):

        """Version of checker that does not print."""

        def __init__(self, options):
            super(QuietReport, self).__init__(options)
            self.__full_error_results = []

        def error(self, line_number, offset, text, _):
            """Collect errors."""
            # NOTE(review): unlike the base class this returns None;
            # callers are expected to use full_error_results() instead.
            code = super(QuietReport, self).error(line_number, offset, text, _)
            if code:
                self.__full_error_results.append(
                    {'id': code,
                     'line': line_number,
                     'column': offset + 1,
                     'info': text})

        def full_error_results(self):
            """Return error results in detail.

            Results are in the form of a list of dictionaries. Each
            dictionary contains 'id', 'line', 'column', and 'info'.

            """
            return self.__full_error_results

    checker = pep8.Checker('', lines=source,
                           reporter=QuietReport, **pep8_options)
    checker.check_all()
    return checker.report.full_error_results()


def _remove_leading_and_normalize(line):
    # Strip leading whitespace and normalize the line ending to '\n'.
    return line.lstrip().rstrip(CR + LF) + '\n'


class Reindenter(object):

    """Reindents badly-indented code to uniformly use four-space indentation.

    Released to the public domain, by Tim Peters, 03 October 2000.

    """

    def __init__(self, input_text):
        sio = io.StringIO(input_text)
        source_lines = sio.readlines()

        # Line numbers (1-indexed) that lie inside multiline strings;
        # those lines must never be touched.
        self.string_content_line_numbers = multiline_string_lines(input_text)

        # File lines, rstripped & tab-expanded. Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it is a newline.
        self.lines = []
        for line_number, line in enumerate(source_lines, start=1):
            # Do not modify if inside a multiline string.
            if line_number in self.string_content_line_numbers:
                self.lines.append(line)
            else:
                # Only expand leading tabs.
                self.lines.append(_get_indentation(line).expandtabs() +
                                  _remove_leading_and_normalize(line))

        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line
        self.input_text = input_text

    def run(self, indent_size=DEFAULT_INDENT_SIZE):
        """Fix indentation and return modified line numbers.

        Line numbers are indexed at 1.

        """
        if indent_size < 1:
            return self.input_text

        try:
            stats = _reindent_stats(tokenize.generate_tokens(self.getline))
        except (SyntaxError, tokenize.TokenError):
            # Unparsable input: leave it untouched.
            return self.input_text
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == '\n':
            lines.pop()
        # Sentinel.
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = _leading_space_count(lines[thisstmt])
            want = thislevel * indent_size
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line. If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == _leading_space_count(lines[jline]):
                                    want = jlevel * indent_size
                                break
                    if want < 0:  # Maybe it's a hanging
                        # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = (have + _leading_space_count(
                                    after[jline - 1]) -
                                    _leading_space_count(lines[jline]))
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line_number, line in enumerate(lines[thisstmt:nextstmt],
                                                   start=thisstmt):
                    if line_number in self.string_content_line_numbers:
                        # Never reindent multiline string contents.
                        after.append(line)
                    elif diff > 0:
                        if line == '\n':
                            after.append(line)
                        else:
                            after.append(' ' * diff + line)
                    else:
                        remove = min(_leading_space_count(line), -diff)
                        after.append(line[remove:])

        return ''.join(after)

    def getline(self):
        """Line-getter for tokenize."""
        if self.index >= len(self.lines):
            line = ''
        else:
            line = self.lines[self.index]
            self.index += 1
        return line
def _reindent_stats(tokens):
    """Return list of (lineno, indentlevel) pairs.

    One for each stmt and comment line. indentlevel is -1 for comment lines, as
    a signal that tokenize doesn't know what to do about them; indeed, they're
    our headache!

    """
    find_stmt = 1  # Next token begins a fresh stmt?
    level = 0  # Current indent level.
    stats = []

    for t in tokens:
        token_type = t[0]
        sline = t[2][0]
        line = t[4]

        if token_type == tokenize.NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            find_stmt = 1

        elif token_type == tokenize.INDENT:
            find_stmt = 1
            level += 1

        elif token_type == tokenize.DEDENT:
            find_stmt = 1
            level -= 1

        elif token_type == tokenize.COMMENT:
            if find_stmt:
                # Record the comment with level -1 (meaning "unknown").
                stats.append((sline, -1))
                # But we're still looking for a new stmt, so leave
                # find_stmt alone.

        elif token_type == tokenize.NL:
            pass

        elif find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            find_stmt = 0
            if line:   # Not endmarker.
                stats.append((sline, level))

    return stats
2590 2591 elif token_type == tokenize.NL: 2592 pass 2593 2594 elif find_stmt: 2595 # This is the first "real token" following a NEWLINE, so it 2596 # must be the first token of the next program statement, or an 2597 # ENDMARKER. 2598 find_stmt = 0 2599 if line: # Not endmarker. 2600 stats.append((sline, level)) 2601 2602 return stats 2603 2604 2605 def _leading_space_count(line): 2606 """Return number of leading spaces in line.""" 2607 i = 0 2608 while i < len(line) and line[i] == ' ': 2609 i += 1 2610 return i 2611 2612 2613 def refactor_with_2to3(source_text, fixer_names): 2614 """Use lib2to3 to refactor the source. 2615 2616 Return the refactored source code. 2617 2618 """ 2619 from lib2to3.refactor import RefactoringTool 2620 fixers = ['lib2to3.fixes.fix_' + name for name in fixer_names] 2621 tool = RefactoringTool(fixer_names=fixers, explicit=fixers) 2622 2623 from lib2to3.pgen2 import tokenize as lib2to3_tokenize 2624 try: 2625 return unicode(tool.refactor_string(source_text, name='')) 2626 except lib2to3_tokenize.TokenError: 2627 return source_text 2628 2629 2630 def check_syntax(code): 2631 """Return True if syntax is okay.""" 2632 try: 2633 return compile(code, '<string>', 'exec') 2634 except (SyntaxError, TypeError, UnicodeDecodeError): 2635 return False 2636 2637 2638 def filter_results(source, results, aggressive): 2639 """Filter out spurious reports from pep8. 2640 2641 If aggressive is True, we allow possibly unsafe fixes (E711, E712). 
2642 2643 """ 2644 non_docstring_string_line_numbers = multiline_string_lines( 2645 source, include_docstrings=False) 2646 all_string_line_numbers = multiline_string_lines( 2647 source, include_docstrings=True) 2648 2649 commented_out_code_line_numbers = commented_out_code_lines(source) 2650 2651 for r in results: 2652 issue_id = r['id'].lower() 2653 2654 if r['line'] in non_docstring_string_line_numbers: 2655 if issue_id.startswith(('e1', 'e501', 'w191')): 2656 continue 2657 2658 if r['line'] in all_string_line_numbers: 2659 if issue_id in ['e501']: 2660 continue 2661 2662 # We must offset by 1 for lines that contain the trailing contents of 2663 # multiline strings. 2664 if not aggressive and (r['line'] + 1) in all_string_line_numbers: 2665 # Do not modify multiline strings in non-aggressive mode. Remove 2666 # trailing whitespace could break doctests. 2667 if issue_id.startswith(('w29', 'w39')): 2668 continue 2669 2670 if aggressive <= 0: 2671 if issue_id.startswith(('e711', 'w6')): 2672 continue 2673 2674 if aggressive <= 1: 2675 if issue_id.startswith(('e712', 'e713')): 2676 continue 2677 2678 if r['line'] in commented_out_code_line_numbers: 2679 if issue_id.startswith(('e26', 'e501')): 2680 continue 2681 2682 yield r 2683 2684 2685 def multiline_string_lines(source, include_docstrings=False): 2686 """Return line numbers that are within multiline strings. 2687 2688 The line numbers are indexed at 1. 2689 2690 Docstrings are ignored. 2691 2692 """ 2693 line_numbers = set() 2694 previous_token_type = '' 2695 try: 2696 for t in generate_tokens(source): 2697 token_type = t[0] 2698 start_row = t[2][0] 2699 end_row = t[3][0] 2700 2701 if token_type == tokenize.STRING and start_row != end_row: 2702 if ( 2703 include_docstrings or 2704 previous_token_type != tokenize.INDENT 2705 ): 2706 # We increment by one since we want the contents of the 2707 # string. 
def commented_out_code_lines(source):
    """Return line numbers of comments that are likely code.

    Commented-out code is bad practice, but modifying it just adds even more
    clutter.

    """
    line_numbers = []
    try:
        for t in generate_tokens(source):
            token_type = t[0]
            token_string = t[1]
            start_row = t[2][0]
            line = t[4]

            # Ignore inline comments.
            if not line.lstrip().startswith('#'):
                continue

            if token_type == tokenize.COMMENT:
                stripped_line = token_string.lstrip('#').strip()
                # Heuristic: multi-word, hash-free text that compiles is
                # probably code, not prose.
                if (
                    ' ' in stripped_line and
                    '#' not in stripped_line and
                    check_syntax(stripped_line)
                ):
                    line_numbers.append(start_row)
    except (SyntaxError, tokenize.TokenError):
        pass

    return line_numbers


def shorten_comment(line, max_line_length, last_comment=False):
    """Return trimmed or split long comment line.

    If there are no comments immediately following it, do a text wrap.
    Doing this wrapping on all comments in general would lead to jagged
    comment text.

    """
    assert len(line) > max_line_length
    line = line.rstrip()

    # PEP 8 recommends 72 characters for comment text.
    indentation = _get_indentation(line) + '# '
    max_line_length = min(max_line_length,
                          len(indentation) + 72)

    MIN_CHARACTER_REPEAT = 5
    if (
        len(line) - len(line.rstrip(line[-1])) >= MIN_CHARACTER_REPEAT and
        not line[-1].isalnum()
    ):
        # Trim comments that end with things like ---------
        return line[:max_line_length] + '\n'
    elif last_comment and re.match(r'\s*#+\s*\w+', line):
        import textwrap
        split_lines = textwrap.wrap(line.lstrip(' \t#'),
                                    initial_indent=indentation,
                                    subsequent_indent=indentation,
                                    width=max_line_length,
                                    break_long_words=False,
                                    break_on_hyphens=False)
        return '\n'.join(split_lines) + '\n'
    else:
        return line + '\n'


def normalize_line_endings(lines, newline):
    """Return fixed line endings.

    All lines will be modified to use the most common line ending.

    """
    return [line.rstrip('\n\r') + newline for line in lines]


def mutual_startswith(a, b):
    """Return True if either string is a prefix of the other."""
    return b.startswith(a) or a.startswith(b)


def code_match(code, select, ignore):
    """Return True if `code` is enabled by the select/ignore lists.

    `select` and `ignore` are iterables of code prefixes (never bare
    strings). Ignore takes precedence; an explicit select list then
    restricts fixes to matching prefixes.

    """
    if ignore:
        assert not isinstance(ignore, unicode)
        for ignored_code in [c.strip() for c in ignore]:
            if mutual_startswith(code.lower(), ignored_code.lower()):
                return False

    if select:
        assert not isinstance(select, unicode)
        for selected_code in [c.strip() for c in select]:
            if mutual_startswith(code.lower(), selected_code.lower()):
                return True
        return False

    return True


def fix_code(source, options=None):
    """Return fixed source code."""
    if not options:
        options = parse_args([''])

    if not isinstance(source, unicode):
        source = source.decode(locale.getpreferredencoding())

    sio = io.StringIO(source)
    return fix_lines(sio.readlines(), options=options)


def fix_lines(source_lines, options, filename=''):
    """Return fixed source code."""
    # Transform everything to line feed. Then change them back to original
    # before returning fixed source code.
    original_newline = find_newline(source_lines)
    tmp_source = ''.join(normalize_line_endings(source_lines, '\n'))

    # Keep a history to break out of cycles.
    previous_hashes = set()

    if options.line_range:
        fixed_source = apply_local_fixes(tmp_source, options)
    else:
        # Apply global fixes only once (for efficiency).
        fixed_source = apply_global_fixes(tmp_source, options)

    passes = 0
    long_line_ignore_cache = set()
    # Iterate pep8 passes until the output stops changing (or the
    # configured pass limit is hit).
    while hash(fixed_source) not in previous_hashes:
        if options.pep8_passes >= 0 and passes > options.pep8_passes:
            break
        passes += 1

        previous_hashes.add(hash(fixed_source))

        tmp_source = copy.copy(fixed_source)

        fix = FixPEP8(
            filename,
            options,
            contents=tmp_source,
            long_line_ignore_cache=long_line_ignore_cache)

        fixed_source = fix.fix()

    sio = io.StringIO(fixed_source)
    return ''.join(normalize_line_endings(sio.readlines(), original_newline))
def fix_file(filename, options=None, output=None):
    """Fix a file in place, write a diff/result to `output`, or return it."""
    if not options:
        options = parse_args([filename])

    original_source = readlines_from_file(filename)

    fixed_source = original_source

    if options.in_place or output:
        encoding = detect_encoding(filename)

    if output:
        # Wrap the stream so encoded bytes go to the underlying buffer.
        output = codecs.getwriter(encoding)(output.buffer
                                            if hasattr(output, 'buffer')
                                            else output)

        output = LineEndingWrapper(output)

    fixed_source = fix_lines(fixed_source, options, filename=filename)

    if options.diff:
        new = io.StringIO(fixed_source)
        new = new.readlines()
        diff = get_diff_text(original_source, new, filename)
        if output:
            output.write(diff)
            output.flush()
        else:
            return diff
    elif options.in_place:
        fp = open_with_encoding(filename, encoding=encoding,
                                mode='w')
        fp.write(fixed_source)
        fp.close()
    else:
        if output:
            output.write(fixed_source)
            output.flush()
        else:
            return fixed_source


def global_fixes():
    """Yield multiple (code, function) tuples."""
    # Any module-level function whose first parameter is named `source`
    # and whose name looks like "fix_<code>" is treated as a global fix.
    for function in globals().values():
        if inspect.isfunction(function):
            arguments = inspect.getargspec(function)[0]
            if arguments[:1] != ['source']:
                continue

            code = extract_code_from_function(function)
            if code:
                yield (code, function)


def apply_global_fixes(source, options, where='global'):
    """Run global fixes on source code.

    These are fixes that only need be done once (unlike those in
    FixPEP8, which are dependent on pep8).

    """
    if code_match('E101', select=options.select, ignore=options.ignore):
        source = reindent(source,
                          indent_size=options.indent_size)

    for (code, function) in global_fixes():
        if code_match(code, select=options.select, ignore=options.ignore):
            if options.verbose:
                print('--->  Applying {0} fix for {1}'.format(where,
                                                              code.upper()),
                      file=sys.stderr)
            source = function(source,
                              aggressive=options.aggressive)

    source = fix_2to3(source,
                      aggressive=options.aggressive,
                      select=options.select,
                      ignore=options.ignore)

    return source


def apply_local_fixes(source, options):
    """Analogous to apply_global_fixes, but runs only those which make sense
    for the given line_range.

    Do as much as we can without breaking code.

    """
    def find_ge(a, x):
        """Find leftmost item greater than or equal to x."""
        i = bisect.bisect_left(a, x)
        if i != len(a):
            return i, a[i]
        return len(a) - 1, a[-1]

    def find_le(a, x):
        """Find rightmost value less than or equal to x."""
        i = bisect.bisect_right(a, x)
        if i:
            return i - 1, a[i - 1]
        return 0, a[0]

    def local_fix(source, start_log, end_log,
                  start_lines, end_lines, indents, last_line):
        """apply_global_fixes to the source between start_log and end_log.

        The subsource must be the correct syntax of a complete python program
        (but all lines may share an indentation). The subsource's shared indent
        is removed, fixes are applied and the indent prepended back. Taking
        care to not reindent strings.

        last_line is the strict cut off (options.line_range[1]), so that
        lines after last_line are not modified.

        """
        if end_log < start_log:
            return source

        ind = indents[start_log]
        indent = _get_indentation(source[start_lines[start_log]])

        sl = slice(start_lines[start_log], end_lines[end_log] + 1)

        subsource = source[sl]
        # Remove indent from subsource.
        if ind:
            for line_no in start_lines[start_log:end_log + 1]:
                pos = line_no - start_lines[start_log]
                subsource[pos] = subsource[pos][ind:]

        # Fix indentation of subsource.
        fixed_subsource = apply_global_fixes(''.join(subsource),
                                             options,
                                             where='local')
        fixed_subsource = fixed_subsource.splitlines(True)

        # Add back indent for non multi-line strings lines.
        msl = multiline_string_lines(''.join(fixed_subsource),
                                     include_docstrings=False)
        for i, line in enumerate(fixed_subsource):
            if not i + 1 in msl:
                fixed_subsource[i] = indent + line if line != '\n' else line

        # We make a special case to look at the final line, if it's a multiline
        # *and* the cut off is somewhere inside it, we take the fixed
        # subset up until last_line, this assumes that the number of lines
        # does not change in this multiline line.
        changed_lines = len(fixed_subsource)
        if (start_lines[end_log] != end_lines[end_log]
                and end_lines[end_log] > last_line):
            after_end = end_lines[end_log] - last_line
            fixed_subsource = (fixed_subsource[:-after_end] +
                               source[sl][-after_end:])
            changed_lines -= after_end

        options.line_range[1] = (options.line_range[0] +
                                 changed_lines - 1)

        return (source[:start_lines[start_log]] +
                fixed_subsource +
                source[end_lines[end_log] + 1:])

    def is_continued_stmt(line,
                          continued_stmts=frozenset(['else', 'elif',
                                                     'finally', 'except'])):
        # A statement that cannot stand alone (needs its preceding block).
        return re.split('[ :]', line.strip(), 1)[0] in continued_stmts

    assert options.line_range
    start, end = options.line_range
    start -= 1
    end -= 1
    last_line = end  # We shouldn't modify lines after this cut-off.

    try:
        logical = _find_logical(source)
    except (SyntaxError, tokenize.TokenError):
        return ''.join(source)

    if not logical[0]:
        # Just blank lines, this should imply that it will become '\n' ?
        return apply_global_fixes(source, options)

    start_lines, indents = zip(*logical[0])
    end_lines, _ = zip(*logical[1])

    source = source.splitlines(True)

    start_log, start = find_ge(start_lines, start)
    end_log, end = find_le(start_lines, end)

    # Look behind one line, if it's indented less than current indent
    # then we can move to this previous line knowing that its
    # indentation level will not be changed.
    if (start_log > 0
            and indents[start_log - 1] < indents[start_log]
            and not is_continued_stmt(source[start_log - 1])):
        start_log -= 1
        start = start_lines[start_log]

    while start < end:

        if is_continued_stmt(source[start]):
            start_log += 1
            start = start_lines[start_log]
            continue

        ind = indents[start_log]
        for t in itertools.takewhile(lambda t: t[1][1] >= ind,
                                     enumerate(logical[0][start_log:])):
            n_log, n = start_log + t[0], t[1][0]
        # start shares indent up to n.

        if n <= end:
            source = local_fix(source, start_log, n_log,
                               start_lines, end_lines,
                               indents, last_line)
            start_log = n_log if n == end else n_log + 1
            start = start_lines[start_log]
            continue

        else:
            # Look at the line after end and see if allows us to reindent.
            after_end_log, after_end = find_ge(start_lines, end + 1)

            if indents[after_end_log] > indents[start_log]:
                start_log, start = find_ge(start_lines, start + 1)
                continue

            if (indents[after_end_log] == indents[start_log]
                    and is_continued_stmt(source[after_end])):
                # find n, the beginning of the last continued statement
                # Apply fix to previous block if there is one.
                only_block = True
                for n, n_ind in logical[0][start_log:end_log + 1][::-1]:
                    if n_ind == ind and not is_continued_stmt(source[n]):
                        n_log = start_lines.index(n)
                        source = local_fix(source, start_log, n_log - 1,
                                           start_lines, end_lines,
                                           indents, last_line)
                        start_log = n_log + 1
                        start = start_lines[start_log]
                        only_block = False
                        break
                if only_block:
                    end_log, end = find_le(start_lines, end - 1)
                continue

            source = local_fix(source, start_log, end_log,
                               start_lines, end_lines,
                               indents, last_line)
            break

    return ''.join(source)
def extract_code_from_function(function):
    """Return the pep8 code handled by `function`, or None.

    A global-fix function is named "fix_<code>" where <code> is a letter
    followed by digits (e.g. "fix_e501" -> "e501"). Anything else
    returns None.

    """
    if not function.__name__.startswith('fix_'):
        return None

    code = re.sub('^fix_', '', function.__name__)
    if not code:
        return None

    # Everything after the leading letter must be numeric.
    try:
        int(code[1:])
    except ValueError:
        return None

    return code


def create_parser():
    """Return command-line parser."""
    # Do import locally to be friendly to those who use autopep8 as a library
    # and are supporting Python 2.6.
    import argparse

    parser = argparse.ArgumentParser(description=docstring_summary(__doc__),
                                     prog='autopep8')
    parser.add_argument('--version', action='version',
                        version='%(prog)s ' + __version__)
    parser.add_argument('-v', '--verbose', action='count', dest='verbose',
                        default=0,
                        help='print verbose messages; '
                        'multiple -v result in more verbose messages')
    parser.add_argument('-d', '--diff', action='store_true', dest='diff',
                        help='print the diff for the fixed source')
    parser.add_argument('-i', '--in-place', action='store_true',
                        help='make changes to files in place')
    parser.add_argument('-r', '--recursive', action='store_true',
                        help='run recursively over directories; '
                        'must be used with --in-place or --diff')
    parser.add_argument('-j', '--jobs', type=int, metavar='n', default=1,
                        help='number of parallel jobs; '
                        'match CPU count if value is less than 1')
    parser.add_argument('-p', '--pep8-passes', metavar='n',
                        default=-1, type=int,
                        help='maximum number of additional pep8 passes '
                        '(default: infinite)')
    parser.add_argument('-a', '--aggressive', action='count', default=0,
                        help='enable non-whitespace changes; '
                        'multiple -a result in more aggressive changes')
    parser.add_argument('--experimental', action='store_true',
                        help='enable experimental fixes')
    parser.add_argument('--exclude', metavar='globs',
                        help='exclude file/directory names that match these '
                        'comma-separated globs')
    parser.add_argument('--list-fixes', action='store_true',
                        help='list codes for fixes; '
                        'used by --ignore and --select')
    parser.add_argument('--ignore', metavar='errors', default='',
                        help='do not fix these errors/warnings '
                        '(default: {0})'.format(DEFAULT_IGNORE))
    parser.add_argument('--select', metavar='errors', default='',
                        help='fix only these errors/warnings (e.g. E4,W)')
    parser.add_argument('--max-line-length', metavar='n', default=79, type=int,
                        help='set maximum allowed line length '
                        '(default: %(default)s)')
    parser.add_argument('--range', metavar='line', dest='line_range',
                        default=None, type=int, nargs=2,
                        help='only fix errors found within this inclusive '
                        'range of line numbers (e.g. 1 99); '
                        'line numbers are indexed at 1')
    parser.add_argument('--indent-size', default=DEFAULT_INDENT_SIZE,
                        type=int, metavar='n',
                        help='number of spaces per indent level '
                        '(default %(default)s)')
    parser.add_argument('files', nargs='*',
                        help="files to format or '-' for standard in")

    return parser
def parse_args(arguments):
    """Parse command-line options."""
    parser = create_parser()
    args = parser.parse_args(arguments)

    if not args.files and not args.list_fixes:
        parser.error('incorrect number of arguments')

    args.files = [decode_filename(name) for name in args.files]

    if '-' in args.files:
        # '-' means standard input; several options are incompatible
        # with reading from stdin.
        if len(args.files) > 1:
            parser.error('cannot mix stdin and regular files')

        if args.diff:
            parser.error('--diff cannot be used with standard input')

        if args.in_place:
            parser.error('--in-place cannot be used with standard input')

        if args.recursive:
            parser.error('--recursive cannot be used with standard input')

    if len(args.files) > 1 and not (args.in_place or args.diff):
        parser.error('autopep8 only takes one filename as argument '
                     'unless the "--in-place" or "--diff" args are '
                     'used')

    if args.recursive and not (args.in_place or args.diff):
        parser.error('--recursive must be used with --in-place or --diff')

    if args.exclude and not args.recursive:
        parser.error('--exclude is only relevant when used with --recursive')

    if args.in_place and args.diff:
        parser.error('--in-place and --diff are mutually exclusive')

    if args.max_line_length <= 0:
        parser.error('--max-line-length must be greater than 0')

    # Turn the comma-separated option strings into lists.
    if args.select:
        args.select = args.select.split(',')

    if args.ignore:
        args.ignore = args.ignore.split(',')
    elif not args.select:
        if args.aggressive:
            # Enable everything by default if aggressive.
            args.select = ['E', 'W']
        else:
            args.ignore = DEFAULT_IGNORE.split(',')

    if args.exclude:
        args.exclude = args.exclude.split(',')
    else:
        args.exclude = []

    if args.jobs < 1:
        # Do not import multiprocessing globally in case it is not supported
        # on the platform.
        import multiprocessing
        args.jobs = multiprocessing.cpu_count()

    if args.jobs > 1 and not args.in_place:
        parser.error('parallel jobs requires --in-place')

    if args.line_range:
        if args.line_range[0] <= 0:
            parser.error('--range must be positive numbers')
        if args.line_range[0] > args.line_range[1]:
            parser.error('First value of --range should be less than or equal '
                         'to the second')

    return args
def decode_filename(filename):
    """Return Unicode filename."""
    if isinstance(filename, unicode):
        return filename
    else:
        return filename.decode(sys.getfilesystemencoding())


def supported_fixes():
    """Yield pep8 error codes that autopep8 fixes.

    Each item we yield is a tuple of the code followed by its
    description.

    """
    yield ('E101', docstring_summary(reindent.__doc__))

    instance = FixPEP8(filename=None, options=None, contents='')
    for attribute in dir(instance):
        code = re.match('fix_([ew][0-9][0-9][0-9])', attribute)
        if code:
            yield (
                code.group(1).upper(),
                re.sub(r'\s+', ' ',
                       docstring_summary(getattr(instance, attribute).__doc__))
            )

    for (code, function) in sorted(global_fixes()):
        yield (code.upper() + (4 - len(code)) * ' ',
               re.sub(r'\s+', ' ', docstring_summary(function.__doc__)))

    for code in sorted(CODE_TO_2TO3):
        yield (code.upper() + (4 - len(code)) * ' ',
               re.sub(r'\s+', ' ', docstring_summary(fix_2to3.__doc__)))


def docstring_summary(docstring):
    """Return the first line of the docstring."""
    return docstring.split('\n')[0]


def line_shortening_rank(candidate, indent_word, max_line_length,
                         experimental=False):
    """Return rank of candidate.

    This is for sorting candidates. Lower is better.

    """
    if not candidate.strip():
        return 0

    rank = 0
    lines = candidate.split('\n')

    offset = 0
    if (
        not lines[0].lstrip().startswith('#') and
        lines[0].rstrip()[-1] not in '([{'
    ):
        for (opening, closing) in ('()', '[]', '{}'):
            # Don't penalize empty containers that aren't split up. Things like
            # this "foo(\n    )" aren't particularly good.
            opening_loc = lines[0].find(opening)
            closing_loc = lines[0].find(closing)
            if opening_loc >= 0:
                if closing_loc < 0 or closing_loc != opening_loc + 1:
                    offset = max(offset, 1 + opening_loc)

    current_longest = max(offset + len(x.strip()) for x in lines)

    # Penalize lines that remain longer than the limit.
    rank += 4 * max(0, current_longest - max_line_length)

    rank += len(lines)

    # Too much variation in line length is ugly.
    rank += 2 * standard_deviation(len(line) for line in lines)

    # (typo fix: was `bad_staring_symbol`)
    bad_starting_symbol = {
        '(': ')',
        '[': ']',
        '{': '}'}.get(lines[0][-1])

    if len(lines) > 1:
        if (
            bad_starting_symbol and
            lines[1].lstrip().startswith(bad_starting_symbol)
        ):
            rank += 20

    for lineno, current_line in enumerate(lines):
        current_line = current_line.strip()

        if current_line.startswith('#'):
            continue

        for bad_start in ['.', '%', '+', '-', '/']:
            if current_line.startswith(bad_start):
                rank += 100

            # Do not tolerate operators on their own line.
            if current_line == bad_start:
                rank += 1000

        if current_line.endswith(('(', '[', '{', '.')):
            # Avoid lonely opening. They result in longer lines.
            if len(current_line) <= len(indent_word):
                rank += 100

            # Avoid the ugliness of ", (\n".
            if (
                current_line.endswith('(') and
                current_line[:-1].rstrip().endswith(',')
            ):
                rank += 100

            # Also avoid the ugliness of "foo.\nbar"
            if current_line.endswith('.'):
                rank += 100

            if has_arithmetic_operator(current_line):
                rank += 100

        if current_line.endswith(('%', '(', '[', '{')):
            rank -= 20

        # Try to break list comprehensions at the "for".
        if current_line.startswith('for '):
            rank -= 50

        if current_line.endswith('\\'):
            # If a line ends in \-newline, it may be part of a
            # multiline string. In that case, we would like to know
            # how long that line is without the \-newline. If it's
            # longer than the maximum, or has comments, then we assume
            # that the \-newline is an okay candidate and only
            # penalize it a bit.
            total_len = len(current_line)
            lineno += 1
            while lineno < len(lines):
                total_len += len(lines[lineno])

                if lines[lineno].lstrip().startswith('#'):
                    total_len = max_line_length
                    break

                if not lines[lineno].endswith('\\'):
                    break

                lineno += 1

            if total_len < max_line_length:
                rank += 10
            else:
                rank += 100 if experimental else 1

        # Prefer breaking at commas rather than colon.
        if ',' in current_line and current_line.endswith(':'):
            rank += 10

        rank += 10 * count_unbalanced_brackets(current_line)

    return max(0, rank)
3354 rank += 2 * standard_deviation(len(line) for line in lines) 3355 3356 bad_staring_symbol = { 3357 '(': ')', 3358 '[': ']', 3359 '{': '}'}.get(lines[0][-1]) 3360 3361 if len(lines) > 1: 3362 if ( 3363 bad_staring_symbol and 3364 lines[1].lstrip().startswith(bad_staring_symbol) 3365 ): 3366 rank += 20 3367 3368 for lineno, current_line in enumerate(lines): 3369 current_line = current_line.strip() 3370 3371 if current_line.startswith('#'): 3372 continue 3373 3374 for bad_start in ['.', '%', '+', '-', '/']: 3375 if current_line.startswith(bad_start): 3376 rank += 100 3377 3378 # Do not tolerate operators on their own line. 3379 if current_line == bad_start: 3380 rank += 1000 3381 3382 if current_line.endswith(('(', '[', '{', '.')): 3383 # Avoid lonely opening. They result in longer lines. 3384 if len(current_line) <= len(indent_word): 3385 rank += 100 3386 3387 # Avoid the ugliness of ", (\n". 3388 if ( 3389 current_line.endswith('(') and 3390 current_line[:-1].rstrip().endswith(',') 3391 ): 3392 rank += 100 3393 3394 # Also avoid the ugliness of "foo.\nbar" 3395 if current_line.endswith('.'): 3396 rank += 100 3397 3398 if has_arithmetic_operator(current_line): 3399 rank += 100 3400 3401 if current_line.endswith(('%', '(', '[', '{')): 3402 rank -= 20 3403 3404 # Try to break list comprehensions at the "for". 3405 if current_line.startswith('for '): 3406 rank -= 50 3407 3408 if current_line.endswith('\\'): 3409 # If a line ends in \-newline, it may be part of a 3410 # multiline string. In that case, we would like to know 3411 # how long that line is without the \-newline. If it's 3412 # longer than the maximum, or has comments, then we assume 3413 # that the \-newline is an okay candidate and only 3414 # penalize it a bit. 
3415 total_len = len(current_line) 3416 lineno += 1 3417 while lineno < len(lines): 3418 total_len += len(lines[lineno]) 3419 3420 if lines[lineno].lstrip().startswith('#'): 3421 total_len = max_line_length 3422 break 3423 3424 if not lines[lineno].endswith('\\'): 3425 break 3426 3427 lineno += 1 3428 3429 if total_len < max_line_length: 3430 rank += 10 3431 else: 3432 rank += 100 if experimental else 1 3433 3434 # Prefer breaking at commas rather than colon. 3435 if ',' in current_line and current_line.endswith(':'): 3436 rank += 10 3437 3438 rank += 10 * count_unbalanced_brackets(current_line) 3439 3440 return max(0, rank) 3441 3442 3443 def standard_deviation(numbers): 3444 """Return standard devation.""" 3445 numbers = list(numbers) 3446 if not numbers: 3447 return 0 3448 mean = sum(numbers) / len(numbers) 3449 return (sum((n - mean) ** 2 for n in numbers) / 3450 len(numbers)) ** .5 3451 3452 3453 def has_arithmetic_operator(line): 3454 """Return True if line contains any arithmetic operators.""" 3455 for operator in pep8.ARITHMETIC_OP: 3456 if operator in line: 3457 return True 3458 3459 return False 3460 3461 3462 def count_unbalanced_brackets(line): 3463 """Return number of unmatched open/close brackets.""" 3464 count = 0 3465 for opening, closing in ['()', '[]', '{}']: 3466 count += abs(line.count(opening) - line.count(closing)) 3467 3468 return count 3469 3470 3471 def split_at_offsets(line, offsets): 3472 """Split line at offsets. 3473 3474 Return list of strings. 3475 3476 """ 3477 result = [] 3478 3479 previous_offset = 0 3480 current_offset = 0 3481 for current_offset in sorted(offsets): 3482 if current_offset < len(line) and previous_offset != current_offset: 3483 result.append(line[previous_offset:current_offset].strip()) 3484 previous_offset = current_offset 3485 3486 result.append(line[current_offset:]) 3487 3488 return result 3489 3490 3491 class LineEndingWrapper(object): 3492 3493 r"""Replace line endings to work with sys.stdout. 
3494 3495 It seems that sys.stdout expects only '\n' as the line ending, no matter 3496 the platform. Otherwise, we get repeated line endings. 3497 3498 """ 3499 3500 def __init__(self, output): 3501 self.__output = output 3502 3503 def write(self, s): 3504 self.__output.write(s.replace('\r\n', '\n').replace('\r', '\n')) 3505 3506 def flush(self): 3507 self.__output.flush() 3508 3509 3510 def match_file(filename, exclude): 3511 """Return True if file is okay for modifying/recursing.""" 3512 base_name = os.path.basename(filename) 3513 3514 if base_name.startswith('.'): 3515 return False 3516 3517 for pattern in exclude: 3518 if fnmatch.fnmatch(base_name, pattern): 3519 return False 3520 3521 if not os.path.isdir(filename) and not is_python_file(filename): 3522 return False 3523 3524 return True 3525 3526 3527 def find_files(filenames, recursive, exclude): 3528 """Yield filenames.""" 3529 while filenames: 3530 name = filenames.pop(0) 3531 if recursive and os.path.isdir(name): 3532 for root, directories, children in os.walk(name): 3533 filenames += [os.path.join(root, f) for f in children 3534 if match_file(os.path.join(root, f), 3535 exclude)] 3536 directories[:] = [d for d in directories 3537 if match_file(os.path.join(root, d), 3538 exclude)] 3539 else: 3540 yield name 3541 3542 3543 def _fix_file(parameters): 3544 """Helper function for optionally running fix_file() in parallel.""" 3545 if parameters[1].verbose: 3546 print('[file:{0}]'.format(parameters[0]), file=sys.stderr) 3547 try: 3548 fix_file(*parameters) 3549 except IOError as error: 3550 print(unicode(error), file=sys.stderr) 3551 3552 3553 def fix_multiple_files(filenames, options, output=None): 3554 """Fix list of files. 3555 3556 Optionally fix files recursively. 
3557 3558 """ 3559 filenames = find_files(filenames, options.recursive, options.exclude) 3560 if options.jobs > 1: 3561 import multiprocessing 3562 pool = multiprocessing.Pool(options.jobs) 3563 pool.map(_fix_file, 3564 [(name, options) for name in filenames]) 3565 else: 3566 for name in filenames: 3567 _fix_file((name, options, output)) 3568 3569 3570 def is_python_file(filename): 3571 """Return True if filename is Python file.""" 3572 if filename.endswith('.py'): 3573 return True 3574 3575 try: 3576 with open_with_encoding(filename) as f: 3577 first_line = f.readlines(1)[0] 3578 except (IOError, IndexError): 3579 return False 3580 3581 if not PYTHON_SHEBANG_REGEX.match(first_line): 3582 return False 3583 3584 return True 3585 3586 3587 def is_probably_part_of_multiline(line): 3588 """Return True if line is likely part of a multiline string. 3589 3590 When multiline strings are involved, pep8 reports the error as being 3591 at the start of the multiline string, which doesn't work for us. 3592 3593 """ 3594 return ( 3595 '"""' in line or 3596 "'''" in line or 3597 line.rstrip().endswith('\\') 3598 ) 3599 3600 3601 def main(): 3602 """Tool main.""" 3603 try: 3604 # Exit on broken pipe. 3605 signal.signal(signal.SIGPIPE, signal.SIG_DFL) 3606 except AttributeError: # pragma: no cover 3607 # SIGPIPE is not available on Windows. 3608 pass 3609 3610 try: 3611 args = parse_args(sys.argv[1:]) 3612 3613 if args.list_fixes: 3614 for code, description in sorted(supported_fixes()): 3615 print('{code} - {description}'.format( 3616 code=code, description=description)) 3617 return 0 3618 3619 if args.files == ['-']: 3620 assert not args.in_place 3621 3622 # LineEndingWrapper is unnecessary here due to the symmetry between 3623 # standard in and standard out. 
3624 sys.stdout.write(fix_code(sys.stdin.read(), args)) 3625 else: 3626 if args.in_place or args.diff: 3627 args.files = list(set(args.files)) 3628 else: 3629 assert len(args.files) == 1 3630 assert not args.recursive 3631 3632 fix_multiple_files(args.files, args, sys.stdout) 3633 except KeyboardInterrupt: 3634 return 1 # pragma: no cover 3635 3636 3637 class CachedTokenizer(object): 3638 3639 """A one-element cache around tokenize.generate_tokens(). 3640 3641 Original code written by Ned Batchelder, in coverage.py. 3642 3643 """ 3644 3645 def __init__(self): 3646 self.last_text = None 3647 self.last_tokens = None 3648 3649 def generate_tokens(self, text): 3650 """A stand-in for tokenize.generate_tokens().""" 3651 if text != self.last_text: 3652 string_io = io.StringIO(text) 3653 self.last_tokens = list( 3654 tokenize.generate_tokens(string_io.readline) 3655 ) 3656 self.last_text = text 3657 return self.last_tokens 3658 3659 _cached_tokenizer = CachedTokenizer() 3660 generate_tokens = _cached_tokenizer.generate_tokens 3661 3662 3663 if __name__ == '__main__': 3664 sys.exit(main()) 3665