#!/usr/bin/python
# pep8.py - Check Python source code formatting, according to PEP 8
# Copyright (C) 2006 Johann C. Rocholl <johann (at] rocholl.net>
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# The module docstring is a raw string so that the escape sequences it
# documents (\n, \t, \s) are shown literally instead of being interpreted.
r"""
Check Python source code formatting, according to PEP 8:
http://www.python.org/dev/peps/pep-0008/

For usage and a list of options, try this:
$ python pep8.py -h

This program and its regression test suite live here:
http://github.com/jcrocholl/pep8

Groups of errors and warnings:
E errors
W warnings
100 indentation
200 whitespace
300 blank lines
400 imports
500 line length
600 deprecation
700 statements

You can add checks to this program by writing plugins. Each plugin is
a simple function that is called for each line of source code, either
physical or logical.

Physical line:
- Raw line of text from the input file.

Logical line:
- Multi-line statements converted to a single line.
- Stripped left and right.
- Contents of strings replaced with 'xxx' of same length.
- Comments removed.

The check function requests physical or logical lines by the name of
the first argument:

def maximum_line_length(physical_line)
def extraneous_whitespace(logical_line)
def blank_lines(logical_line, blank_lines, indent_level, line_number)

The last example above demonstrates how check plugins can request
additional information with extra arguments. All attributes of the
Checker object are available. Some examples:

lines: a list of the raw lines from the input file
tokens: the tokens that contribute to this logical line
line_number: line number in the input file
blank_lines: blank lines before this one
indent_char: first indentation character in this file (' ' or '\t')
indent_level: indentation (with tabs expanded to multiples of 8)
previous_indent_level: indentation on previous line
previous_logical: previous logical line

The docstring of each check function shall be the relevant part of
text from PEP 8. It is printed if the user enables --show-pep8.
Several docstrings contain examples directly from the PEP 8 document.

Okay: spam(ham[1], {eggs: 2})
E201: spam( ham[1], {eggs: 2})

These examples are verified automatically when pep8.py is run with the
--doctest option. You can add examples for your own check functions.
The format is simple: "Okay" or error/warning code followed by colon
and space, the rest of the line is example source code. If you put 'r'
before the docstring, you can use \n for newline, \t for tab and \s
for space.

"""

__version__ = '0.5.0'

import os
import sys
import re
import time
import inspect
import tokenize
from optparse import OptionParser
from keyword import iskeyword
from fnmatch import fnmatch

DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git'
DEFAULT_IGNORE = ['E24']

INDENT_REGEX = re.compile(r'([ \t]*)')
RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*(,)')
SELFTEST_REGEX = re.compile(r'(Okay|[EW]\d{3}):\s(.*)')
ERRORCODE_REGEX = re.compile(r'[EW]\d{3}')
E301NOT_REGEX = re.compile(r'class |def |u?r?["\']')

WHITESPACE = ' \t'

BINARY_OPERATORS = ['**=', '*=', '+=', '-=', '!=', '<>',
    '%=', '^=', '&=', '|=', '==', '/=', '//=', '>=', '<=', '>>=', '<<=',
    '%', '^', '&', '|', '=', '/', '//', '>', '<', '>>', '<<']
UNARY_OPERATORS = ['**', '*', '+', '-']
OPERATORS = BINARY_OPERATORS + UNARY_OPERATORS

# Filled in by the option-processing code; read by the checks and the
# Checker class at run time.
options = None
args = None


##############################################################################
# Plugins (check functions) for physical lines
##############################################################################

# NOTE: the docstrings of check functions are runtime data.  They are shown
# by --show-pep8, scanned for error codes, and their "Okay:"/"Exxx:" lines
# are executed as self-tests, so whitespace inside those examples matters.


def tabs_or_spaces(physical_line, indent_char):
    r"""
    Never mix tabs and spaces.

    The most popular way of indenting Python is with spaces only. The
    second-most popular way is with tabs only. Code indented with a mixture
    of tabs and spaces should be converted to using spaces exclusively. When
    invoking the Python command line interpreter with the -t option, it issues
    warnings about code that illegally mixes tabs and spaces. When using -tt
    these warnings become errors. These options are highly recommended!

    Okay: if a == 0:\n        a = 1\n        b = 1
    E101: if a == 0:\n        a = 1\n\tb = 1
    """
    # Returns (offset, message) for the first indentation character that
    # differs from indent_char, or None when the indentation is uniform.
    indent = INDENT_REGEX.match(physical_line).group(1)
    for offset, char in enumerate(indent):
        if char != indent_char:
            return offset, "E101 indentation contains mixed spaces and tabs"


def tabs_obsolete(physical_line):
    r"""
    For new projects, spaces-only are strongly recommended over tabs. Most
    editors have features that make this easy to do.

    Okay: if True:\n    return
    W191: if True:\n\treturn
    """
    # Returns (offset of the first tab, message) or None.
    indent = INDENT_REGEX.match(physical_line).group(1)
    if '\t' in indent:
        return indent.index('\t'), "W191 indentation contains tabs"


def trailing_whitespace(physical_line):
    r"""
    JCR: Trailing whitespace is superfluous.

    Okay: spam(1)
    W291: spam(1)\s
    """
    # Line terminators and a trailing form feed are not counted as
    # trailing whitespace, so strip them first, in this order.
    physical_line = physical_line.rstrip('\n')    # chr(10), newline
    physical_line = physical_line.rstrip('\r')    # chr(13), carriage return
    physical_line = physical_line.rstrip('\x0c')  # chr(12), form feed, ^L
    stripped = physical_line.rstrip()
    if physical_line != stripped:
        return len(stripped), "W291 trailing whitespace"


def trailing_blank_lines(physical_line, lines, line_number):
    r"""
    JCR: Trailing blank lines are superfluous.

    Okay: spam(1)
    W391: spam(1)\n
    """
    # Only the very last physical line of the file is considered.
    if physical_line.strip() == '' and line_number == len(lines):
        return 0, "W391 blank line at end of file"


def missing_newline(physical_line):
    """
    JCR: The last line should have a newline.
    """
    # A line that is unchanged by rstrip() has no terminating newline.
    if physical_line.rstrip() == physical_line:
        return len(physical_line), "W292 no newline at end of file"
def maximum_line_length(physical_line):
    """
    Limit all lines to a maximum of 79 characters.

    There are still many devices around that are limited to 80 character
    lines; plus, limiting windows to 80 characters makes it possible to have
    several windows side-by-side. The default wrapping on such devices looks
    ugly. Therefore, please limit all lines to a maximum of 79 characters.
    For flowing long blocks of text (docstrings or comments), limiting the
    length to 72 characters is recommended.
    """
    # Trailing whitespace (including the newline) does not count towards
    # the length.  The reported offset is always column 79.
    visible_length = len(physical_line.rstrip())
    if visible_length <= 79:
        return None
    return 79, "E501 line too long (%d characters)" % visible_length
def blank_lines(logical_line, blank_lines, indent_level, line_number,
                previous_logical, blank_lines_before_comment):
    r"""
    Separate top-level function and class definitions with two blank lines.

    Method definitions inside a class are separated by a single blank line.

    Extra blank lines may be used (sparingly) to separate groups of related
    functions. Blank lines may be omitted between a bunch of related
    one-liners (e.g. a set of dummy implementations).

    Use blank lines in functions, sparingly, to indicate logical sections.

    Okay: def a():\n    pass\n\n\ndef b():\n    pass
    Okay: def a():\n    pass\n\n\n# Foo\n# Bar\n\ndef b():\n    pass

    E301: class Foo:\n    b = 0\n    def bar():\n        pass
    E302: def a():\n    pass\n\ndef b(n):\n    pass
    E303: def a():\n    pass\n\n\n\ndef b(n):\n    pass
    E303: def a():\n\n\n\n    pass
    E304: @decorator\n\ndef a():\n    pass
    """
    # The examples above are executed by the self-test, so the indentation
    # inside them is significant (4 spaces per level).
    #
    # Returns (0, message) for E301-E304 or None.
    if line_number == 1:
        return  # Don't expect blank lines before the first line
    # Blank lines may sit either directly before this line or before a
    # comment block that precedes it; the larger count is used.
    max_blank_lines = max(blank_lines, blank_lines_before_comment)
    if previous_logical.startswith('@'):
        if max_blank_lines:
            return 0, "E304 blank lines found after function decorator"
    elif max_blank_lines > 2 or (indent_level and max_blank_lines == 2):
        return 0, "E303 too many blank lines (%d)" % max_blank_lines
    elif logical_line.startswith(('def ', 'class ', '@')):
        if indent_level:
            # Nested definition: one blank line is expected unless the
            # previous line is itself a def/class header or a docstring.
            if not (max_blank_lines or E301NOT_REGEX.match(previous_logical)):
                return 0, "E301 expected 1 blank line, found 0"
        elif max_blank_lines != 2:
            return 0, "E302 expected 2 blank lines, found %d" % max_blank_lines
def extraneous_whitespace(logical_line):
    """
    Avoid extraneous whitespace in the following situations:

    - Immediately inside parentheses, brackets or braces.

    - Immediately before a comma, semicolon, or colon.

    Okay: spam(ham[1], {eggs: 2})
    E201: spam( ham[1], {eggs: 2})
    E201: spam(ham[ 1], {eggs: 2})
    E201: spam(ham[1], { eggs: 2})
    E202: spam(ham[1], {eggs: 2} )
    E202: spam(ham[1 ], {eggs: 2})
    E202: spam(ham[1], {eggs: 2 })

    E203: if x == 4: print x, y; x, y = y , x
    E203: if x == 4: print x, y ; x, y = y, x
    E203: if x == 4 : print x, y; x, y = y, x
    """
    # Space right after an opening bracket; offset points at the space.
    for opener in '([{':
        position = logical_line.find(opener + ' ')
        if position != -1:
            return position + 1, "E201 whitespace after '%s'" % opener
    # Space right before a closing bracket, unless it follows a comma.
    for closer in '}])':
        position = logical_line.find(' ' + closer)
        if position != -1 and logical_line[position - 1] != ',':
            return position, "E202 whitespace before '%s'" % closer
    # Space right before a separator.
    for separator in ',;:':
        position = logical_line.find(' ' + separator)
        if position != -1:
            return position, "E203 whitespace before '%s'" % separator


def missing_whitespace(logical_line):
    """
    JCR: Each comma, semicolon or colon should be followed by whitespace.

    Okay: [a, b]
    Okay: (3,)
    Okay: a[1:4]
    Okay: a[:4]
    Okay: a[1:]
    Okay: a[1:4:2]
    E231: ['a','b']
    E231: foo(bar,baz)
    """
    # The last character has no successor, so it is never examined.
    for index, char in enumerate(logical_line[:-1]):
        if char not in ',;:':
            continue
        following = logical_line[index + 1]
        if following in WHITESPACE:
            continue
        if char == ':':
            preceding = logical_line[:index]
            if preceding.count('[') > preceding.count(']'):
                continue  # Slice syntax, no space required
        if char == ',' and following == ')':
            continue  # Allow tuple with only one element: (3,)
        return index, "E231 missing whitespace after '%s'" % char
def indentation(logical_line, previous_logical, indent_char,
                indent_level, previous_indent_level):
    r"""
    Use 4 spaces per indentation level.

    For really old code that you don't want to mess up, you can continue to
    use 8-space tabs.

    Okay: a = 1
    Okay: if a == 0:\n    a = 1
    E111:   a = 1

    Okay: for item in items:\n    pass
    E112: for item in items:\npass

    Okay: a = 1\nb = 2
    E113: a = 1\n    b = 2
    """
    # The examples above are executed by the self-test; their leading
    # whitespace is significant.
    #
    # Returns (0, message) for E111-E113 or None.
    if indent_char == ' ' and indent_level % 4:
        return 0, "E111 indentation is not a multiple of four"
    # A trailing colon on the previous logical line opens a new block.
    indent_expect = previous_logical.endswith(':')
    if indent_expect and indent_level <= previous_indent_level:
        return 0, "E112 expected an indented block"
    if indent_level > previous_indent_level and not indent_expect:
        return 0, "E113 unexpected indentation"


def whitespace_before_parameters(logical_line, tokens):
    """
    Avoid extraneous whitespace in the following situations:

    - Immediately before the open parenthesis that starts the argument
      list of a function call.

    - Immediately before the open parenthesis that starts an indexing or
      slicing.

    Okay: spam(1)
    E211: spam (1)

    Okay: dict['key'] = list[index]
    E211: dict ['key'] = list[index]
    E211: dict['key'] = list [index]
    """
    # Returns ((row, column), message), where the position is the end of
    # the name that precedes the bracket, or None.
    if not tokens:
        return None
    prev_type = tokens[0][0]
    prev_text = tokens[0][1]
    prev_end = tokens[0][3]
    for index in range(1, len(tokens)):
        token_type, text, start, end, line = tokens[index]
        if (token_type == tokenize.OP and
            text in ('(', '[') and
            start != prev_end and
            prev_type == tokenize.NAME and
            # "class Foo (Base):" is tolerated.
            (index < 2 or tokens[index - 2][1] != 'class') and
            # Keywords such as "if (" or "in [" are not calls or indexing.
            (not iskeyword(prev_text))):
            return prev_end, "E211 whitespace before '%s'" % text
        prev_type = token_type
        prev_text = text
        prev_end = end
def whitespace_around_operator(logical_line):
    """
    Avoid extraneous whitespace in the following situations:

    - More than one space around an assignment (or other) operator to
      align it with another.

    Okay: a = 12 + 3
    E221: a = 4  + 5
    E222: a = 4 +  5
    E223: a = 4\t+ 5
    E224: a = 4 +\t5
    """
    # "Multiple spaces" means two or more, so the probes use two spaces;
    # a single-space probe would flag every correctly spaced operator.
    # Returns (offset, message) for the first match, or None.
    line = logical_line
    for operator in OPERATORS:
        found = line.find('  ' + operator)
        if found > -1:
            return found, "E221 multiple spaces before operator"
        found = line.find(operator + '  ')
        if found > -1:
            return found, "E222 multiple spaces after operator"
        found = line.find('\t' + operator)
        if found > -1:
            return found, "E223 tab before operator"
        found = line.find(operator + '\t')
        if found > -1:
            return found, "E224 tab after operator"


def missing_whitespace_around_operator(logical_line, tokens):
    r"""
    - Always surround these binary operators with a single space on
      either side: assignment (=), augmented assignment (+=, -= etc.),
      comparisons (==, <, >, !=, <>, <=, >=, in, not in, is, is not),
      Booleans (and, or, not).

    - Use spaces around arithmetic operators.

    Okay: i = i + 1
    Okay: submitted += 1
    Okay: x = x * 2 - 1
    Okay: hypot2 = x * x + y * y
    Okay: c = (a + b) * (a - b)
    Okay: foo(bar, key='word', *args, **kwargs)
    Okay: baz(**kwargs)
    Okay: negative = -1
    Okay: spam(-1)
    Okay: alpha[:-i]
    Okay: if not -5 < x < +5:\n    pass
    Okay: lambda *args, **kw: (args, kw)

    E225: i=i+1
    E225: submitted +=1
    E225: x = x*2 - 1
    E225: hypot2 = x*x + y*y
    E225: c = (a+b) * (a-b)
    E225: c = alpha -4
    E225: z = x **y
    """
    # Returns ((row, column), message) or None.
    closing_brackets = ('}', ']', ')')
    parens = 0
    need_space = False
    # Pretend the line is preceded by an operator so that a leading
    # "-x" or "*args" is treated as unary.
    prev_type = tokenize.OP
    prev_text = prev_end = None
    for token_type, text, start, end, line in tokens:
        if token_type in (tokenize.NL, tokenize.NEWLINE, tokenize.ERRORTOKEN):
            # ERRORTOKEN is triggered by backticks in Python 3000
            continue
        if text in ('(', 'lambda'):
            parens += 1
        elif text == ')':
            parens -= 1
        if need_space:
            # The previous token was an operator that needs a space after it.
            if start == prev_end:
                return prev_end, "E225 missing whitespace around operator"
            need_space = False
        elif token_type == tokenize.OP:
            if text == '=' and parens:
                # Allow keyword args or defaults: foo(bar=None).
                pass
            elif text in BINARY_OPERATORS:
                need_space = True
            elif text in UNARY_OPERATORS:
                # prev_text is None for the first token, so compare against
                # a tuple rather than using a substring test on a string.
                if ((prev_type != tokenize.OP or
                     prev_text in closing_brackets) and not
                    (prev_type == tokenize.NAME and iskeyword(prev_text))):
                    # Allow unary operators: -123, -x, +1.
                    # Allow argument unpacking: foo(*args, **kwargs).
                    need_space = True
            if need_space and start == prev_end:
                return prev_end, "E225 missing whitespace around operator"
        prev_type = token_type
        prev_text = text
        prev_end = end
def whitespace_around_comma(logical_line):
    """
    Avoid extraneous whitespace in the following situations:

    - More than one space around an assignment (or other) operator to
      align it with another.

    JCR: This should also be applied around comma etc.
    Note: these checks are disabled by default

    Okay: a = (1, 2)
    E241: a = (1,  2)
    E242: a = (1,\t2)
    """
    # "Multiple spaces" means two or more, so the probe uses two spaces;
    # a single-space probe would flag the recommended "a, b" form.
    # The returned offset points at the first whitespace character after
    # the separator.
    line = logical_line
    for separator in ',;:':
        found = line.find(separator + '  ')
        if found > -1:
            return found + 1, "E241 multiple spaces after '%s'" % separator
        found = line.find(separator + '\t')
        if found > -1:
            return found + 1, "E242 tab after '%s'" % separator
def whitespace_around_named_parameter_equals(logical_line):
    """
    Don't use spaces around the '=' sign when used to indicate a
    keyword argument or a default parameter value.

    Okay: def complex(real, imag=0.0):
    Okay: return magic(r=real, i=imag)
    Okay: boolean(a == b)
    Okay: boolean(a != b)
    Okay: boolean(a <= b)
    Okay: boolean(a >= b)

    E251: def complex(real, imag = 0.0):
    E251: return magic(r = real, i = imag)
    """
    # Returns (offset, message) or None.  The offset is the position of
    # the character that completed the offending three-character window.
    parens = 0
    # Sliding window over the last three characters.  It must start out
    # three characters wide because window[1] and window[2] are indexed
    # below.
    window = '   '
    equal_ok = ['==', '!=', '<=', '>=']

    for pos, c in enumerate(logical_line):
        window = window[1:] + c
        if parens:
            # "<space>=" that is not the tail of a comparison operator
            if window[0] in WHITESPACE and window[1] == '=':
                if window[1:] not in equal_ok:
                    issue = "E251 no spaces around keyword / parameter equals"
                    return pos, issue
            # "=<space>" that is not the tail of a comparison operator
            if window[2] in WHITESPACE and window[1] == '=':
                if window[:2] not in equal_ok:
                    issue = "E251 no spaces around keyword / parameter equals"
                    return pos, issue
        if c == '(':
            parens += 1
        elif c == ')':
            parens -= 1
def whitespace_before_inline_comment(logical_line, tokens):
    """
    Separate inline comments by at least two spaces.

    An inline comment is a comment on the same line as a statement. Inline
    comments should be separated by at least two spaces from the statement.
    They should start with a # and a single space.

    Okay: x = x + 1  # Increment x
    Okay: x = x + 1    # Increment x
    E261: x = x + 1 # Increment x
    E262: x = x + 1  #Increment x
    E262: x = x + 1  #  Increment x
    """
    # Returns ((row, column), message) or None.
    prev_end = (0, 0)
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.NL:
            continue
        if token_type == tokenize.COMMENT:
            if not line[:start[1]].strip():
                continue  # Comment on a line by itself, not inline
            if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:
                return (prev_end,
                        "E261 at least two spaces before inline comment")
            # Exactly one space must follow the '#': reject both
            # "#  text" (two or more spaces) and "#text" (none).
            if (len(text) > 1 and text.startswith('#  ')
                           or not text.startswith('# ')):
                return start, "E262 inline comment should start with '# '"
        else:
            prev_end = end


def imports_on_separate_lines(logical_line):
    r"""
    Imports should usually be on separate lines.

    Okay: import os\nimport sys
    E401: import sys, os

    Okay: from subprocess import Popen, PIPE
    Okay: from myclas import MyClass
    Okay: from foo.bar.yourclass import YourClass
    Okay: import myclass
    Okay: import foo.bar.yourclass
    """
    # Only plain "import a, b" is flagged; "from x import a, b" is fine.
    line = logical_line
    if line.startswith('import '):
        found = line.find(',')
        if found > -1:
            return found, "E401 multiple imports on one line"
def compound_statements(logical_line):
    r"""
    Compound statements (multiple statements on the same line) are
    generally discouraged.

    While sometimes it's okay to put an if/for/while with a small body
    on the same line, never do this for multi-clause statements. Also
    avoid folding such long lines!

    Okay: if foo == 'blah':\n    do_blah_thing()
    Okay: do_one()
    Okay: do_two()
    Okay: do_three()

    E701: if foo == 'blah': do_blah_thing()
    E701: for x in lst: total += x
    E701: while t < 10: t = delay()
    E701: if foo == 'blah': do_blah_thing()
    E701: else: do_non_blah_thing()
    E701: try: something()
    E701: finally: cleanup()
    E701: if foo == 'blah': one(); two(); three()

    E702: do_one(); do_two(); do_three()
    """
    # Returns (offset, message) or None.  Only the first colon is
    # examined, and a colon that ends the line is a normal block opener.
    line = logical_line
    found = line.find(':')
    if -1 < found < len(line) - 1:
        before = line[:found]
        if (before.count('{') <= before.count('}') and  # {'a': 1} (dict)
            before.count('[') <= before.count(']') and  # [1:2] (slice)
            not re.search(r'\blambda\b', before)):      # lambda x: x
            return found, "E701 multiple statements on one line (colon)"
    found = line.find(';')
    if -1 < found:
        return found, "E702 multiple statements on one line (semicolon)"


def python_3000_has_key(logical_line):
    """
    The {}.has_key() method will be removed in the future version of
    Python. Use the 'in' operation instead, like:
    d = {"a": 1, "b": 2}
    if "b" in d:
        print d["b"]
    """
    pos = logical_line.find('.has_key(')
    if pos > -1:
        return pos, "W601 .has_key() is deprecated, use 'in'"
def python_3000_raise_comma(logical_line):
    """
    When raising an exception, use "raise ValueError('message')"
    instead of the older form "raise ValueError, 'message'".

    The paren-using form is preferred because when the exception arguments
    are long or include string formatting, you don't need to use line
    continuation characters thanks to the containing parentheses. The older
    form will be removed in Python 3000.
    """
    # The offset points at the comma captured by the regex.
    match = RAISE_COMMA_REGEX.match(logical_line)
    if match:
        return match.start(1), "W602 deprecated form of raising exception"


def python_3000_not_equal(logical_line):
    """
    != can also be written <>, but this is an obsolete usage kept for
    backwards compatibility only. New code should always use !=.
    The older syntax is removed in Python 3000.
    """
    pos = logical_line.find('<>')
    if pos > -1:
        return pos, "W603 '<>' is deprecated, use '!='"


def python_3000_backticks(logical_line):
    """
    Backticks are removed in Python 3000.
    Use repr() instead.
    """
    pos = logical_line.find('`')
    if pos > -1:
        return pos, "W604 backticks are deprecated, use 'repr()'"


##############################################################################
# Helper functions
##############################################################################


def expand_indent(line):
    """
    Return the amount of indentation.
    Tabs are expanded to the next multiple of 8.

    >>> expand_indent('    ')
    4
    >>> expand_indent('\\t')
    8
    >>> expand_indent('    \\t')
    8
    >>> expand_indent('       \\t')
    8
    >>> expand_indent('        \\t')
    16
    """
    result = 0
    for char in line:
        if char == '\t':
            # Jump to the next tab stop (multiple of 8).
            result = result // 8 * 8 + 8
        elif char == ' ':
            result += 1
        else:
            break  # First non-indentation character ends the scan
    return result


def mute_string(text):
    """
    Replace contents with 'xxx' to prevent syntax matching.

    >>> mute_string('"abc"')
    '"xxx"'
    >>> mute_string("'''abc'''")
    "'''xxx'''"
    >>> mute_string("r'abc'")
    "r'xxx'"
    """
    # start/end delimit the string contents, excluding prefix and quotes.
    start = 1
    end = len(text) - 1
    # String modifiers (e.g. u or r)
    if text.endswith('"'):
        start += text.index('"')
    elif text.endswith("'"):
        start += text.index("'")
    # Triple quotes
    if text.endswith('"""') or text.endswith("'''"):
        start += 2
        end -= 2
    return text[:start] + 'x' * (end - start) + text[end:]
def message(text):
    """Print a message."""
    print(text)


##############################################################################
# Framework to run all checks
##############################################################################


def find_checks(argument_name):
    """
    Find all globally visible functions where the first argument name
    starts with argument_name.

    Returns a sorted list of (name, function, argument_names) tuples.
    Functions whose docstring mentions error codes are only included if
    at least one of those codes is not ignored.
    """
    # inspect.getargspec was removed in Python 3.11; getfullargspec does
    # not exist on Python 2.  Both return the positional names at index 0.
    getargspec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
    checks = []
    for name, function in list(globals().items()):
        if not inspect.isfunction(function):
            continue
        arg_names = getargspec(function)[0]
        if arg_names and arg_names[0].startswith(argument_name):
            codes = ERRORCODE_REGEX.findall(inspect.getdoc(function) or '')
            # A check without any documented code is always enabled.
            for code in codes or ['']:
                if not code or not ignore_code(code):
                    checks.append((name, function, arg_names))
                    break
    checks.sort()
    return checks


def _read_lines(filename, **open_kwargs):
    """Return all lines of filename, always closing the file afterwards."""
    source = open(filename, **open_kwargs)
    try:
        return source.readlines()
    finally:
        source.close()


class Checker(object):
    """
    Load a Python source file, tokenize it, check coding style.
    """

    def __init__(self, filename):
        """
        Read filename into self.lines; a false filename means 'stdin' and
        starts with an empty list of lines that the caller may fill in.
        """
        if filename:
            self.filename = filename
            try:
                self.lines = _read_lines(filename)
            except UnicodeDecodeError:
                # Errors may occur with non-UTF8 files in Python 3000
                self.lines = _read_lines(filename, errors='replace')
        else:
            self.filename = 'stdin'
            self.lines = []
        options.counters['physical lines'] = \
            options.counters.get('physical lines', 0) + len(self.lines)

    def readline(self):
        """
        Get the next line from the input buffer.

        Returns '' once the buffer is exhausted.
        """
        self.line_number += 1
        if self.line_number > len(self.lines):
            return ''
        return self.lines[self.line_number - 1]

    def readline_check_physical(self):
        """
        Check and return the next physical line. This method can be
        used to feed tokenize.generate_tokens.
        """
        line = self.readline()
        if line:
            self.check_physical(line)
        return line

    def run_check(self, check, argument_names):
        """
        Run a check plugin.

        Each argument is looked up by name as an attribute of this Checker.
        """
        arguments = []
        for name in argument_names:
            arguments.append(getattr(self, name))
        return check(*arguments)

    def check_physical(self, line):
        """
        Run all physical checks on a raw input line.
        """
        self.physical_line = line
        # The first indented line decides the file's indentation character.
        if self.indent_char is None and len(line) and line[0] in ' \t':
            self.indent_char = line[0]
        for name, check, argument_names in options.physical_checks:
            result = self.run_check(check, argument_names)
            if result is not None:
                offset, text = result
                self.report_error(self.line_number, offset, text, check)

    def build_tokens_line(self):
        """
        Build a logical line from tokens.

        Sets self.logical_line and self.mapping, a list of
        (offset in logical line, token) pairs.
        """
        self.mapping = []
        logical = []
        length = 0
        previous = None
        for token in self.tokens:
            token_type, text = token[0:2]
            if token_type in (tokenize.COMMENT, tokenize.NL,
                              tokenize.INDENT, tokenize.DEDENT,
                              tokenize.NEWLINE):
                continue
            if token_type == tokenize.STRING:
                text = mute_string(text)
            if previous:
                end_line, end = previous[3]
                start_line, start = token[2]
                if end_line != start_line:  # different row
                    # Join continuation lines with one space, except
                    # directly after an opening bracket.
                    if self.lines[end_line - 1][end - 1] not in '{[(':
                        logical.append(' ')
                        length += 1
                elif end != start:  # different column
                    # Copy the original whitespace between the two tokens.
                    fill = self.lines[end_line - 1][end:start]
                    logical.append(fill)
                    length += len(fill)
            self.mapping.append((length, token))
            logical.append(text)
            length += len(text)
            previous = token
        self.logical_line = ''.join(logical)
        assert self.logical_line.lstrip() == self.logical_line
        assert self.logical_line.rstrip() == self.logical_line

    def check_logical(self):
        """
        Build a line from tokens and run all logical checks on it.
        """
        options.counters['logical lines'] = \
            options.counters.get('logical lines', 0) + 1
        self.build_tokens_line()
        # The indentation is whatever precedes the first mapped token on
        # its physical line.
        first_line = self.lines[self.mapping[0][1][2][0] - 1]
        indent = first_line[:self.mapping[0][1][2][1]]
        self.previous_indent_level = self.indent_level
        self.indent_level = expand_indent(indent)
        if options.verbose >= 2:
            print(self.logical_line[:80].rstrip())
        for name, check, argument_names in options.logical_checks:
            if options.verbose >= 3:
                print(' ', name)
            result = self.run_check(check, argument_names)
            if result is not None:
                offset, text = result
                if isinstance(offset, tuple):
                    # The check already returned a (row, column) position.
                    original_number, original_offset = offset
                else:
                    # Translate the logical-line offset back to a physical
                    # position using the last token that starts at or
                    # before the offset.
                    for token_offset, token in self.mapping:
                        if offset >= token_offset:
                            original_number = token[2][0]
                            original_offset = (token[2][1]
                                               + offset - token_offset)
                self.report_error(original_number, original_offset,
                                  text, check)
        self.previous_logical = self.logical_line

    def check_all(self):
        """
        Run all checks on the input file.

        Returns the number of errors reported for this file.
        """
        self.file_errors = 0
        self.line_number = 0
        self.indent_char = None
        self.indent_level = 0
        self.previous_logical = ''
        self.blank_lines = 0
        self.blank_lines_before_comment = 0
        self.tokens = []
        parens = 0
        for token in tokenize.generate_tokens(self.readline_check_physical):
            self.tokens.append(token)
            token_type, text = token[0:2]
            if token_type == tokenize.OP and text in '([{':
                parens += 1
            if token_type == tokenize.OP and text in '}])':
                parens -= 1
            if token_type == tokenize.NEWLINE and not parens:
                # End of a logical line: check it and start a new one.
                self.check_logical()
                self.blank_lines = 0
                self.blank_lines_before_comment = 0
                self.tokens = []
            if token_type == tokenize.NL and not parens:
                if len(self.tokens) <= 1:
                    # The physical line contains only this token.
                    self.blank_lines += 1
                self.tokens = []
            if token_type == tokenize.COMMENT:
                source_line = token[4]
                token_start = token[2][1]
                if source_line[:token_start].strip() == '':
                    # Comment on a line by itself: remember the blank lines
                    # seen before it and start counting afresh.
                    self.blank_lines_before_comment = max(self.blank_lines,
                        self.blank_lines_before_comment)
                    self.blank_lines = 0
                if text.endswith('\n') and not parens:
                    # The comment also ends a physical line. This works around
                    # Python < 2.6 behaviour, which does not generate NL after
                    # a comment which is on a line by itself.
                    self.tokens = []
        return self.file_errors

    def report_error(self, line_number, offset, text, check):
        """
        Report an error, according to options.

        The first four characters of text are the error code; the message
        starts at index 5.
        """
        if options.quiet == 1 and not self.file_errors:
            message(self.filename)
        self.file_errors += 1
        code = text[:4]
        options.counters[code] = options.counters.get(code, 0) + 1
        options.messages[code] = text[5:]
        if options.quiet:
            return
        if options.testsuite:
            basename = os.path.basename(self.filename)
            if basename[:4] != code:
                return  # Don't care about other errors or warnings
            if 'not' not in basename:
                return  # Don't print the expected error message
        if ignore_code(code):
            return
        if options.counters[code] == 1 or options.repeat:
            message("%s:%s:%d: %s" %
                    (self.filename, line_number, offset + 1, text))
            if options.show_source:
                line = self.lines[line_number - 1]
                message(line.rstrip())
                message(' ' * offset + '^')
            if options.show_pep8:
                message(check.__doc__.lstrip('\n').rstrip())
def input_file(filename):
    """
    Run all checks on a Python source file.

    Returns {} without checking anything when the file is excluded;
    otherwise returns None after updating options.counters.
    """
    if excluded(filename):
        return {}
    if options.verbose:
        message('checking ' + filename)
    files_counter_before = options.counters.get('files', 0)
    if options.testsuite:  # Keep showing errors for multiple tests
        options.counters = {}
    options.counters['files'] = files_counter_before + 1
    Checker(filename).check_all()
    if options.testsuite:  # Check if the expected error was found
        # Test-suite files are named after the code they must trigger.
        basename = os.path.basename(filename)
        code = basename[:4]
        count = options.counters.get(code, 0)
        if count == 0 and 'not' not in basename:
            message("%s: error %s not found" % (filename, code))


def input_dir(dirname):
    """
    Check all Python source files in this directory and all subdirectories.
    """
    dirname = dirname.rstrip('/')
    if excluded(dirname):
        return
    for root, dirs, files in os.walk(dirname):
        if options.verbose:
            message('directory ' + root)
        options.counters['directories'] = \
            options.counters.get('directories', 0) + 1
        # Prune excluded subdirectories in place so os.walk skips them.
        # The list is rebuilt rather than mutated during iteration, which
        # would skip the entry following each removed one.
        dirs[:] = [subdir for subdir in sorted(dirs) if not excluded(subdir)]
        files.sort()
        for filename in files:
            if filename_match(filename):
                input_file(os.path.join(root, filename))


def excluded(filename):
    """
    Check if options.exclude contains a pattern that matches filename.

    Only the base name of filename is matched against the patterns.
    """
    basename = os.path.basename(filename)
    for pattern in options.exclude:
        if fnmatch(basename, pattern):
            return True
    return False


def filename_match(filename):
    """
    Check if options.filename contains a pattern that matches filename.
    If options.filename is unspecified, this always returns True.
    """
    if not options.filename:
        return True
    for pattern in options.filename:
        if fnmatch(filename, pattern):
            return True
    return False
def filename_match(filename):
    """
    Tell whether filename is accepted by the options.filename patterns.

    Returns True when no patterns are configured or when at least one
    pattern matches; returns None when none of them matches.
    """
    patterns = options.filename
    if patterns:
        for glob in patterns:
            if fnmatch(filename, glob):
                break
        else:
            # Patterns were given and not a single one matched.
            return None
    return True


def ignore_code(code):
    """
    Decide whether the error code should be ignored.

    A prefix in options.select wins and yields False; otherwise a prefix
    in options.ignore yields True.  None is returned when neither list
    contains a prefix of the code.
    """
    rules = ((False, options.select), (True, options.ignore))
    for verdict, prefixes in rules:
        for prefix in prefixes:
            if code.startswith(prefix):
                return verdict
    return None


def get_error_statistics():
    """Return the statistics lines of all 'E' codes."""
    return get_statistics(prefix="E")


def get_warning_statistics():
    """Return the statistics lines of all 'W' codes."""
    return get_statistics(prefix="W")


def get_statistics(prefix=''):
    """
    Return one formatted line per message code starting with prefix.

    Each line holds the count, the code and the message text, and the
    lines are ordered by code.

    prefix='' matches all errors and warnings
    prefix='E' matches all errors
    prefix='W' matches all warnings
    prefix='E4' matches all errors that have to do with imports
    """
    codes = [code for code in options.messages if code.startswith(prefix)]
    codes.sort()
    return ['%-7s %s %s' % (options.counters[code], code,
                            options.messages[code])
            for code in codes]


def get_count(prefix=''):
    """Return the summed count of all message codes starting with prefix."""
    return sum([options.counters[code]
                for code in options.messages
                if code.startswith(prefix)])


def print_statistics(prefix=''):
    """Print the statistics lines of all codes starting with prefix."""
    for entry in get_statistics(prefix):
        print(entry)
def print_benchmark(elapsed):
    """
    Print benchmark numbers.

    elapsed is the measured run time in seconds.  It is printed first,
    followed by one line for each of the 'directories', 'files',
    'logical lines' and 'physical lines' counters that was recorded.
    """
    print('%-7.2f %s' % (elapsed, 'seconds elapsed'))
    keys = ['directories', 'files',
            'logical lines', 'physical lines']
    for key in keys:
        if key in options.counters:
            total = options.counters[key]
            if elapsed > 0:
                rate = total / elapsed
            else:
                # A coarse timer can report a short run as taking no time
                # at all; report a rate of 0 instead of dividing by zero.
                rate = 0
            print('%-7d %s per second (%d total)' % (rate, key, total))


def selftest():
    """
    Test all check functions with test cases in docstrings.

    Every docstring line accepted by SELFTEST_REGEX is split into a code
    ('Okay' or an error/warning code) and example source, which is then
    run through a fresh Checker.  Failures are printed; a summary is
    printed when options.verbose is set.

    The examples run silently against scratch counters and messages.
    options.quiet, options.counters and options.messages are put back
    afterwards, so later checks and statistics are not affected.
    """
    count_passed = 0
    count_failed = 0
    checks = options.physical_checks + options.logical_checks
    saved_state = (options.quiet, options.counters, options.messages)
    options.messages = {}
    try:
        for name, check, argument_names in checks:
            # A check without a docstring simply has no examples.
            for line in (check.__doc__ or '').splitlines():
                line = line.lstrip()
                match = SELFTEST_REGEX.match(line)
                if match is None:
                    continue
                code, source = match.groups()
                checker = Checker(None)
                # Expand the \n, \t and \s escapes used in the examples.
                for part in source.split(r'\n'):
                    part = part.replace(r'\t', '\t')
                    part = part.replace(r'\s', ' ')
                    checker.lines.append(part + '\n')
                options.quiet = 2
                options.counters = {}
                checker.check_all()
                error = None
                if code == 'Okay':
                    # More than one counter means that something besides
                    # 'logical lines' was recorded.
                    if len(options.counters) > 1:
                        codes = [key for key in options.counters.keys()
                                 if key != 'logical lines']
                        error = "incorrectly found %s" % ', '.join(codes)
                elif options.counters.get(code, 0) == 0:
                    error = "failed to find %s" % code
                if not error:
                    count_passed += 1
                else:
                    count_failed += 1
                    if len(checker.lines) == 1:
                        print("pep8.py: %s: %s" %
                              (error, checker.lines[0].rstrip()))
                    else:
                        print("pep8.py: %s:" % error)
                        for source_line in checker.lines:
                            print(source_line.rstrip())
    finally:
        options.quiet, options.counters, options.messages = saved_state
    if options.verbose:
        print("%d passed and %d failed." % (count_passed, count_failed))
        if count_failed:
            print("Test failed.")
        else:
            print("Test passed.")
def process_options(arglist=None):
    """
    Build the option parser, parse arglist and normalise the result.

    When arglist is None the command line arguments are parsed.  The
    comma separated option values are turned into lists, the check
    functions are looked up, and empty counters and messages are
    attached.  The result is stored in the module globals options and
    args and also returned as the tuple (options, args).
    """
    global options, args
    flag = {'action': 'store_true'}
    option_table = [
        (('-v', '--verbose'),
         {'default': 0, 'action': 'count',
          'help': "print status messages, or debug with -vv"}),
        (('-q', '--quiet'),
         {'default': 0, 'action': 'count',
          'help': "report only file names, or nothing with -qq"}),
        (('-r', '--repeat'),
         dict(flag, help="show all occurrences of the same error")),
        (('--exclude',),
         {'metavar': 'patterns', 'default': DEFAULT_EXCLUDE,
          'help': "exclude files or directories which match these "
                  "comma separated patterns (default: %s)" %
                  DEFAULT_EXCLUDE}),
        (('--filename',),
         {'metavar': 'patterns', 'default': '*.py',
          'help': "when parsing directories, only check filenames "
                  "matching these comma separated patterns (default: "
                  "*.py)"}),
        (('--select',),
         {'metavar': 'errors', 'default': '',
          'help': "select errors and warnings (e.g. E,W6)"}),
        (('--ignore',),
         {'metavar': 'errors', 'default': '',
          'help': "skip errors and warnings (e.g. E4,W)"}),
        (('--show-source',),
         dict(flag, help="show source code for each error")),
        (('--show-pep8',),
         dict(flag, help="show text of PEP 8 for each error")),
        (('--statistics',),
         dict(flag, help="count errors and warnings")),
        (('--count',),
         dict(flag, help="print total number of errors and warnings "
                         "to standard error and set exit code to 1 if "
                         "total is not null")),
        (('--benchmark',),
         dict(flag, help="measure processing speed")),
        (('--testsuite',),
         {'metavar': 'dir', 'help': "run regression tests from dir"}),
        (('--doctest',),
         dict(flag, help="run doctest on myself")),
    ]
    parser = OptionParser(version=__version__,
                          usage="%prog [options] input ...")
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)
    options, args = parser.parse_args(arglist)

    # The test suite directory is checked like any other input.
    if options.testsuite:
        args.append(options.testsuite)
    if not (args or options.doctest):
        parser.error('input not specified')

    options.prog = os.path.basename(sys.argv[0])
    options.exclude = [pattern.rstrip('/')
                       for pattern in options.exclude.split(',')]
    if options.filename:
        options.filename = options.filename.split(',')

    selected = []
    if options.select:
        selected = options.select.split(',')
    options.select = selected

    if options.ignore:
        ignored = options.ignore.split(',')
    elif selected:
        # Ignore all checks which are not explicitly selected
        ignored = ['']
    elif options.testsuite or options.doctest:
        # For doctest and testsuite, all checks are required
        ignored = []
    else:
        # The default choice: ignore controversial checks
        ignored = DEFAULT_IGNORE
    options.ignore = ignored

    options.physical_checks = find_checks('physical_line')
    options.logical_checks = find_checks('logical_line')
    options.counters = {}
    options.messages = {}
    return options, args


def _main():
    """
    Command line entry point: parse options, then check every input.

    Runs the doctests and the docstring self test first when --doctest
    is given.  Afterwards prints statistics and benchmark numbers on
    request.  With --count the total is written to standard error and
    the exit code is 1 when that total is not zero.
    """
    opts, paths = process_options()
    if opts.doctest:
        import doctest
        doctest.testmod(verbose=opts.verbose)
        selftest()
    started = time.time()
    for path in paths:
        handler = input_file
        if os.path.isdir(path):
            handler = input_dir
        handler(path)
    elapsed = time.time() - started
    if opts.statistics:
        print_statistics()
    if opts.benchmark:
        print_benchmark(elapsed)
    if opts.count:
        total = get_count()
        if total:
            sys.stderr.write("%s\n" % total)
            sys.exit(1)


if __name__ == '__main__':
    _main()